Python tensorflow.pow() Examples

The following are 28 code examples of tensorflow.pow(), drawn from open-source projects. The source file, project, and license are noted above each example.
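For orientation, tf.pow computes element-wise powers with standard broadcasting. A minimal, hypothetical TF 1.x graph-mode sketch (matching the style of the examples below):

import tensorflow as tf

x = tf.constant([2.0, 3.0, 4.0])
squared = tf.pow(x, 2.0)            # element-wise square -> [4., 9., 16.]
mixed = tf.pow(x, [1.0, 2.0, 3.0])  # per-element exponents -> [2., 9., 64.]

with tf.Session() as sess:
    print(sess.run([squared, mixed]))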
Example #1
Source File: losses.py From R2CNN_Faster-RCNN_Tensorflow with MIT License
def _smooth_l1_loss_base(bbox_pred, bbox_targets, sigma=1.0):
    '''
    :param bbox_pred: [-1, 4] in RPN. [-1, cls_num+1, 4] or [-1, cls_num+1, 5] in Fast-rcnn
    :param bbox_targets: shape is same as bbox_pred
    :param sigma:
    :return:
    '''
    sigma_2 = sigma ** 2
    box_diff = bbox_pred - bbox_targets
    abs_box_diff = tf.abs(box_diff)
    smoothL1_sign = tf.stop_gradient(
        tf.to_float(tf.less(abs_box_diff, 1. / sigma_2)))
    loss_box = tf.pow(box_diff, 2) * (sigma_2 / 2.0) * smoothL1_sign \
               + (abs_box_diff - (0.5 / sigma_2)) * (1.0 - smoothL1_sign)
    return loss_box
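To see the piecewise behaviour (quadratic for |diff| < 1/sigma^2, linear beyond), here is a hypothetical usage sketch, assuming the _smooth_l1_loss_base above is in scope:

import tensorflow as tf

bbox_pred = tf.constant([[0.1, 0.2, 0.3, 2.0]])
bbox_targets = tf.zeros_like(bbox_pred)
loss = _smooth_l1_loss_base(bbox_pred, bbox_targets, sigma=1.0)

with tf.Session() as sess:
    # Small diffs hit the quadratic branch (0.5 * d^2);
    # the 2.0 diff hits the linear branch (|d| - 0.5).
    print(sess.run(loss))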
Example #2
Source File: attacks_tf.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def _apply_gradients(self, grads, x, optim_state):
    """Refer to parent class documentation."""
    new_x = [None] * len(x)
    new_optim_state = {
        "t": optim_state["t"] + 1.,
        "m": [None] * len(x),
        "u": [None] * len(x)
    }
    t = new_optim_state["t"]
    for i in xrange(len(x)):
        g = grads[i]
        m_old = optim_state["m"][i]
        u_old = optim_state["u"][i]
        new_optim_state["m"][i] = (
            self._beta1 * m_old + (1. - self._beta1) * g)
        new_optim_state["u"][i] = (
            self._beta2 * u_old + (1. - self._beta2) * g * g)
        m_hat = new_optim_state["m"][i] / (1. - tf.pow(self._beta1, t))
        u_hat = new_optim_state["u"][i] / (1. - tf.pow(self._beta2, t))
        new_x[i] = (
            x[i] - self._lr * m_hat / (tf.sqrt(u_hat) + self._epsilon))
    return new_x, new_optim_state
Example #3
Source File: ops.py From mac-network with Apache License 2.0
def locationPE(h, w, dim, outDim = -1, addBias = True):
    x = tf.expand_dims(tf.to_float(tf.linspace(-config.locationBias, config.locationBias, w)), axis = -1)
    y = tf.expand_dims(tf.to_float(tf.linspace(-config.locationBias, config.locationBias, h)), axis = -1)
    i = tf.expand_dims(tf.to_float(tf.range(dim)), axis = 0)

    peSinX = tf.sin(x / (tf.pow(10000.0, i / dim)))
    peCosX = tf.cos(x / (tf.pow(10000.0, i / dim)))
    peSinY = tf.sin(y / (tf.pow(10000.0, i / dim)))
    peCosY = tf.cos(y / (tf.pow(10000.0, i / dim)))

    peSinX = tf.tile(tf.expand_dims(peSinX, axis = 0), [h, 1, 1])
    peCosX = tf.tile(tf.expand_dims(peCosX, axis = 0), [h, 1, 1])
    peSinY = tf.tile(tf.expand_dims(peSinY, axis = 1), [1, w, 1])
    peCosY = tf.tile(tf.expand_dims(peCosY, axis = 1), [1, w, 1])

    grid = tf.concat([peSinX, peCosX, peSinY, peCosY], axis = -1)
    dim *= 4

    if outDim > 0:
        grid = linear(grid, dim, outDim, addBias = addBias, name = "locationPE")
        dim = outDim

    return grid, dim
Example #4
Source File: common_attention.py From fine-lm with MIT License
def scaled_dot_product_attention_simple(q, k, v, bias, name=None):
    """Scaled dot-product attention. One head. One spatial dimension.

    Args:
        q: a Tensor with shape [batch, length_q, depth_k]
        k: a Tensor with shape [batch, length_kv, depth_k]
        v: a Tensor with shape [batch, length_kv, depth_v]
        bias: optional Tensor broadcastable to [batch, length_q, length_kv]
        name: an optional string

    Returns:
        A Tensor.
    """
    with tf.variable_scope(
            name, default_name="scaled_dot_product_attention_simple"):
        scalar = tf.rsqrt(tf.to_float(common_layers.shape_list(q)[2]))
        logits = tf.matmul(q * scalar, k, transpose_b=True)
        if bias is not None:
            logits += bias
        weights = tf.nn.softmax(logits, name="attention_weights")
        if common_layers.should_generate_summaries():
            tf.summary.image(
                "attention", tf.expand_dims(tf.pow(weights, 0.2), 3),
                max_outputs=1)
        return tf.matmul(weights, v)
Example #5
Source File: tacotron.py From vae_tacotron with MIT License
def add_loss(self, global_step):
    '''Adds loss to the model. Sets "loss" field. initialize must have been called.'''
    with tf.variable_scope('loss') as scope:
        hp = self._hparams
        self.mel_loss = tf.reduce_mean(tf.abs(self.mel_targets - self.mel_outputs))
        l1 = tf.abs(self.linear_targets - self.linear_outputs)
        # Prioritize loss for frequencies under 3000 Hz.
        n_priority_freq = int(3000 / (hp.sample_rate * 0.5) * hp.num_freq)
        self.linear_loss = 0.5 * tf.reduce_mean(l1) + 0.5 * tf.reduce_mean(l1[:, :, 0:n_priority_freq])
        self.loss = self.mel_loss + self.linear_loss
        if hp.use_vae:
            # KL term: -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
            self.kl_loss = -0.5 * tf.reduce_sum(1 + self.log_var - tf.pow(self.mu, 2) - tf.exp(self.log_var))
            vae_loss_weight = vae_weight(global_step)
            self.loss += self.kl_loss * vae_loss_weight
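The tf.pow(self.mu, 2) term comes from the closed-form KL divergence between N(mu, sigma^2) and N(0, 1). A standalone sketch of just that term, with hypothetical values:

import tensorflow as tf

mu = tf.constant([0.5, -0.3])
log_var = tf.constant([0.0, -1.0])
# KL(N(mu, sigma^2) || N(0, 1)) = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
kl = -0.5 * tf.reduce_sum(1 + log_var - tf.pow(mu, 2) - tf.exp(log_var))

with tf.Session() as sess:
    print(sess.run(kl))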
Example #6
Source File: layers.py From PADME with MIT License
def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    inputs = self._get_input_tensors(in_layers)
    temp = []
    subspaces = []
    # Creates subspaces the same way it was done in AlphaShare.
    for input_tensor in inputs:
        subspace_size = int(input_tensor.get_shape()[-1].value / 2)
        subspaces.append(input_tensor[:, :subspace_size])
        subspaces.append(input_tensor[:, subspace_size:])
        product = tf.matmul(tf.transpose(subspaces[0]), subspaces[1])
        subspaces = []
        # Calculate the squared Frobenius norm.
        temp.append(tf.reduce_sum(tf.pow(product, 2)))
    out_tensor = tf.reduce_sum(temp)
    self.out_tensor = out_tensor
    return out_tensor
Example #7
Source File: losses.py From CapsLayer with Apache License 2.0
def spread_loss(labels, logits, margin, regularizer=None):
    """
    Args:
        labels: [batch_size, num_label].
        logits: [batch_size, num_label].
        margin: scalar or 1-D Tensor margin.
        regularizer: optional regularization term added to the loss.
    Returns:
        loss: spread loss.
    """
    a_target = cl.reduce_sum(labels * logits, axis=1, keepdims=True)
    dist = (1 - labels) * margin - (a_target - logits)
    dist = tf.pow(tf.maximum(0., dist), 2)
    loss = tf.reduce_mean(tf.reduce_sum(dist, axis=-1))
    if regularizer is not None:
        regularizer = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        loss += tf.reduce_mean(regularizer)
    return loss
Example #8
Source File: losses.py From CapsLayer with Apache License 2.0
def margin_loss(labels, logits, upper_margin=0.9, bottom_margin=0.1, downweight=0.5):
    """
    Args:
        labels: [batch_size, num_label].
        logits: [batch_size, num_label].
    """
    positive_selector = tf.cast(tf.less(logits, upper_margin), tf.float32)
    positive_cost = positive_selector * labels * tf.pow(logits - upper_margin, 2)

    negative_selector = tf.cast(tf.greater(logits, bottom_margin), tf.float32)
    negative_cost = negative_selector * (1 - labels) * tf.pow(logits - bottom_margin, 2)

    loss = 0.5 * positive_cost + 0.5 * downweight * negative_cost
    return tf.reduce_mean(tf.reduce_sum(loss, axis=-1))
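A hypothetical usage sketch, assuming the margin_loss above is in scope; tf.pow squares the distance of each activation from its margin:

import tensorflow as tf

labels = tf.constant([[0., 1.]])     # one-hot target
logits = tf.constant([[0.2, 0.95]])  # e.g. capsule activations

loss = margin_loss(labels, logits)

with tf.Session() as sess:
    # The target activation (0.95 > 0.9) incurs no cost; only the
    # non-target activation (0.2 > 0.1) is penalized, quadratically.
    print(sess.run(loss))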
Example #9
Source File: loss.py From centernet_tensorflow_wilderface_voc with MIT License
def focal_loss(pred, gt):
    '''
    Modified focal loss. Exactly the same as CornerNet.
    Runs faster and costs a little bit more memory.
    Arguments:
        pred (batch, h, w, c)
        gt (batch, h, w, c)
    '''
    pos_inds = tf.cast(tf.equal(gt, 1.0), dtype=tf.float32)
    neg_inds = 1.0 - pos_inds
    neg_weights = tf.pow(1.0 - gt, 4.0)
    pred = tf.clip_by_value(pred, 1e-6, 1.0 - 1e-6)
    pos_loss = tf.log(pred) * tf.pow(1.0 - pred, 2.0) * pos_inds
    neg_loss = tf.log(1.0 - pred) * tf.pow(pred, 2.0) * neg_weights * neg_inds
    num_pos = tf.reduce_sum(pos_inds)
    pos_loss = tf.reduce_sum(pos_loss)
    neg_loss = tf.reduce_sum(neg_loss)
    loss = -(pos_loss + neg_loss) / num_pos
    return loss
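A hypothetical usage sketch, assuming the focal_loss above is in scope, with a (batch, h, w, c) heatmap containing a single exact positive:

import tensorflow as tf

gt = tf.constant([[[[1.0]], [[0.5]], [[0.0]]]])    # shape (1, 3, 1, 1)
pred = tf.constant([[[[0.9]], [[0.2]], [[0.1]]]])

loss = focal_loss(pred, gt)

with tf.Session() as sess:
    # Only the gt == 1.0 cell counts as positive; the 0.5 cell is a
    # down-weighted negative via the (1 - gt)^4 term.
    print(sess.run(loss))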
Example #10
Source File: hmc.py From zhusuan with MIT License
def tune(self, acceptance_rate, fresh_start):
    def adapt_stepsize():
        new_step = tf.assign(self.step, (1 - fresh_start) * self.step + 1)
        rate1 = 1.0 / (new_step + self.t0)
        new_h_bar = tf.assign(
            self.h_bar, (1 - fresh_start) * (1 - rate1) * self.h_bar +
            rate1 * (self.delta - acceptance_rate))
        log_epsilon = self.mu - tf.sqrt(new_step) / self.gamma * new_h_bar
        rate = tf.pow(new_step, -self.kappa)
        new_log_epsilon_bar = tf.assign(
            self.log_epsilon_bar,
            rate * log_epsilon + (1 - fresh_start) * (1 - rate) *
            self.log_epsilon_bar)
        with tf.control_dependencies([new_log_epsilon_bar]):
            new_log_epsilon = tf.identity(log_epsilon)
        return tf.exp(new_log_epsilon)

    c = tf.cond(self.adapt_step_size,
                adapt_stepsize,
                lambda: tf.exp(self.log_epsilon_bar))
    return c
Example #11
Source File: hmc.py From zhusuan with MIT License
def update(self, x):
    # x: (chain_dims data_dims)
    new_t = tf.assign(self.t, self.t + 1)
    weight = (1 - self.decay) / (1 - tf.pow(self.decay, new_t))
    # incr: (chain_dims data_dims)
    incr = [weight * (q - mean) for q, mean in zip(x, self.mean)]
    # mean: (1,...,1 data_dims)
    update_mean = [mean.assign_add(
        tf.reduce_mean(i, axis=self.chain_axes, keepdims=True))
        for mean, i in zip(self.mean, incr)]
    # var: (1,...,1 data_dims)
    new_var = [
        (1 - weight) * var +
        tf.reduce_mean(i * (q - mean), axis=self.chain_axes, keepdims=True)
        for var, i, q, mean in zip(self.var, incr, x, update_mean)]
    update_var = [tf.assign(var, n_var)
                  for var, n_var in zip(self.var, new_var)]
    return update_var
Example #12
Source File: layers.py From aboleth with Apache License 2.0
def __init__(self, n_features, lenscale=None, p=1, variational=False,
             learn_lenscale=False):
    """Create an instance of an arc cosine kernel layer."""
    # Setup random weights
    if variational:
        kern = RBFVariational(lenscale=lenscale,
                              learn_lenscale=learn_lenscale)
    else:
        kern = RBF(lenscale=lenscale, learn_lenscale=learn_lenscale)
    super().__init__(n_features=n_features, kernel=kern)

    # Kernel order
    assert isinstance(p, int) and p >= 0
    if p == 0:
        self.pfunc = tf.sign
    elif p == 1:
        self.pfunc = lambda x: x
    else:
        self.pfunc = lambda x: tf.pow(x, p)
Example #13
Source File: layers.py From basenji with Apache License 2.0
def call(self, inputs):
    input_shape = tf.shape(inputs)
    batch_size, seq_len = input_shape[0], input_shape[1]

    pos_range = tf.range(-seq_len // 2, seq_len // 2)
    if self.transform is None:
        pos_feature = pos_range
    elif self.transform == 'abs':
        pos_feature = tf.math.abs(pos_range)
    elif self.transform == 'reversed':
        pos_feature = pos_range[::-1]
    else:
        raise ValueError('Unknown ConcatPosition transform.')

    if self.power != 1:
        pos_feature = tf.pow(pos_feature, self.power)

    pos_feature = tf.expand_dims(pos_feature, axis=0)
    pos_feature = tf.expand_dims(pos_feature, axis=-1)
    pos_feature = tf.tile(pos_feature, [batch_size, 1, 1])
    pos_feature = tf.dtypes.cast(pos_feature, dtype=tf.float32)

    return tf.concat([pos_feature, inputs], axis=-1)
Example #14
Source File: losses.py From ros_people_object_detection_tensorflow with Apache License 2.0
def _compute_loss(self, prediction_tensor, target_tensor, weights,
                  class_indices=None):
    """Compute loss function.

    Args:
        prediction_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing the predicted logits for each class
        target_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing one-hot encoded classification targets
        weights: a float tensor of shape [batch_size, num_anchors]
        class_indices: (Optional) A 1-D integer tensor of class indices.
            If provided, computes loss only for the specified class indices.

    Returns:
        loss: a float tensor of shape [batch_size, num_anchors, num_classes]
            representing the value of the loss function.
    """
    weights = tf.expand_dims(weights, 2)
    if class_indices is not None:
        weights *= tf.reshape(
            ops.indices_to_dense_vector(class_indices,
                                        tf.shape(prediction_tensor)[2]),
            [1, 1, -1])
    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
        labels=target_tensor, logits=prediction_tensor))
    prediction_probabilities = tf.sigmoid(prediction_tensor)
    p_t = ((target_tensor * prediction_probabilities) +
           ((1 - target_tensor) * (1 - prediction_probabilities)))
    modulating_factor = 1.0
    if self._gamma:
        modulating_factor = tf.pow(1.0 - p_t, self._gamma)
    alpha_weight_factor = 1.0
    if self._alpha is not None:
        alpha_weight_factor = (target_tensor * self._alpha +
                               (1 - target_tensor) * (1 - self._alpha))
    focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor *
                                per_entry_cross_ent)
    return focal_cross_entropy_loss * weights
Example #15
Source File: network.py From SSH-TensorFlow with MIT License
def _smooth_l1_loss(self, bbox_pred, bbox_targets, bbox_inside_weights,
                    bbox_outside_weights, sigma=1.0, dim=[1]):
    sigma_2 = sigma ** 2
    box_diff = bbox_pred - bbox_targets
    in_box_diff = bbox_inside_weights * box_diff
    abs_in_box_diff = tf.abs(in_box_diff)
    smoothL1_sign = tf.stop_gradient(tf.to_float(tf.less(abs_in_box_diff, 1. / sigma_2)))
    in_loss_box = tf.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
                  + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
    out_loss_box = bbox_outside_weights * in_loss_box
    loss_box = tf.reduce_mean(tf.reduce_sum(
        out_loss_box,
        axis=dim
    ))
    return loss_box
Example #16
Source File: prioritized_replay.py From rlgraph with Apache License 2.0
def _graph_fn_update_records(self, indices, update):
    num_records = get_batch_size(indices)
    max_priority = 0.0

    # Update has to be sequential.
    def insert_body(i, max_priority_):
        priority = tf.pow(x=update[i], y=self.alpha)
        sum_insert = self.sum_segment_tree.insert(
            index=indices[i],
            element=priority,
            insert_op=tf.add
        )
        min_insert = self.min_segment_tree.insert(
            index=indices[i],
            element=priority,
            insert_op=tf.minimum
        )
        # Keep track of current max priority element.
        max_priority_ = tf.maximum(x=max_priority_, y=priority)

        with tf.control_dependencies(control_inputs=[tf.group(sum_insert, min_insert)]):
            # TODO: This confuses the auto-return value detector.
            return i + 1, max_priority_

    def cond(i, max_priority_):
        return i < num_records - 1

    _, max_priority = tf.while_loop(
        cond=cond,
        body=insert_body,
        loop_vars=(0, max_priority)
    )

    assignment = self.assign_variable(ref=self.max_priority, value=max_priority)
    with tf.control_dependencies(control_inputs=[assignment]):
        return tf.no_op()
Example #17
Source File: inputs.py From cloudml-samples with Apache License 2.0
def process_features(features):
    """ Use to implement custom feature engineering logic.
    Default behaviour is to return the original feature tensors dictionary as-is.

    Args:
        features: {string: tensors} - dictionary of feature tensors
    Returns:
        {string: tensors}: extended feature tensors dictionary
    """
    # Examples - given:
    #   'x' and 'y' are two numeric features;
    #   'alpha' and 'beta' are two categorical features.
    #
    # Create new features using custom logic:
    # features['x_2'] = tf.pow(features['x'], 2)
    # features['y_2'] = tf.pow(features['y'], 2)
    # features['xy'] = features['x'] * features['y']
    # features['sin_x'] = tf.sin(features['x'])
    # features['cos_y'] = tf.cos(features['y'])
    # features['log_xy'] = tf.log(features['xy'])
    # features['sqrt_xy'] = tf.sqrt(features['xy'])
    #
    # Add created features to the metadata (if not already defined in metadata.py):
    # NUMERIC_FEATURE_NAMES_WITH_STATS['x_2'] = None
    # NUMERIC_FEATURE_NAMES_WITH_STATS['y_2'] = None
    # ...

    return features
Example #18
Source File: optim.py From glow with MIT License
def adam(params, cost_or_grads, alpha=3e-4, hps=None, epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    beta2 = 1 - 1. / (hps.train_its * hps.polyak_epochs)

    # all-reduce
    grads = [Z.allreduce_mean(g) for g in gs]

    t = tf.Variable(1., 'adam_t')
    alpha_t = alpha * tf.sqrt((1. - tf.pow(beta2, t))) / \
        (1. - tf.pow(hps.beta1, t))
    updates.append(t.assign_add(1))

    for w, g in zip(params, grads):
        mom2 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m2')
        if hps.beta1 > 0:
            mom1 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m1')
            mom1_new = hps.beta1 * mom1 + (1. - hps.beta1) * g
            updates.append(mom1.assign(mom1_new))
        else:
            mom1_new = g
        m2_new = beta2 * mom2 + (1. - beta2) * tf.square(g)
        delta_t = mom1_new / (tf.sqrt(m2_new) + epsilon)
        w_new = hps.weight_decay * w - alpha_t * delta_t
        updates.append(mom2.assign(m2_new))
        updates.append(w.assign(w_new))

    # Polyak averaging
    polyak_avg_op, polyak_swap_op, ema = polyak(params, beta2)
    train_op = tf.group(polyak_avg_op, *updates)
    return train_op, polyak_swap_op, ema
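The alpha_t expression folds both of Adam's bias-correction factors into the step size. A standalone sketch of just that correction, with hypothetical hyperparameters:

import tensorflow as tf

alpha, beta1, beta2 = 3e-4, 0.9, 0.999
t = tf.placeholder(tf.float32, [])
# Bias-corrected step size; approaches alpha as t grows.
alpha_t = alpha * tf.sqrt(1. - tf.pow(beta2, t)) / (1. - tf.pow(beta1, t))

with tf.Session() as sess:
    for step in [1., 10., 1000.]:
        print(step, sess.run(alpha_t, feed_dict={t: step}))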
Example #19
Source File: siamese_net.py From atec-nlp with MIT License
def contrastive_loss(self, y, e):
    # margin and pos_weight can directly influence P and R metrics.
    l_1 = self._contrastive_loss_pos_weight * tf.pow(1 - e, 2)
    l_0 = tf.square(tf.maximum(e - self._margin, 0))
    loss = tf.reduce_mean(y * l_1 + (1 - y) * l_0)
    return loss
Example #20
Source File: ops.py From HyperGAN with MIT License
def gelu(self, x):
    return 0.5 * x * (1 + tf.nn.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
Example #21
Source File: model.py From rgn with MIT License
def _curriculum(config, step, loss_history, dependency_ops):
    """ Creates TF ops for maintaining and advancing the curriculum. """

    # assign appropriate curriculum increment value
    for case in switch(config['behavior']):
        if case('fixed_rate'):
            # fixed rate, always return same number
            increment = tf.constant(config['rate'], name='curriculum_increment')
        elif case('loss_threshold'):
            # return fixed increment if last loss is below threshold, zero otherwise
            increment_pred = tf.less(loss_history[-1], config['threshold'],
                                     name='curriculum_predicate')
            full_increment_func = lambda: tf.constant(config['rate'],
                                                      name='full_curriculum_increment')
            zero_increment_func = lambda: tf.constant(0.0,
                                                      name='zero_curriculum_increment')
            increment = tf.cond(increment_pred, full_increment_func, zero_increment_func)
        elif case('loss_change'):
            # predicate for increment type
            increment_pred = tf.not_equal(loss_history[0], DUMMY_LOSS,
                                          name='curriculum_predicate')

            # increment function for when loss history is still
            def full_increment_func():
                lin_seq = tf.expand_dims(tf.linspace(0., 1., config['change_num_iterations']), 1)
                ls_matrix = tf.concat([tf.ones_like(lin_seq), lin_seq], 1)
                ls_rhs = tf.expand_dims(loss_history, 1)
                ls_slope = tf.matrix_solve_ls(ls_matrix, ls_rhs)[1, 0]

                full_increment = tf.div(config['rate'],
                                        tf.pow(tf.abs(ls_slope) + 1, config['sharpness']),
                                        name='full_curriculum_increment')
                return full_increment

            # dummy increment function for when loss history is changing rapidly
            zero_increment_func = lambda: tf.constant(0.0, name='zero_curriculum_increment')

            # final conditional increment
            increment = tf.cond(increment_pred, full_increment_func, zero_increment_func)

    # create updating op. the semantics are such that training / gradient update
    # is first performed before the curriculum is incremented.
    with tf.control_dependencies(dependency_ops):
        update_op = tf.assign_add(step, increment, name='update_curriculum_op')

    return update_op
Example #22
Source File: modeling.py From bert-for-tf2 with MIT License
def gelu(x):
    """Gaussian Error Linear Unit.

    This is a smoother version of the RELU.
    Original paper: https://arxiv.org/abs/1606.08415
    Args:
        x: float Tensor to perform activation.

    Returns:
        `x` with the GELU activation applied.
    """
    cdf = 0.5 * (1.0 + tf.tanh(
        (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
    return x * cdf
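The tanh expression is an approximation; a hypothetical sanity check against the exact erf-based GELU, assuming the gelu above is in scope:

import numpy as np
import tensorflow as tf

def gelu_exact(x):
    # Exact GELU: x * Phi(x), with Phi the standard normal CDF.
    return 0.5 * x * (1.0 + tf.erf(x / tf.sqrt(2.0)))

x = tf.constant(np.linspace(-3.0, 3.0, 7), dtype=tf.float32)
max_diff = tf.reduce_max(tf.abs(gelu(x) - gelu_exact(x)))

with tf.Session() as sess:
    print(sess.run(max_diff))  # the two curves agree closely on this range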
Example #23
Source File: dop853.py From astroNN with MIT License
def hinit(func, x, t, pos_neg, f0, iord, hmax, rtol, atol, args):
    """
    Estimate initial step size
    """
    sk = atol + rtol * tf.abs(x)
    dnf = tf.reduce_sum(tf.square(f0 / sk), axis=0)
    dny = tf.reduce_sum(tf.square(x / sk), axis=0)
    h = tf.sqrt(dny / dnf) * 0.01
    h = tf.reduce_min([h, tf.abs(hmax)])
    h = custom_sign(h, pos_neg)

    # perform an explicit Euler step
    xx1 = x + h * f0
    f1 = func(xx1, t[0] + h, *args)

    # estimate the second derivative of the solution
    der2 = tf.reduce_sum(tf.square((f1 - f0) / sk), axis=0)
    der2 = tf.sqrt(der2) / h

    # step size is computed such that h ** iord * max_d(norm(f0), norm(der2)) = 0.01
    der12 = tf.reduce_max([tf.abs(der2), tf.sqrt(dnf)])
    h1 = tf.pow(0.01 / der12, 1.0 / iord)
    h = tf.reduce_min([100.0 * tf.abs(h), tf.reduce_min([tf.abs(h1), tf.abs(hmax)])])

    return custom_sign(h, pos_neg), f0, f1, xx1
Example #24
Source File: optim.py From glow with MIT License
def adam2(params, cost_or_grads, alpha=3e-4, hps=None, epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    beta2 = 1 - 1. / (hps.train_its * hps.polyak_epochs)

    # all-reduce
    grads1 = [Z.allreduce_mean(g) for g in gs]
    grads2 = [Z.allreduce_mean(g ** 2) for g in gs]

    t = tf.Variable(1., 'adam_t')
    alpha_t = alpha * tf.sqrt((1. - tf.pow(beta2, t))) / \
        (1. - tf.pow(hps.beta1, t))
    updates.append(t.assign_add(1))

    for w, g1, g2 in zip(params, grads1, grads2):
        mom2 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m2')
        if hps.beta1 > 0:
            mom1 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m1')
            mom1_new = hps.beta1 * mom1 + (1. - hps.beta1) * g1
            updates.append(mom1.assign(mom1_new))
        else:
            mom1_new = g1
        m2_new = beta2 * mom2 + (1. - beta2) * g2
        delta_t = mom1_new / (tf.sqrt(m2_new) + epsilon)
        w_new = hps.weight_decay * w - alpha_t * delta_t
        updates.append(mom2.assign(m2_new))
        updates.append(w.assign(w_new))

    # Polyak averaging
    polyak_avg_op, polyak_swap_op, ema = polyak(params, beta2)
    train_op = tf.group(polyak_avg_op, *updates)
    return train_op, polyak_swap_op, ema
Example #25
Source File: optim.py From glow with MIT License
def adam2_old(params, cost_or_grads, lr=3e-4, mom1=0.9, mom2=0.999,
              epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    # all-reduce
    grads1 = [Z.allreduce_mean(g) for g in gs]
    grads2 = [Z.allreduce_mean(tf.square(g)) for g in gs]
    mom2 = tf.maximum(0., 1. - (hvd.size() * (1 - mom2)))

    t = tf.Variable(1., 'adam_t')
    lr_t = lr * tf.sqrt((1. - tf.pow(mom2, t))) / (1. - tf.pow(mom1, t))
    updates.append(t.assign_add(1))

    for p, g1, g2 in zip(params, grads1, grads2):
        mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g1
            updates.append(v.assign(v_t))
        else:
            v_t = g1
        mg_t = mom2 * mg + (1. - mom2) * g2
        delta_t = v_t / (tf.sqrt(mg_t) + epsilon)
        p_t = p - lr_t * delta_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))

    return tf.group(*updates)
Example #26
Source File: model.py From lm-human-preferences with MIT License
def gelu(x):
    with tf.name_scope('gelu'):
        return 0.5 * x * (1 + tf.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
Example #27
Source File: optim.py From glow with MIT License
def adamax(params, cost_or_grads, alpha=3e-4, hps=None, epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    beta2 = 1 - 1. / (hps.train_its * hps.polyak_epochs)

    # all-reduce
    grads = [Z.allreduce_mean(g) for g in gs]

    t = tf.Variable(1., 'adam_t')
    alpha_t = alpha * tf.sqrt((1. - tf.pow(beta2, t))) / \
        (1. - tf.pow(hps.beta1, t))
    updates.append(t.assign_add(1))

    for w, g in zip(params, grads):
        mom2 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m2')
        if hps.beta1 > 0:
            mom1 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m1')
            mom1_new = hps.beta1 * mom1 + (1. - hps.beta1) * g
            updates.append(mom1.assign(mom1_new))
        else:
            mom1_new = g
        m2_new = tf.maximum(beta2 * mom2, abs(g))
        delta_t = mom1_new / (m2_new + epsilon)
        w_new = hps.weight_decay * w - alpha_t * delta_t
        updates.append(mom2.assign(m2_new))
        updates.append(w.assign(w_new))

    # Polyak averaging
    polyak_avg_op, polyak_swap_op, ema = polyak(params, beta2)
    train_op = tf.group(polyak_avg_op, *updates)
    return train_op, polyak_swap_op, ema
Example #28
Source File: losses.py From MultiKE with MIT License
def orthogonal_loss(mapping, eye):
    loss = tf.reduce_sum(tf.reduce_sum(
        tf.pow(tf.matmul(mapping, mapping, transpose_b=True) - eye, 2), 1))
    return loss
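A hypothetical usage sketch, assuming the orthogonal_loss above is in scope. The loss is the squared Frobenius norm of M M^T - I, so it is zero exactly when the mapping is orthogonal:

import tensorflow as tf

dim = 4
mapping = tf.Variable(tf.random_normal([dim, dim]))
eye = tf.eye(dim)
loss = orthogonal_loss(mapping, eye)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(loss))  # near zero only for (approximately) orthogonal mappings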