Python tensorflow.clip_by_norm() Examples

The following are 30 code examples of tensorflow.clip_by_norm(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
Example #1
Source File: graph_builder.py    From DOTA_models with Apache License 2.0 9 votes vote down vote up
def _clip_gradients(self, grad):
    """Clips gradients if the hyperparameter `gradient_clip_norm` requires it.

    Sparse tensors, in the form of IndexedSlices returned for the
    gradients of embeddings, require special handling.

    Args:
      grad: Gradient Tensor, IndexedSlices, or None.

    Returns:
      Optionally clipped gradient.
    """
    clip_norm = self.hyperparams.gradient_clip_norm
    # Pass through untouched when there is nothing to clip.
    if grad is None or clip_norm <= 0:
      return grad
    logging.info('Clipping gradient %s', grad)
    if isinstance(grad, tf.IndexedSlices):
      # Sparse gradient: clip only the materialized values, then re-wrap.
      clipped_values = tf.clip_by_norm(grad.values, clip_norm)
      return tf.IndexedSlices(clipped_values, grad.indices, grad.dense_shape)
    return tf.clip_by_norm(grad, clip_norm)
Example #2
Source File: graph_builder.py    From yolo_v2 with Apache License 2.0 6 votes vote down vote up
def _clip_gradients(self, grad):
    """Clips gradients if the hyperparameter `gradient_clip_norm` requires it.

    Sparse tensors, in the form of IndexedSlices returned for the
    gradients of embeddings, require special handling.

    Args:
      grad: Gradient Tensor, IndexedSlices, or None.

    Returns:
      Optionally clipped gradient.
    """
    if grad is not None and self.hyperparams.gradient_clip_norm > 0:
      logging.info('Clipping gradient %s', grad)
      if isinstance(grad, tf.IndexedSlices):
        # Sparse gradient: clip only the materialized slice values, then
        # re-wrap so the result stays an IndexedSlices.
        tmp = tf.clip_by_norm(grad.values, self.hyperparams.gradient_clip_norm)
        return tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
      else:
        return tf.clip_by_norm(grad, self.hyperparams.gradient_clip_norm)
    else:
      # None gradients (or clipping disabled) pass through unchanged.
      return grad
Example #3
Source File: TfEnas.py    From rafiki with Apache License 2.0 6 votes vote down vote up
def _optimize(self, loss, step, **knobs):
        """Builds the Nesterov-Momentum training op that minimizes `loss`.

        Args:
            loss: Scalar loss tensor to minimize.
            step: Global step variable, incremented by the returned op.
            **knobs: Hyperparameters; reads `opt_momentum` and
                `grad_clip_norm` (clipping is disabled when <= 0).

        Returns:
            The training op.
        """
        opt_momentum = knobs['opt_momentum'] # Momentum optimizer momentum
        grad_clip_norm = knobs['grad_clip_norm'] # L2 norm to clip gradients by

        # Compute learning rate, gradients
        tf_trainable_vars = tf.trainable_variables()
        lr = self._get_learning_rate(step, **knobs)
        grads = tf.gradients(loss, tf_trainable_vars)
        self._mark_for_monitoring('lr', lr)

        # Clip gradients
        # NOTE(review): assumes no None gradients — every trainable variable
        # must be reachable from `loss`, else clip_by_norm raises.
        if grad_clip_norm > 0:
            grads = [tf.clip_by_norm(x, grad_clip_norm) for x in grads]

        # Init optimizer
        opt = tf.train.MomentumOptimizer(lr, opt_momentum, use_locking=True, use_nesterov=True)
        train_op = opt.apply_gradients(zip(grads, tf_trainable_vars), global_step=step)

        return train_op
Example #4
Source File: model.py    From deeping-flow with MIT License 6 votes vote down vote up
def __init__(self, model, args):
        """Builds the supervised (ML) training graph for `model`.

        Creates an Adam optimizer over the model loss with per-gradient
        L2-norm clipping, plus a loss summary.
        """
        self.args = args

        with tf.variable_scope('supervisor_loss'):
            optimizer = tf.train.AdamOptimizer(
                args.ml_lr, beta1=0.9, beta2=0.98, epsilon=1e-8)

            loss = self.compute_loss(model)

            gradients = optimizer.compute_gradients(loss)
            # Clip each gradient's L2 norm; None gradients are skipped.
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (
                        tf.clip_by_norm(grad, args.clip_norm), var)
            self.train_op = optimizer.apply_gradients(
                gradients, global_step=model.global_step)

            tf.summary.scalar('loss', loss)
            self.merged = tf.summary.merge_all()
Example #5
Source File: model.py    From deeping-flow with MIT License 6 votes vote down vote up
def __init__(self, model, args):
        """Builds the mixed-loss training graph for `model`.

        Adam with per-gradient L2-norm clipping, plus scalar summaries for
        loss, reward, baseline and advantage.
        """
        self.args = args

        with tf.variable_scope('mix_loss'):
            optimizer = tf.train.AdamOptimizer(
                args.lr, beta1=0.9, beta2=0.98, epsilon=1e-8)

            loss, reward, baseline, advantage = self.compute_loss(model)

            gradients = optimizer.compute_gradients(loss)
            # Clip each gradient's L2 norm; None gradients are skipped.
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (
                        tf.clip_by_norm(grad, args.clip_norm), var)
            self.train_op = optimizer.apply_gradients(
                gradients, global_step=model.global_step)

            # Scalar summaries for monitoring training.
            tf.summary.scalar('loss', loss)
            tf.summary.scalar("reward", tf.reduce_mean(reward))
            tf.summary.scalar("baseline", tf.reduce_mean(baseline))
            tf.summary.scalar("advantage", tf.reduce_mean(advantage))

            self.merged = tf.summary.merge_all()
Example #6
Source File: model.py    From deeping-flow with MIT License 6 votes vote down vote up
def __init__(self, model, args):
        """Builds the reinforced-loss training graph for `model`.

        Adam with per-gradient L2-norm clipping, plus scalar summaries for
        loss, reward, baseline and advantage.
        """
        self.args = args

        with tf.variable_scope('reinforced_loss'):
            optimizer = tf.train.AdamOptimizer(
                args.lr, beta1=0.9, beta2=0.98, epsilon=1e-8)

            loss, reward, baseline, advantage = self.compute_loss(model)

            gradients = optimizer.compute_gradients(loss)
            # Clip each gradient's L2 norm; None gradients are skipped.
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (
                        tf.clip_by_norm(grad, args.clip_norm), var)
            self.train_op = optimizer.apply_gradients(
                gradients, global_step=model.global_step)

            # Scalar summaries for monitoring training.
            tf.summary.scalar('loss', loss)
            tf.summary.scalar("reward", tf.reduce_mean(reward))
            tf.summary.scalar("baseline", tf.reduce_mean(baseline))
            tf.summary.scalar("advantage", tf.reduce_mean(advantage))
            self.merged = tf.summary.merge_all()
Example #7
Source File: utils.py    From youtube8mchallenge with Apache License 2.0 6 votes vote down vote up
def clip_gradient_norms(gradients_to_variables, max_norm):
  """Clips the gradients by the given value.

  Args:
    gradients_to_variables: A list of gradient to variable pairs (tuples).
    max_norm: the maximum norm value.

  Returns:
    A list of clipped gradient to variable pairs.
  """
  result = []
  for gradient, variable in gradients_to_variables:
    if gradient is None:
      # Nothing to clip for variables with no gradient.
      result.append((gradient, variable))
      continue
    if isinstance(gradient, tf.IndexedSlices):
      # Sparse gradient: clip the slice values, then re-wrap.
      clipped_values = tf.clip_by_norm(gradient.values, max_norm)
      gradient = tf.IndexedSlices(clipped_values, gradient.indices,
                                  gradient.dense_shape)
    else:
      gradient = tf.clip_by_norm(gradient, max_norm)
    result.append((gradient, variable))
  return result
Example #8
Source File: utils.py    From youtube-8m with Apache License 2.0 6 votes vote down vote up
def clip_gradient_norms(gradients_to_variables, max_norm):
  """Clips the gradients by the given value.

  Args:
    gradients_to_variables: A list of gradient to variable pairs (tuples).
    max_norm: the maximum norm value.

  Returns:
    A list of clipped gradient to variable pairs.
  """
  clipped_grads_and_vars = []
  for grad, var in gradients_to_variables:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        # Sparse gradient: clip the slice values, re-wrap to keep sparsity.
        tmp = tf.clip_by_norm(grad.values, max_norm)
        grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
      else:
        grad = tf.clip_by_norm(grad, max_norm)
    # None gradients pass through unchanged.
    clipped_grads_and_vars.append((grad, var))
  return clipped_grads_and_vars
Example #9
Source File: local_optimizers.py    From rlgraph with Apache License 2.0 6 votes vote down vote up
def _graph_fn_calculate_gradients(self, variables, loss, time_percentage):
        """
        Computes (and optionally norm-clips) gradients of `loss` w.r.t. `variables`.

        Args:
            variables (DataOpTuple): A list of variables to calculate gradients for.
            loss (SingeDataOp): The total loss over a batch to be minimized.
            time_percentage: Unused here.

        Returns:
            DataOpTuple of (gradient, variable) pairs (tf backend only;
            implicitly returns None for other backends).
        """
        if get_backend() == "tf":
            var_list = list(variables.values()) if isinstance(variables, dict) else force_list(variables)
            grads_and_vars = self.optimizer.compute_gradients(
                loss=loss,
                var_list=var_list
            )
            # Per-gradient L2-norm clipping; variables with no gradient keep None.
            if self.clip_grad_norm is not None:
                for i, (grad, var) in enumerate(grads_and_vars):
                    if grad is not None:
                        grads_and_vars[i] = (tf.clip_by_norm(t=grad, clip_norm=self.clip_grad_norm), var)
            return DataOpTuple(grads_and_vars)
Example #10
Source File: tf_util.py    From stable-baselines with MIT License 6 votes vote down vote up
def flatgrad(loss, var_list, clip_norm=None):
    """
    calculates the gradient and flattens it

    :param loss: (float) the loss value
    :param var_list: ([TensorFlow Tensor]) the variables
    :param clip_norm: (float) clip the gradients (disabled if None)
    :return: ([TensorFlow Tensor]) flattened gradient
    """
    grads = tf.gradients(loss, var_list)
    if clip_norm is not None:
        grads = [tf.clip_by_norm(g, clip_norm=clip_norm) for g in grads]
    # Variables with no gradient contribute zeros so each one still fills a
    # slot of the right size in the flattened output.
    flat_pieces = []
    for var, grad in zip(var_list, grads):
        filled = grad if grad is not None else tf.zeros_like(var)
        flat_pieces.append(tf.reshape(filled, [numel(var)]))
    return tf.concat(axis=0, values=flat_pieces)
Example #11
Source File: optimizer.py    From BERT with Apache License 2.0 6 votes vote down vote up
def grad_clip_fn(self, loss, tvars, **kargs):
	"""Computes gradients of `loss` w.r.t. `tvars` and clips them.

	The clipping mode is read from `self.config["grad_clip"]`:
	"global_norm" (default), "norm", or "value"; any other value leaves
	the gradients unclipped.

	Returns:
		List of (possibly clipped) gradients, ordered like `tvars`.
	"""
	grads = tf.gradients(loss, tvars)
	grad_clip = self.config.get("grad_clip", "global_norm")
	tf.logging.info(" gradient clip method {}".format(grad_clip))
	if grad_clip == "global_norm":
		clip_norm = self.config.get("clip_norm", 1.0)
		[grads, _] = tf.clip_by_global_norm(grads, 
							clip_norm=clip_norm)
	elif grad_clip == "norm":
		clip_norm = self.config.get("clip_norm", 1.0)
		grads = [tf.clip_by_norm(grad, clip_norm) for grad in grads]
	elif grad_clip == "value":
		clip_min_value = self.config.get("clip_min_value", -1.0)
		clip_max_value = self.config.get("clip_max_value", 1.0)
		# Bug fix: tf.clip_by_value requires explicit min/max bounds; the
		# previous code passed an undefined `clip_norm` here, which would
		# raise NameError and dropped the configured min/max values.
		grads = [tf.clip_by_value(grad, clip_min_value, clip_max_value)
				 for grad in grads]
	return grads
Example #12
Source File: hvd_distributed_optimizer.py    From BERT with Apache License 2.0 6 votes vote down vote up
def grad_clip_fn(self, opt, loss, tvars, **kargs):
	"""Computes gradients via optimizer `opt` and clips them.

	The clipping mode is read from `self.config["grad_clip"]`:
	"global_norm" (default), "norm", or "value"; any other value leaves
	the gradients unclipped.

	Returns:
		List of (possibly clipped) gradients, ordered like `tvars`.
	"""
	grads_and_vars = opt.compute_gradients(loss, tvars)
	grads = [grad for grad, _ in grads_and_vars]
	grad_clip = self.config.get("grad_clip", "global_norm")
	tf.logging.info(" gradient clip method {}".format(grad_clip))
	if grad_clip == "global_norm":
		clip_norm = self.config.get("clip_norm", 1.0)
		[grads, _] = tf.clip_by_global_norm(grads, 
							clip_norm=clip_norm)
	elif grad_clip == "norm":
		clip_norm = self.config.get("clip_norm", 1.0)
		grads = [tf.clip_by_norm(grad, clip_norm) for grad in grads]
	elif grad_clip == "value":
		clip_min_value = self.config.get("clip_min_value", -1.0)
		clip_max_value = self.config.get("clip_max_value", 1.0)
		# Bug fix: tf.clip_by_value requires explicit min/max bounds; the
		# previous code passed an undefined `clip_norm` here, which would
		# raise NameError and dropped the configured min/max values.
		grads = [tf.clip_by_value(grad, clip_min_value, clip_max_value)
				 for grad in grads]
	return grads
Example #13
Source File: clip_ops_test.py    From deep_image_model with Apache License 2.0 6 votes vote down vote up
def testClipByNormClipped(self):
    """Checks clip_by_norm with both a Python float and a tensor clip_norm."""
    # Norm clipping when clip_norm < 5
    with self.test_session():
      x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
      # Norm of x = sqrt(3^2 + 4^2) = 5
      np_ans = [[-2.4, 0.0, 0.0],
                [3.2, 0.0, 0.0]]
      clip_norm = 4.0
      ans = tf.clip_by_norm(x, clip_norm)
      tf_ans = ans.eval()

      # Bug fix: exercise the tensor-valued clip_norm code path. Previously
      # this reused the Python float `clip_norm`, leaving `clip_tensor`
      # unused and the tensor path untested.
      clip_tensor = tf.constant(4.0)
      ans = tf.clip_by_norm(x, clip_tensor)
      tf_ans_tensor = ans.eval()

    self.assertAllClose(np_ans, tf_ans)
    self.assertAllClose(np_ans, tf_ans_tensor)
Example #14
Source File: model.py    From Seq2Seq-Tensorflow with MIT License 6 votes vote down vote up
def build_model(self):
        """Builds the output projection, loss, and clipped-SGD training op.

        NOTE(review): uses pre-TF-1.0 APIs (positional
        softmax_cross_entropy_with_logits arguments,
        tf.initialize_all_variables) and assumes an active default session.
        """
        self.build_memory()

        # Output projection from the last memory hop to the vocabulary.
        self.W = tf.Variable(tf.random_normal([self.edim, self.nwords], stddev=self.init_std))
        z = tf.matmul(self.hid[-1], self.W)

        self.loss = tf.nn.softmax_cross_entropy_with_logits(z, self.target)

        self.lr = tf.Variable(self.current_lr)
        self.opt = tf.train.GradientDescentOptimizer(self.lr)

        params = [self.A, self.B, self.C, self.T_A, self.T_B, self.W]
        grads_and_vars = self.opt.compute_gradients(self.loss,params)
        # Clip each gradient's L2 norm (assumes no None gradients).
        clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], self.max_grad_norm), gv[1]) \
                                   for gv in grads_and_vars]

        # Increment global_step before the gradient update runs.
        inc = self.global_step.assign_add(1)
        with tf.control_dependencies([inc]):
            self.optim = self.opt.apply_gradients(clipped_grads_and_vars)

        tf.initialize_all_variables().run()
        self.saver = tf.train.Saver()
Example #15
Source File: graph_builder.py    From Gun-Detector with Apache License 2.0 6 votes vote down vote up
def _clip_gradients(self, grad):
    """Clips gradients if the hyperparameter `gradient_clip_norm` requires it.

    Sparse tensors, in the form of IndexedSlices returned for the
    gradients of embeddings, require special handling.

    Args:
      grad: Gradient Tensor, IndexedSlices, or None.

    Returns:
      Optionally clipped gradient.
    """
    if grad is not None and self.hyperparams.gradient_clip_norm > 0:
      logging.info('Clipping gradient %s', grad)
      if isinstance(grad, tf.IndexedSlices):
        # Sparse gradient: clip only the materialized slice values, then
        # re-wrap so the result stays an IndexedSlices.
        tmp = tf.clip_by_norm(grad.values, self.hyperparams.gradient_clip_norm)
        return tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
      else:
        return tf.clip_by_norm(grad, self.hyperparams.gradient_clip_norm)
    else:
      # None gradients (or clipping disabled) pass through unchanged.
      return grad
Example #16
Source File: train.py    From text-gan-tensorflow with MIT License 6 votes vote down vote up
def set_train_op(loss, tvars):
    """Builds the training op for `loss` over `tvars`.

    The optimizer is chosen by FLAGS.optimizer_type; each gradient's L2
    norm is clipped to FLAGS.max_grads (None gradients pass through).

    Raises:
        ValueError: if FLAGS.optimizer_type is not recognized.
    """
    optimizer_cls = {
        "sgd": tf.train.GradientDescentOptimizer,
        "rmsprop": tf.train.RMSPropOptimizer,
        "adam": tf.train.AdamOptimizer,
    }.get(FLAGS.optimizer_type)
    if optimizer_cls is None:
        raise ValueError("Wrong optimizer_type.")
    optimizer = optimizer_cls(learning_rate=FLAGS.learning_rate)

    gradients = optimizer.compute_gradients(loss, var_list=tvars)
    clipped_gradients = []
    for grad, var in gradients:
        clipped = grad if grad is None else tf.clip_by_norm(grad, FLAGS.max_grads)
        clipped_gradients.append((clipped, var))

    return optimizer.apply_gradients(clipped_gradients)
Example #17
Source File: models.py    From Question_Answering_Models with MIT License 6 votes vote down vote up
def add_train_op(self, loss):
        """Builds the training op (Adam with per-gradient norm clipping).

        Tracks a non-trainable `global_step` variable that is incremented
        on each apply_gradients call.

        Returns:
            The training op.
        """
        with tf.name_scope('train_op'):
            # Track the training step.
            self.global_step = tf.Variable(0, 
                    name='global_step', trainable=False)
            opt = tf.train.AdamOptimizer(self.config.lr)
            # train_op = opt.minimize(loss, self.global_step)
            train_variables = tf.trainable_variables()
            grads_vars = opt.compute_gradients(loss, train_variables)
            for i, (grad, var) in enumerate(grads_vars):
                # Bug fix: guard against None gradients (trainable variables
                # not reachable from the loss) — tf.clip_by_norm(None, ...)
                # raises.
                if grad is not None:
                    grads_vars[i] = (
                        tf.clip_by_norm(grad, self.config.grad_clip), var)
            train_op = opt.apply_gradients(
                grads_vars, global_step=self.global_step)
            return train_op
Example #18
Source File: tf_util.py    From m3ddpg with MIT License 6 votes vote down vote up
def minimize_and_clip(optimizer, objective, var_list, clip_val=10):
    """Minimized `objective` using `optimizer` w.r.t. variables in
    `var_list` while ensure the norm of the gradients for each
    variable is clipped to `clip_val`

    Passing clip_val=None disables clipping entirely.
    """
    if clip_val is None:
        # No clipping requested: fall back to plain minimize().
        return optimizer.minimize(objective, var_list=var_list)
    else:
        gradients = optimizer.compute_gradients(objective, var_list=var_list)
        # Clip each gradient's L2 norm; None gradients pass through.
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (tf.clip_by_norm(grad, clip_val), var)
        return optimizer.apply_gradients(gradients)


# ================================================================
# Global session
# ================================================================ 
Example #19
Source File: model.py    From tensorflow_nlp with Apache License 2.0 6 votes vote down vote up
def build_model(self):
        """Builds the 3-class output head, loss, and clipped-SGD train op.

        NOTE(review): assumes an active default session for the
        `global_variables_initializer().run()` call.
        """
        self.build_memory()

        # Output projection from the last memory hop to 3 classes.
        self.W = tf.Variable(tf.random_normal([self.edim, 3], stddev=self.init_std))
        z = tf.matmul(self.hid[-1], self.W)

        self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=z, labels=self.target)

        self.lr = tf.Variable(self.current_lr)
        self.opt = tf.train.GradientDescentOptimizer(self.lr)

        params = [self.A, self.B, self.C, self.T_A, self.T_B, self.W, self.ASP, self.BL_W, self.BL_B]
        grads_and_vars = self.opt.compute_gradients(self.loss, params)
        # Clip each gradient's L2 norm (assumes no None gradients).
        clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], self.max_grad_norm), gv[1]) \
                                  for gv in grads_and_vars]

        # Increment global_step before the gradient update runs.
        inc = self.global_step.assign_add(1)
        with tf.control_dependencies([inc]):
            self.optim = self.opt.apply_gradients(clipped_grads_and_vars)

        tf.global_variables_initializer().run()

        self.correct_prediction = tf.argmax(z, 1)
Example #20
Source File: dqn_utils.py    From deep-reinforcement-learning with MIT License 5 votes vote down vote up
def minimize_and_clip(optimizer, objective, var_list, clip_val=10):
    """Minimized `objective` using `optimizer` w.r.t. variables in
    `var_list` while ensure the norm of the gradients for each
    variable is clipped to `clip_val`

    Passing clip_val=None disables clipping (backward-compatible
    generalization, matching the other variants of this helper).
    """
    if clip_val is None:
        return optimizer.minimize(objective, var_list=var_list)
    gradients = optimizer.compute_gradients(objective, var_list=var_list)
    # Clip each gradient's L2 norm; None gradients pass through.
    for i, (grad, var) in enumerate(gradients):
        if grad is not None:
            gradients[i] = (tf.clip_by_norm(grad, clip_val), var)
    return optimizer.apply_gradients(gradients)
Example #21
Source File: runner.py    From leo with Apache License 2.0 5 votes vote down vote up
def _clip_gradients(gradients, gradient_threshold, gradient_norm_threshold):
  """Clips gradients by value and then by norm."""
  clipped = gradients
  # Element-wise value clipping (skipped when the threshold is <= 0).
  if gradient_threshold > 0:
    clipped = [
        tf.clip_by_value(g, -gradient_threshold, gradient_threshold)
        for g in clipped
    ]
  # Per-gradient L2-norm clipping (skipped when the threshold is <= 0).
  if gradient_norm_threshold > 0:
    clipped = [tf.clip_by_norm(g, gradient_norm_threshold) for g in clipped]
  return clipped
Example #22
Source File: HRDE_Model_SA.py    From QA_HRDE_LTC with MIT License 5 votes vote down vote up
def _create_optimizer(self):
        """Builds the Adam training op with per-gradient L2-norm clipping (norm 1)."""
        # Fix: parenthesized print works on both Python 2 and 3; the bare
        # print statement was Python-2-only.
        print('[launch] create optimizer')
        
        with tf.name_scope('optimizer') as scope:
            
            opt_func = tf.train.AdamOptimizer(learning_rate=self.lr)
            gvs = opt_func.compute_gradients(self.loss)
            # Fix: guard against None gradients (variables unused by the
            # loss) — tf.clip_by_norm(None, ...) raises.
            capped_gvs = [(tf.clip_by_norm(t=grad, clip_norm=1), var)
                          if grad is not None else (grad, var)
                          for grad, var in gvs]
            self.optimizer = opt_func.apply_gradients(grads_and_vars=capped_gvs, global_step=self.global_step)
Example #23
Source File: model.py    From gconvRNN with MIT License 5 votes vote down vote up
def _build_optim(self):
        """Builds `self.model_optim`, with optional gradient-norm clipping."""
        def minimize(loss, step, var_list, learning_rate, optimizer):
            # Resolve the optimizer implementation by name.
            if optimizer == "sgd":
                optim = tf.train.GradientDescentOptimizer(learning_rate)
            elif optimizer == "adam":
                optim = tf.train.AdamOptimizer(learning_rate)
            elif optimizer == "rmsprop":
                optim = tf.train.RMSPropOptimizer(learning_rate)
            else:
                raise Exception("[!] Unkown optimizer: {}".format(
                    optimizer))
            ## Gradient clipping ##    
            if self.max_grad_norm is not None:
                grads_and_vars = optim.compute_gradients(
                    loss, var_list=var_list)
                new_grads_and_vars = []
                # NOTE(review): pairs with a None gradient (or vars outside
                # var_list) are dropped entirely here, not passed through.
                for idx, (grad, var) in enumerate(grads_and_vars):
                    if grad is not None and var in var_list:
                        grad = tf.clip_by_norm(grad, self.max_grad_norm)
                        # Fail fast if a gradient contains NaN/Inf.
                        grad = tf.check_numerics(
                            grad, "Numerical error in gradient for {}".format(
                                var.name))
                        new_grads_and_vars.append((grad, var))
                return optim.apply_gradients(new_grads_and_vars, global_step=step)
            else:
                # No clipping configured: apply raw gradients.
                grads_and_vars = optim.compute_gradients(
                    loss, var_list=var_list)
                return optim.apply_gradients(grads_and_vars,
                                             global_step=step)
        
        # optim #
        self.model_optim = minimize(
            self.loss,
            self.model_step,
            self.model_vars,
            self.learning_rate,
            self.optimizer)
Example #24
Source File: model.py    From neural-combinatorial-rl-tensorflow with MIT License 5 votes vote down vote up
def _build_optim(self):
    """Builds training/inference losses and the clipped-Adam training op.

    Bug fix: `batch_inference_loss` previously averaged the *training*
    `losses` tensor, duplicating `batch_loss` and leaving the computed
    `inference_losses` unused; it now averages `inference_losses`.
    """
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.dec_targets, logits=self.dec_pred_logits)
    inference_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.dec_targets, logits=self.dec_inference_logits)

    def apply_mask(op):
      # Keeps only the first `length` loss entries of a packed row.
      length = tf.cast(op[:1], tf.int32)
      loss = op[1:]
      return tf.multiply(loss, tf.ones(length, dtype=tf.float32))

    # Mask-weighted mean of the training loss.
    batch_loss = tf.div(
        tf.reduce_sum(tf.multiply(losses, self.mask)),
        tf.reduce_sum(self.mask), name="batch_loss")

    # Bug fix: average inference_losses (was `losses`).
    batch_inference_loss = tf.div(
        tf.reduce_sum(tf.multiply(inference_losses, self.mask)),
        tf.reduce_sum(self.mask), name="batch_inference_loss")

    tf.losses.add_loss(batch_loss)
    total_loss = tf.losses.get_total_loss()

    self.total_loss = total_loss
    self.target_cross_entropy_losses = losses
    self.total_inference_loss = batch_inference_loss

    # Staircase-decayed learning rate driven by the global step.
    self.lr = tf.train.exponential_decay(
        self.lr_start, self.global_step, self.lr_decay_step,
        self.lr_decay_rate, staircase=True, name="learning_rate")

    optimizer = tf.train.AdamOptimizer(self.lr)

    if self.max_grad_norm is not None:
      grads_and_vars = optimizer.compute_gradients(self.total_loss)
      # Per-gradient L2-norm clipping; None gradients pass through.
      for idx, (grad, var) in enumerate(grads_and_vars):
        if grad is not None:
          grads_and_vars[idx] = (tf.clip_by_norm(grad, self.max_grad_norm), var)
      self.optim = optimizer.apply_gradients(grads_and_vars, global_step=self.global_step)
    else:
      self.optim = optimizer.minimize(self.total_loss, global_step=self.global_step)
Example #25
Source File: policy_gradient.py    From blocks with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, agent, policy_model, total_reward):
        """Builds the policy-gradient training graph and its summaries.

        NOTE(review): uses legacy `tf.scalar_summary` (pre-TF 1.0 API).
        """
        self.agent = agent
        self.policy_model = policy_model
        self.total_reward = total_reward

        # Compute MLE loss function. MLE is used to initialize parameters for policy gradient
        self.mle_policy_gradient = MaximumLikelihoodEstimation(agent, policy_model)

        # Compute loss function
        loss, entropy_penalty = self.calc_loss(
            self.policy_model.model_output, self.policy_model.model_output_indices, self.policy_model.target)

        optimizer = tf.train.AdamOptimizer(AbstractLearning.rl_learning_rate)

        using_grad_clip = True
        grad_clip_val = 5.0
        if not using_grad_clip:
            train_step = optimizer.minimize(loss)
        else:
            # Clip each gradient's L2 norm; None gradients pass through.
            gvs = optimizer.compute_gradients(loss)
            capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                          if grad is not None else (grad, var) for grad, var in gvs]
            train_step = optimizer.apply_gradients(capped_gvs)

        # Create summaries for training
        summary_loss = tf.scalar_summary("Loss", loss)
        summary_target_min = tf.scalar_summary("Target Min", tf.reduce_min(self.policy_model.target))
        summary_target_max = tf.scalar_summary("Target Max", tf.reduce_max(self.policy_model.target))
        summary_target_mean = tf.scalar_summary("Target Mean", tf.reduce_mean(self.policy_model.target))
        summary_entropy_penalty = tf.scalar_summary("Entropy Penalty", entropy_penalty)
        update_summaries = [summary_loss, summary_target_min,
                            summary_target_max, summary_target_mean, summary_entropy_penalty]

        AbstractLearning.__init__(self, policy_model, loss, train_step, update_summaries)
Example #26
Source File: ml_estimation.py    From blocks with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, agent, policy_model):
        """Sets up MLE training: replay memory, Adam, clipped gradients.

        NOTE(review): uses legacy `tf.scalar_summary` (pre-TF 1.0 API).
        """
        self.agent = agent
        self.policy_model = policy_model

        # Replay memory
        max_replay_memory_size = 2000
        self.replay_memory = collections.deque(maxlen=max_replay_memory_size)
        # rho controls prioritized sweeping's sampling behavior —
        # presumably a priority/uniform mixing factor; TODO confirm.
        rho = 0.5
        self.ps = prioritized_sweeping.PrioritizedSweeping(0, rho)

        optimizer = tf.train.AdamOptimizer(self.mle_learning_rate)
        loss = MaximumLikelihoodEstimation.calc_loss(
            self.policy_model.model_output, self.policy_model.model_output_indices)

        using_grad_clip = True
        grad_clip_val = 5.0
        if not using_grad_clip:
            train_step = optimizer.minimize(loss)
        else:
            # Clip each gradient's L2 norm; None gradients pass through.
            gvs = optimizer.compute_gradients(loss)
            capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                          if grad is not None else (grad, var) for grad, var in gvs]
            train_step = optimizer.apply_gradients(capped_gvs)

        # Create summaries for training
        summary_loss = tf.scalar_summary("Loss", loss)
        update_summaries = [summary_loss]

        AbstractLearning.__init__(self, policy_model, loss, train_step, update_summaries)
Example #27
Source File: policy_gradient_with_advantage.py    From blocks with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, agent, policy_model, state_value_model, total_reward):
        """Builds the advantage-based policy-gradient training graph.

        NOTE(review): uses legacy `tf.scalar_summary` (pre-TF 1.0 API).
        """
        self.agent = agent
        self.policy_model = policy_model
        self.state_value_model = state_value_model
        self.total_reward = total_reward

        # Compute MLE loss function. MLE is used to initialize parameters for reinforce
        self.mle_policy_gradient = MaximumLikelihoodEstimation(agent, policy_model)

        # Compute reinforce loss function
        loss_reinforce, entropy_penalty = self.calc_loss(
            policy_model.model_output, policy_model.model_output_indices, policy_model.target)

        optimizer = tf.train.AdamOptimizer(self.rl_learning_rate)

        using_grad_clip = True
        grad_clip_val = 5.0
        if not using_grad_clip:
            train_step = optimizer.minimize(loss_reinforce)
        else:
            # Clip each gradient's L2 norm; None gradients pass through.
            gvs = optimizer.compute_gradients(loss_reinforce)
            capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                          if grad is not None else (grad, var) for grad, var in gvs]
            train_step = optimizer.apply_gradients(capped_gvs)

        # Create summaries for training
        summary_loss = tf.scalar_summary("Loss", loss_reinforce)
        summary_target_min = tf.scalar_summary("Target Min", tf.reduce_min(self.policy_model.target))
        summary_target_max = tf.scalar_summary("Target Max", tf.reduce_max(self.policy_model.target))
        summary_target_mean = tf.scalar_summary("Target Mean", tf.reduce_mean(self.policy_model.target))
        summary_entropy_penalty = tf.scalar_summary("Entropy Penalty", entropy_penalty)
        update_summaries = [summary_loss, summary_target_min,
                            summary_target_max, summary_target_mean, summary_entropy_penalty]

        AbstractLearning.__init__(self, policy_model, loss_reinforce, train_step, update_summaries)
Example #28
Source File: q_learning.py    From blocks with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, agent, q_network, target_q_network):
        """ Creates constructor for an abstract learning setup """

        self.agent = agent
        self.loss = None
        self.q_network = q_network
        self.target_q_network = target_q_network

        # Define epsilon greedy behaviour policy
        epsilon = 1.0
        min_epsilon = 0.1
        self.behaviour_policy = egp.EpsilonGreedyPolicy(epsilon, min_epsilon)

        # Replay memory and prioritized sweeping for sampling from the replay memory
        max_replay_memory_size = 2000
        self.replay_memory = collections.deque(maxlen=max_replay_memory_size)
        rho = 0.5
        self.ps = prioritized_sweeping.PrioritizedSweeping(0, rho)

        optimizer = tf.train.AdamOptimizer(self.rl_learning_rate)
        loss = self.calc_loss(self.q_network.model_output, self.q_network.model_output_indices, self.q_network.target)

        using_grad_clip = True
        grad_clip_val = 5.0
        if not using_grad_clip:
            train_step = optimizer.minimize(loss)
        else:
            # Clip each gradient's L2 norm; None gradients pass through.
            gvs = optimizer.compute_gradients(loss)
            capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                          if grad is not None else (grad, var) for grad, var in gvs]
            train_step = optimizer.apply_gradients(capped_gvs)

        # Create summaries for training
        # NOTE(review): legacy tf.scalar_summary (pre-TF 1.0 API).
        summary_loss = tf.scalar_summary("Loss", loss)
        update_summaries = [summary_loss]

        AbstractLearning.__init__(self, q_network, loss, train_step, update_summaries)
Example #29
Source File: DE_Model_SA.py    From QA_HRDE_LTC with MIT License 5 votes vote down vote up
def _create_optimizer(self):
        """Builds the Adam training op with per-gradient L2-norm clipping (norm 1)."""
        # Fix: parenthesized print works on both Python 2 and 3; the bare
        # print statement was Python-2-only.
        print('[launch] create optimizer')
        
        with tf.name_scope('optimizer') as scope:
            
            opt_func = tf.train.AdamOptimizer(learning_rate=self.lr)
            gvs = opt_func.compute_gradients(self.loss)
            # Fix: guard against None gradients (variables unused by the
            # loss) — tf.clip_by_norm(None, ...) raises.
            capped_gvs = [(tf.clip_by_norm(t=grad, clip_norm=1), var)
                          if grad is not None else (grad, var)
                          for grad, var in gvs]
            self.optimizer = opt_func.apply_gradients(grads_and_vars=capped_gvs, global_step=self.global_step)
Example #30
Source File: clipping_step.py    From tensorforce with Apache License 2.0 5 votes vote down vote up
def tf_step(self, variables, **kwargs):
        """Runs the inner optimizer's step, then clips the resulting deltas.

        Modes: 'global_norm' clips all deltas jointly; 'norm' clips each
        delta's L2 norm; 'value' clips element-wise to [-threshold, threshold].
        """
        deltas = self.optimizer.step(variables=variables, **kwargs)

        with tf.control_dependencies(control_inputs=deltas):
            threshold = self.threshold.value()
            if self.mode == 'global_norm':
                clipped_deltas, update_norm = tf.clip_by_global_norm(
                    t_list=deltas, clip_norm=threshold
                )
            else:
                update_norm = tf.linalg.global_norm(t_list=deltas)
                clipped_deltas = list()
                for delta in deltas:
                    if self.mode == 'norm':
                        clipped_delta = tf.clip_by_norm(t=delta, clip_norm=threshold)
                    elif self.mode == 'value':
                        clipped_delta = tf.clip_by_value(
                            t=delta, clip_value_min=-threshold, clip_value_max=threshold
                        )
                    clipped_deltas.append(clipped_delta)

            # Record the pre-clipping update norm as a summary.
            clipped_deltas = self.add_summary(
                label='update-norm', name='update-norm-unclipped', tensor=update_norm,
                pass_tensors=clipped_deltas
            )

            # Apply only the difference (clipped - raw) — presumably the inner
            # optimizer already applied the raw deltas, so this corrects the
            # variables to the clipped update; TODO confirm step() semantics.
            exceeding_deltas = list()
            for delta, clipped_delta in zip(deltas, clipped_deltas):
                exceeding_deltas.append(clipped_delta - delta)

        applied = self.apply_step(variables=variables, deltas=exceeding_deltas)

        with tf.control_dependencies(control_inputs=(applied,)):
            return util.fmap(function=util.identity_operation, xs=clipped_deltas)