Python tensorflow.add_check_numerics_ops() Examples

The following are 10 code examples of tensorflow.add_check_numerics_ops(), collected from open-source projects. The source file, project, and license are listed above each example.
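Before the project examples, a minimal self-contained sketch of the API itself may help (this assumes TensorFlow 1.x graph mode; the tensor names are illustrative, not from any project below):

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None], name="x")
y = tf.log(x)  # log(0.0) -> -Inf, log of a negative -> NaN
# Walk the default graph and attach tf.check_numerics to every
# float16/float32/float64 tensor; returns one grouped op to fetch.
check_op = tf.add_check_numerics_ops()

with tf.Session() as sess:
    sess.run([y, check_op], feed_dict={x: [1.0, 2.0]})  # passes
    # sess.run([y, check_op], feed_dict={x: [0.0]})  # raises InvalidArgumentError ("Inf")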
Example #1
Source File: model.py    From ShuffleNet with Apache License 2.0
def __init_output(self):
        with tf.variable_scope('output'):
            # Losses
            self.regularization_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
            self.cross_entropy_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.y, name='loss'))
            self.loss = self.regularization_loss + self.cross_entropy_loss

            # Optimizer
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.args.learning_rate)
                self.train_op = self.optimizer.minimize(self.loss)
                # This is for debugging NaNs. Check TensorFlow documentation.
                self.check_op = tf.add_check_numerics_ops()

            # Output and Metrics
            self.y_out_softmax = tf.nn.softmax(self.logits)
            self.y_out_argmax = tf.argmax(self.y_out_softmax, axis=-1, output_type=tf.int32)
            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.y, self.y_out_argmax), tf.float32))

        with tf.name_scope('train-summary-per-iteration'):
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('acc', self.accuracy)
            self.summaries_merged = tf.summary.merge_all() 
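The snippet above only builds check_op; how it is fetched lies outside the excerpt. A plausible training-step call (the session, placeholder, and batch names here are hypothetical) would be:

# Hypothetical usage: fetching check_op alongside train_op makes the
# step fail fast, naming the first tensor that contains an Inf or NaN.
sess.run([model.train_op, model.check_op],
         feed_dict={model.x: batch_x, model.y: batch_y})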
Example #2
Source File: train.py    From g-tensorflow-models with Apache License 2.0
def add_check_numerics_ops():
  """Variant of tf.add_check_numerics_ops() that skips ops whose outputs
  are expected to contain non-finite values."""
  # Ops whose names match any of these fragments legitimately produce
  # Inf/NaN (e.g. log-probabilities of zero-probability events).
  bad = ["logits/Log", "sample/Reshape", "log_prob/mul",
         "log_prob/SparseSoftmaxCrossEntropyWithLogits/Reshape",
         "entropy/Reshape", "entropy/LogSoftmax", "Categorical", "Mean"]
  check_op = []
  for op in tf.get_default_graph().get_operations():
    if all(x not in op.name for x in bad):
      for output in op.outputs:
        if output.dtype in [tf.float16, tf.float32, tf.float64]:
          if op._get_control_flow_context() is not None:  # pylint: disable=protected-access
            raise ValueError("`tf.add_check_numerics_ops()` is not compatible "
                             "with TensorFlow control flow operations such as "
                             "`tf.cond()` or `tf.while_loop()`.")

          message = op.name + ":" + str(output.value_index)
          # Chain each check after the previous one so the checks run serially.
          with tf.control_dependencies(check_op):
            check_op = [tf.check_numerics(output, message=message)]
  return tf.group(*check_op)
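This variant re-implements the stock tf.add_check_numerics_ops() with one twist: ops whose names match the bad list are excluded, because their outputs legitimately go non-finite. Each tf.check_numerics op is chained to the previous one via tf.control_dependencies, so the checks execute serially, matching the stock implementation's behavior.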
(The identical helper appears verbatim in two more projects, models under Apache License 2.0 and multilabel-image-classification-tensorflow under the MIT License; those duplicate listings are omitted here.)
Example #3
Source File: callbacks.py    From keras-fcn with MIT License
def set_model(self, model):
        self.model = model
        self.sess = K.get_session()
        self.check_num = tf.add_check_numerics_ops() 
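set_model only constructs the check op; a hedged sketch of the hook that would evaluate it each batch (on_batch_end is the standard Keras Callback method, and this assumes the checked subgraph needs no extra placeholder feeds):

def on_batch_end(self, batch, logs=None):
    # Raises tf.errors.InvalidArgumentError naming the first Inf/NaN tensor.
    self.sess.run(self.check_num)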
Example #4
Source File: actor.py    From phillip with GNU General Public License v3.0
def __init__(self, **kwargs):
    super(Actor, self).__init__(**kwargs)

    with self.graph.as_default(), tf.device(self.device): 
      if self.predict: self._init_model(**kwargs)
      self._init_policy(**kwargs)
      
      # build computation graph
      self.input = ct.inputCType(ssbm.SimpleStateAction, [self.config.memory+1], "input")
      self.input['delayed_action'] = tf.placeholder(tf.int64, [self.config.delay], "delayed_action")
      self.input['hidden'] = util.deepMap(lambda size: tf.placeholder(tf.float32, [size], name="input/hidden"), self.core.hidden_size)

      batch_input = util.deepMap(lambda t: tf.expand_dims(t, 0), self.input)

      states = self.embedGame(batch_input['state'])
      prev_actions = self.embedAction(batch_input['prev_action'])
      combined = tf.concat(axis=-1, values=[states, prev_actions])
      history = tf.unstack(combined, axis=1)
      inputs = tf.concat(axis=-1, values=history)
      core_output, hidden_state = self.core(inputs, batch_input['hidden'])
      actions = self.embedAction(batch_input['delayed_action'])
      
      if self.predict:
        predict_actions = actions[:, :self.model.predict_steps]
        delayed_actions = actions[:, self.model.predict_steps:]
        core_output = self.model.predict(history, core_output, hidden_state, predict_actions, batch_input['state'])
      else:
        delayed_actions = actions
      
      batch_policy = self.policy.getPolicy(core_output, delayed_actions), hidden_state
      self.run_policy = util.deepMap(lambda t: tf.squeeze(t, [0]), batch_policy)

      self.check_op = tf.no_op() if self.dynamic else tf.add_check_numerics_ops()
      
      self._finalize_setup() 
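Note the check_op guard: tf.add_check_numerics_ops() raises a ValueError on graphs containing control-flow constructs such as tf.while_loop() (see the reimplementation in Example #2), so when self.dynamic is set, which presumably implies such constructs, a tf.no_op() stands in for the check op.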
Example #5
Source File: numerics_test.py    From deep_image_model with Apache License 2.0
def testInf(self):
    with self.test_session(graph=tf.Graph()):
      t1 = tf.constant(1.0)
      t2 = tf.constant(0.0)
      a = tf.div(t1, t2)
      check = tf.add_check_numerics_ops()
      a = control_flow_ops.with_dependencies([check], a)
      with self.assertRaisesOpError("Inf"):
        a.eval() 
Example #6
Source File: numerics_test.py    From deep_image_model with Apache License 2.0
def testNaN(self):
    with self.test_session(graph=tf.Graph()):
      t1 = tf.constant(0.0)
      t2 = tf.constant(0.0)
      a = tf.div(t1, t2)
      check = tf.add_check_numerics_ops()
      a = control_flow_ops.with_dependencies([check], a)
      with self.assertRaisesOpError("NaN"):
        a.eval() 
Example #7
Source File: numerics_test.py    From deep_image_model with Apache License 2.0
def testBoth(self):
    with self.test_session(graph=tf.Graph()):
      t1 = tf.constant([1.0, 0.0])
      t2 = tf.constant([0.0, 0.0])
      a = tf.div(t1, t2)
      check = tf.add_check_numerics_ops()
      a = control_flow_ops.with_dependencies([check], a)
      with self.assertRaisesOpError("Inf and NaN"):
        a.eval() 
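Taken together, these three tests pin down the error text raised through the check: 1.0/0.0 reports "Inf", 0.0/0.0 reports "NaN", and a tensor containing both reports "Inf and NaN". In each case control_flow_ops.with_dependencies makes a depend on the grouped check, so a.eval() cannot return a value without the check running first.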
Example #8
Source File: joint_model.py    From elbow with BSD 3-Clause "New" or "Revised" License
def train(self, adam_rate=0.1, stopping_rule=None, steps=None,
              avg_decay=None, debug=False, print_s=1):
        elbo, elp, entropy = self.construct_elbo(return_all=True)


        if stopping_rule is None:
            if steps is not None:
                stopping_rule = StepCountStopper(step_count=steps)
            elif avg_decay is not None:
                stopping_rule = MovingAverageStopper(decay=avg_decay)
            else:
                stopping_rule = MovingAverageStopper()
        try:
            train_step = tf.train.AdamOptimizer(adam_rate).minimize(-elbo)
        except ValueError as e:
print(e)
            return
            
        if debug:
            debug_ops = tf.add_check_numerics_ops()

        session = self.get_session(do_init=False)

        init = tf.global_variables_initializer()
        session.run(init)
        
        elbo_val = None
        running_elbo = 0
        i = 0
        t = -np.inf
        stopping_rule.reset()
        while not stopping_rule.observe(elbo_val):
            if debug:
                session.run(debug_ops)

            fd = self.feed_dict()
                
            session.run(train_step, feed_dict=fd)

            elbo_val, elp_val, entropy_val = session.run((elbo, elp, entropy), feed_dict=fd)
            if print_s is not None and (time.time() - t) > print_s:
                print "step %d elp %.2f entropy %.2f elbo %.2f" % (i, elp_val, entropy_val, elbo_val)
                t = time.time()
                
            i += 1 
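When debug is set, the grouped check op is fetched in its own session.run call at the top of each iteration; the checks' input tensors are recomputed for that call, so the pattern trades an extra forward pass per step for an error that names the first non-finite tensor.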
Example #9
Source File: model_template.py    From ReSAN with Apache License 2.0
def update_tensor_add_ema_and_opt(self):
        self.logits, (self.s1_act, self.s1_logpa), (self.s2_act, self.s2_logpa), \
            (self.s1_percentage, self.s2_percentage) = self.build_network()
        self.loss_sl, self.loss_rl = self.build_loss()
        self.accuracy = self.build_accuracy()

        # ------------ema-------------
        if True:
            self.var_ema = tf.train.ExponentialMovingAverage(cfg.var_decay)
            self.build_var_ema()

        if cfg.mode == 'train':
            self.ema = tf.train.ExponentialMovingAverage(cfg.decay)
            self.build_ema()
        self.summary = tf.summary.merge_all()

        # ---------- optimization ---------
        if cfg.optimizer.lower() == 'adadelta':
            assert cfg.learning_rate > 0.1 and cfg.learning_rate < 1.
            self.opt_sl = tf.train.AdadeltaOptimizer(cfg.learning_rate)
            self.opt_rl = tf.train.AdadeltaOptimizer(cfg.learning_rate)
        elif cfg.optimizer.lower() == 'adam':
            assert cfg.learning_rate < 0.1
            self.opt_sl = tf.train.AdamOptimizer(cfg.learning_rate)
            self.opt_rl = tf.train.AdamOptimizer(cfg.learning_rate)
        elif cfg.optimizer.lower() == 'rmsprop':
            assert cfg.learning_rate < 0.1
            self.opt_sl = tf.train.RMSPropOptimizer(cfg.learning_rate)
            self.opt_rl = tf.train.RMSPropOptimizer(cfg.learning_rate)
        else:
            raise AttributeError('no optimizer named \'%s\'' % cfg.optimizer)

        trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
        # count trainable parameters (embedding matrices are excluded below)
        all_params_num = 0
        for elem in trainable_vars:
            # elem.name
            var_name = elem.name.split(':')[0]
            if var_name.endswith('emb_mat'):
                continue
            params_num = 1
            for l in elem.get_shape().as_list(): params_num *= l
            all_params_num += params_num
        _logger.add('Trainable Parameters Number: %d' % all_params_num)

        sl_vars = [var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
                      if not var.op.name.startswith(self.scope+'/hard_network')]
        self.train_op_sl = self.opt_sl.minimize(
            self.loss_sl, self.global_step,
            var_list=sl_vars)

        rl_vars = [var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
                      if var.op.name.startswith(self.scope + '/hard_network')]
        if len(rl_vars) > 0:
            self.train_op_rl = self.opt_rl.minimize(
                self.loss_rl,
                var_list=rl_vars)
        else:
            self.train_op_rl = None
        # self.check_op = tf.add_check_numerics_ops() 
Example #10
Source File: Trainer.py    From PReMVOS with MIT License
def __init__(self, config, train_network, test_network, global_step, session):
    self.profile = config.bool("profile", False)
    self.add_grad_checks = config.bool("add_grad_checks", False)
    self.add_numerical_checks = config.bool("add_numerical_checks", False)
    self.measures = config.unicode_list("measures", [])
    self.opt_str = config.str("optimizer", "adam").lower()
    self.train_network = train_network
    self.test_network = test_network
    self.session = session
    self.global_step = global_step
    self.validation_step_number = 0
    self.gradient_clipping = config.float("gradient_clipping", -1.0)
    self.optimizer_exclude_prefix = config.str("optimizer_exclude_prefix", "")
    self.learning_rates = config.int_key_dict("learning_rates")
    self.recursive_training = config.bool(Constants.RECURSIVE_TRAINING, False)
    assert 1 in self.learning_rates, "no initial learning rate specified"
    self.curr_learning_rate = self.learning_rates[1]
    self.lr_var = tf.placeholder(config.dtype, shape=[], name="learning_rate")
    self.loss_scale_var = tf.placeholder_with_default(1.0, shape=[], name="loss_scale")
    self.opt, self.reset_opt_op = self.create_optimizer(config)
    grad_norm = None
    if train_network is not None:
      if train_network.use_partialflow:
        self.prepare_partialflow()
        self.step_op = tf.no_op("step")
      else:
        self.step_op, grad_norm = self.create_step_op()
      if len(self.train_network.update_ops) == 0:
        self.update_ops = []
      else:
        self.update_ops = self.train_network.update_ops
      if self.add_numerical_checks:
        self.update_ops.append(tf.add_check_numerics_ops())
      self.train_targets = self.train_network.raw_labels
      self.train_inputs = self.train_network.inputs
      self.train_network_ys = self.train_network.y_softmax
      if self.train_network_ys is not None and self.train_targets is not None:
        self.train_network_ys = self._adjust_results_to_targets(self.train_network_ys, self.train_targets)
    else:
      self.step_op = None
      self.update_ops = None
    self.summary_writer, self.summary_op, self.summary_op_test = self.init_summaries(config, grad_norm)

    if test_network is not None:
      self.test_targets = self.test_network.raw_labels
      self.test_inputs = self.test_network.inputs
      self.test_network_ys = self.test_network.y_softmax
      if self.test_network_ys is not None and self.test_targets is not None:
        self.test_network_ys = self._adjust_results_to_targets(self.test_network_ys, self.test_targets)
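The commented-out check in Example #9 and the config-gated check here illustrate the same trade-off: when add_numerical_checks is enabled, this trainer appends the grouped op from tf.add_check_numerics_ops() to self.update_ops, which by the usual TF1 convention are run alongside the step op, so the numeric checks execute as a side effect of every training step rather than as a separate fetch.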