Python tensorflow.float16 Examples

The following are 30 code examples of tensorflow.float16, TensorFlow's 16-bit floating-point dtype. Each example is attributed to its original project and source file on the line above the code. Note that tf.float16 is a DType object, not a callable: it is compared, passed, and used as a cast target directly rather than invoked.
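Before the examples, here is a minimal sketch of the two patterns that recur throughout this page: selecting tf.float16 behind a flag and casting tensors to it. It uses the TF 1.x API to match the examples below; the use_fp16 flag is purely illustrative.

import tensorflow as tf

use_fp16 = True  # illustrative stand-in for the FLAGS.use_fp16 seen in the examples
dtype = tf.float16 if use_fp16 else tf.float32

x = tf.constant([1.0, 2.0, 3.0])   # float32 by default
x_half = tf.cast(x, dtype)         # cast activations to the chosen dtype
assert x_half.dtype == tf.float16  # tf.float16 is a DType object, compared directly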
Example #1
Source File: sg_optimize.py    From sugartensor with MIT License
def _apply_dense(self, grad, var):
        lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
        beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
        if var.dtype.base_dtype == tf.float16:
            eps = 1e-7  # Can't use 1e-8 due to underflow -- not sure if it makes a big difference.
        else:
            eps = 1e-8

        v = self.get_slot(var, "v")
        v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad)
        m = self.get_slot(var, "m")
        m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
        g_t = v_t / m_t

        var_update = tf.assign_sub(var, lr_t * g_t)
        return tf.group(*[var_update, m_t, v_t]) 
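The eps switch above (repeated in several optimizers on this page) exists because 1e-8 underflows to zero in float16: the smallest positive float16 subnormal is about 6e-8, so 1e-8 rounds to 0.0 while 1e-7 survives. A quick check with numpy, independent of TensorFlow:

import numpy as np

print(np.float16(1e-8))           # 0.0 -- underflows, defeating the epsilon's purpose
print(np.float16(1e-7))           # ~1.19e-07 -- rounds to a nonzero subnormal
print(np.finfo(np.float16).tiny)  # 6.104e-05, the smallest *normal* float16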
Example #2
Source File: model.py    From tensorflow_nlp with Apache License 2.0
def create_model(forward_only, args):
    """建立模型"""
    buckets = data_utils.buckets

    dtype = tf.float16 if args.use_fp16 else tf.float32
    model = S2SModel(
        data_utils.dim,
        data_utils.dim,
        buckets,
        args.size,
        args.dropout,
        args.num_layers,
        args.max_gradient_norm,
        args.batch_size,
        args.learning_rate,
        args.num_samples,
        forward_only,
        dtype
    )
    return model 
Example #3
Source File: py_func_batch_env.py    From fine-lm with MIT License
def simulate(self, action):
    """Step the batch of environments.

    The results of the step can be accessed from the variables defined below.

    Args:
      action: Tensor holding the batch of actions to apply.

    Returns:
      Tuple of the reward and done tensors.
    """
    with tf.name_scope('environment/simulate'):
      if action.dtype in (tf.float16, tf.float32, tf.float64):
        action = tf.check_numerics(action, 'action')
      observ_dtype = utils.parse_dtype(self._batch_env.observation_space)
      observ, reward, done = tf.py_func(
          lambda a: self._batch_env.step(a)[:3], [action],
          [observ_dtype, tf.float32, tf.bool], name='step')
      observ = tf.check_numerics(observ, 'observ')
      reward = tf.check_numerics(reward, 'reward')
      reward.set_shape((len(self),))
      done.set_shape((len(self),))
      with tf.control_dependencies([self._observ.assign(observ)]):
        return tf.identity(reward), tf.identity(done) 
Example #4
Source File: adamax.py    From iaf with MIT License
def _apply_dense(self, grad, var):
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
        if var.dtype.base_dtype == tf.float16:
            eps = 1e-7  # Can't use 1e-8 due to underflow -- not sure if it makes a big difference.
        else:
            eps = 1e-8

        v = self.get_slot(var, "v")
        v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad)
        m = self.get_slot(var, "m")
        m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
        g_t = v_t / m_t

        var_update = state_ops.assign_sub(var, lr_t * g_t)
        return control_flow_ops.group(*[var_update, m_t, v_t]) 
Example #5
Source File: dataset_factory.py    From FastNN with BSD 2-Clause "Simplified" License
def get_dataset_iterator(dataset_name, train_image_size, preprocessing_fn=None, data_sources=None, reader=None):
  with tf.device("/cpu:0"):
    if not dataset_name:
      raise ValueError('expect dataset_name not None.')
    if dataset_name == 'mock':
      return dataset_utils._create_mock_iterator(train_image_size)
    if dataset_name not in datasets_map:
      raise ValueError('Name of network unknown %s' % dataset_name)

    def parse_fn(example):
      with tf.device("/cpu:0"):
        image, label = datasets_map[dataset_name].parse_fn(example)
        if preprocessing_fn is not None:
          image = preprocessing_fn(image, train_image_size, train_image_size)
        if FLAGS.use_fp16:
          image = tf.cast(image, tf.float16)
        label -= FLAGS.labels_offset
        return image, label
    return dataset_utils._create_dataset_iterator(data_sources, parse_fn, reader) 
Example #6
Source File: in_graph_batch_env.py    From soccer-matlab with BSD 2-Clause "Simplified" License
def simulate(self, action):
    """Step the batch of environments.

    The results of the step can be accessed from the variables defined below.

    Args:
      action: Tensor holding the batch of actions to apply.

    Returns:
      Operation.
    """
    with tf.name_scope('environment/simulate'):
      if action.dtype in (tf.float16, tf.float32, tf.float64):
        action = tf.check_numerics(action, 'action')
      observ_dtype = self._parse_dtype(self._batch_env.observation_space)
      observ, reward, done = tf.py_func(
          lambda a: self._batch_env.step(a)[:3], [action],
          [observ_dtype, tf.float32, tf.bool], name='step')
      observ = tf.check_numerics(observ, 'observ')
      reward = tf.check_numerics(reward, 'reward')
      return tf.group(
          self._observ.assign(observ),
          self._action.assign(action),
          self._reward.assign(reward),
          self._done.assign(done)) 
Example #7
Source File: cifar10.py    From DOTA_models with Apache License 2.0
def _variable_with_weight_decay(name, shape, stddev, wd):
  """Helper to create an initialized Variable with weight decay.

  Note that the Variable is initialized with a truncated normal distribution.
  A weight decay is added only if one is specified.

  Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of a truncated Gaussian
    wd: add L2Loss weight decay multiplied by this float. If None, weight
        decay is not added for this Variable.

  Returns:
    Variable Tensor
  """
  dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
  var = _variable_on_cpu(
      name,
      shape,
      tf.truncated_normal_initializer(stddev=stddev, dtype=dtype))
  if wd is not None:
    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
  return var 
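For context, the cifar10 model builds its convolution kernels with this helper roughly as follows; the shape, stddev, and wd values are illustrative rather than quoted from a specific layer.

# 5x5 conv kernel, 3 input channels, 64 filters; pass wd=None to skip the L2 penalty.
kernel = _variable_with_weight_decay('weights', shape=[5, 5, 3, 64],
                                     stddev=5e-2, wd=0.004)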
Example #8
Source File: diet.py    From fine-lm with MIT License
def _quantize(x, params, randomize=True):
  """Quantize x according to params, optionally randomizing the rounding."""
  if not params.quantize:
    return x

  if not randomize:
    return tf.bitcast(
        tf.cast(x / params.quantization_scale, tf.int16), tf.float16)

  abs_x = tf.abs(x)
  sign_x = tf.sign(x)
  y = abs_x / params.quantization_scale
  y = tf.floor(y + tf.random_uniform(common_layers.shape_list(x)))
  y = tf.minimum(y, tf.int16.max) * sign_x
  q = tf.bitcast(tf.cast(y, tf.int16), tf.float16)
  return q 
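The tf.bitcast here performs no numeric conversion: it stores the int16 quantized values in float16-shaped memory bit for bit, which is why a matching dequantize step can recover them exactly. numpy's view shows the same reinterpretation; this snippet is a standalone illustration, not part of diet.py.

import numpy as np

q = np.array([3, -7], dtype=np.int16)
stored = q.view(np.float16)       # same 16 bits, reinterpreted as float16 storage
restored = stored.view(np.int16)  # bit-exact round trip back to the int16 values
print(restored)                   # [ 3 -7]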
Example #9
Source File: cifar10.py    From DOTA_models with Apache License 2.0
def inputs(eval_data):
  """Construct input for CIFAR evaluation using the Reader ops.

  Args:
    eval_data: bool, indicating if one should use the train or eval data set.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.

  Raises:
    ValueError: If no data_dir
  """
  if not FLAGS.data_dir:
    raise ValueError('Please supply a data_dir')
  data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
  images, labels = cifar10_input.inputs(eval_data=eval_data,
                                        data_dir=data_dir,
                                        batch_size=FLAGS.batch_size)
  if FLAGS.use_fp16:
    images = tf.cast(images, tf.float16)
    labels = tf.cast(labels, tf.float16)
  return images, labels 
Example #10
Source File: in_graph_batch_env.py    From soccer-matlab with BSD 2-Clause "Simplified" License
def simulate(self, action):
    """Step the batch of environments.

    The results of the step can be accessed from the variables defined below.

    Args:
      action: Tensor holding the batch of actions to apply.

    Returns:
      Operation.
    """
    with tf.name_scope('environment/simulate'):
      if action.dtype in (tf.float16, tf.float32, tf.float64):
        action = tf.check_numerics(action, 'action')
      observ_dtype = self._parse_dtype(self._batch_env.observation_space)
      observ, reward, done = tf.py_func(
          lambda a: self._batch_env.step(a)[:3], [action],
          [observ_dtype, tf.float32, tf.bool], name='step')
      observ = tf.check_numerics(observ, 'observ')
      reward = tf.check_numerics(reward, 'reward')
      return tf.group(
          self._observ.assign(observ),
          self._action.assign(action),
          self._reward.assign(reward),
          self._done.assign(done)) 
Example #11
Source File: resnet-multigpu.py    From benchmarks with The Unlicense
def _get_logits(self, image):
        ctx = get_current_tower_context()
        with maybe_freeze_updates(ctx.index > 0):
            network = ConvNetBuilder(
                image, 3, True,
                use_tf_layers=True,
                data_format=self.data_format,
                dtype=tf.float16 if args.use_fp16 else tf.float32,
                variable_dtype=tf.float32)
            with custom_getter_scope(network.get_custom_getter()):
                dataset = lambda: 1  # lightweight stand-in object; only its .name is read below
                dataset.name = 'imagenet'
                model_conf = model_config.get_model_config('resnet50', dataset)
                model_conf.set_batch_size(args.batch)
                model_conf.add_inference(network)
                return network.affine(1000, activation='linear', stddev=0.001) 
Example #12
Source File: cifar10_reusable.py    From blackbox-attacks with MIT License
def _variable_on_cpu(name, shape, initializer):
  """Helper to create a Variable stored on CPU memory.

  Args:
    name: name of the variable
    shape: list of ints
    initializer: initializer for Variable

  Returns:
    Variable Tensor
  """
  key = (tf.get_variable_scope().name, name)
  if key in shared_variables:
    return shared_variables[key]
  # with tf.device('/cpu:0'):
  dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
  var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype)
  shared_variables[key] = var
  return var 
Example #13
Source File: cifar10_reusable.py    From blackbox-attacks with MIT License
def _variable_with_weight_decay(name, shape, stddev, wd):
  """Helper to create an initialized Variable with weight decay.

  Note that the Variable is initialized with a truncated normal distribution.
  A weight decay is added only if one is specified.

  Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of a truncated Gaussian
    wd: add L2Loss weight decay multiplied by this float. If None, weight
        decay is not added for this Variable.

  Returns:
    Variable Tensor
  """
  dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
  var = _variable_on_cpu(
      name,
      shape,
      tf.truncated_normal_initializer(stddev=stddev, dtype=dtype))
  if wd is not None:
    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
  return var 
Example #14
Source File: cifar10_reusable.py    From blackbox-attacks with MIT License
def inputs(eval_data):
  """Construct input for CIFAR evaluation using the Reader ops.

  Args:
    eval_data: bool, indicating if one should use the train or eval data set.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.

  Raises:
    ValueError: If no data_dir
  """
  if not FLAGS.data_dir:
    raise ValueError('Please supply a data_dir')
  data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
  images, labels = cifar10_input_nostd.inputs(eval_data=eval_data,
                                              data_dir=data_dir,
                                              batch_size=FLAGS.batch_size)
  if FLAGS.use_fp16:
    images = tf.cast(images, tf.float16)
    labels = tf.cast(labels, tf.float16)
  return images, labels 
Example #15
Source File: optimizers.py    From zhusuan with MIT License
def _apply_dense(self, grad, var):
        lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
        beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
        if var.dtype.base_dtype == tf.float16:
            # Can't use 1e-8 due to underflow
            eps = 1e-7
        else:
            eps = 1e-8

        v = self.get_slot(var, "v")
        v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad)
        m = self.get_slot(var, "m")
        m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
        g_t = v_t / m_t

        var_update = tf.assign_sub(var, lr_t * g_t)
        return tf.group(*[var_update, m_t, v_t]) 
Example #16
Source File: diet.py    From BERT with Apache License 2.0
def _quantize(x, params, randomize=True):
  """Quantize x according to params, optionally randomizing the rounding."""
  if not params.quantize:
    return x

  if not randomize:
    return tf.bitcast(
        tf.cast(x / params.quantization_scale, tf.int16), tf.float16)

  abs_x = tf.abs(x)
  sign_x = tf.sign(x)
  y = abs_x / params.quantization_scale
  y = tf.floor(y + tf.random_uniform(common_layers.shape_list(x)))
  y = tf.minimum(y, tf.int16.max) * sign_x
  q = tf.bitcast(tf.cast(y, tf.int16), tf.float16)
  return q 
Example #17
Source File: translate.py    From DOTA_models with Apache License 2.0
def create_model(session, forward_only):
  """Create translation model and initialize or load parameters in session."""
  dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
  model = seq2seq_model.Seq2SeqModel(
      FLAGS.from_vocab_size,
      FLAGS.to_vocab_size,
      _buckets,
      FLAGS.size,
      FLAGS.num_layers,
      FLAGS.max_gradient_norm,
      FLAGS.batch_size,
      FLAGS.learning_rate,
      FLAGS.learning_rate_decay_factor,
      forward_only=forward_only,
      dtype=dtype)
  ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
  if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
    print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
    model.saver.restore(session, ckpt.model_checkpoint_path)
  else:
    print("Created model with fresh parameters.")
    session.run(tf.global_variables_initializer())
  return model 
Example #18
Source File: models.py    From aletheia with MIT License
def _apply_dense(self, grad, var):
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
        if var.dtype.base_dtype == tf.float16:
            eps = 1e-7  # Can't use 1e-8 due to underflow -- not sure if it makes a big difference.
        else:
            eps = 1e-8

        v = self.get_slot(var, "v")
        v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad)
        m = self.get_slot(var, "m")
        m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
        g_t = v_t / m_t

        var_update = state_ops.assign_sub(var, lr_t * g_t)
        return control_flow_ops.group(*[var_update, m_t, v_t]) 
Example #19
Source File: adamirror.py    From HyperGAN with MIT License
def _apply_dense(self, grad, var):
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    if var.dtype.base_dtype == tf.float16:
        eps = 1e-7  # Can't use 1e-8 due to underflow -- not sure if it makes a big difference.
    else:
        eps = 1e-8

    v = self.get_slot(var, "v")
    v_t = v.assign(beta2_t * v + (1. - beta2_t) * tf.square(grad))
    m = self.get_slot(var, "m")
    m_t = m.assign(beta1_t * m + (1. - beta1_t) * grad)
    v_t_hat = tf.div(v_t, 1. - beta2_t)
    m_t_hat = tf.div(m_t, 1. - beta1_t)

    g_t = tf.div(m_t, tf.sqrt(v_t) + eps)
    g_t_1 = self.get_slot(var, "g")
    g_t = g_t_1.assign(g_t)

    var_update = state_ops.assign_sub(var, 2. * lr_t * g_t - lr_t * g_t_1)  # Adam would be lr_t * g_t
    return control_flow_ops.group(*[var_update, m_t, v_t, g_t])
Example #20
Source File: cifar10_reusable.py    From blackbox-attacks with MIT License
def distorted_inputs():
  """Construct distorted input for CIFAR training using the Reader ops.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.

  Raises:
    ValueError: If no data_dir
  """
  if not FLAGS.data_dir:
    raise ValueError('Please supply a data_dir')
  data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
  images, labels = cifar10_input_nostd.distorted_inputs(data_dir=data_dir,
                                                        batch_size=FLAGS.batch_size)
  if FLAGS.use_fp16:
    images = tf.cast(images, tf.float16)
    labels = tf.cast(labels, tf.float16)
  return images, labels 
Example #21
Source File: tensor.py    From dgl with Apache License 2.0
def data_type_dict():
    return {'float16': tf.float16,
            'float32': tf.float32,
            'float64': tf.float64,
            'uint8': tf.uint8,
            'int8': tf.int8,
            'int16': tf.int16,
            'int32': tf.int32,
            'int64': tf.int64,
            'bool' : tf.bool} 
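A short usage sketch: this dictionary lets the backend resolve dtype names to TensorFlow dtype objects.

dtypes = data_type_dict()
assert dtypes['float16'] is tf.float16  # string name -> TensorFlow DType object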
Example #22
Source File: optimize.py    From BERT with Apache License 2.0
def _mixed_precision_is_enabled(hparams):
  """Should be the same as in common_attention, avoiding import."""
  activation_dtype = hparams.activation_dtype
  weight_dtype = hparams.weight_dtype
  return activation_dtype == tf.float16 and weight_dtype == tf.float32 
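Since hparams only needs activation_dtype and weight_dtype attributes here, any attribute container can exercise the check; the SimpleNamespace below is a stand-in for the real HParams object.

from types import SimpleNamespace

hparams = SimpleNamespace(activation_dtype=tf.float16, weight_dtype=tf.float32)
print(_mixed_precision_is_enabled(hparams))  # True: fp16 activations over fp32 weights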
Example #23
Source File: test_node.py    From onnx-tensorflow with Apache License 2.0
def test_cast(self):
    if legacy_onnx_pre_ver(1, 2) or legacy_opset_pre_ver(6):
      test_cases = [("FLOAT", tf.float32), ("UINT8", tf.uint8),
                    ("INT8", tf.int8),
                    ("UINT16", tf.uint16), ("INT16", tf.int16),
                    ("INT32", tf.int32), ("INT64", tf.int64), ("BOOL", tf.bool),
                    ("FLOAT16", tf.float16), ("DOUBLE", tf.float64),
                    ("COMPLEX64", tf.complex64), ("COMPLEX128", tf.complex128)]
    else:
      test_cases = [(TensorProto.FLOAT, tf.float32),
                    (TensorProto.UINT8, tf.uint8), (TensorProto.INT8, tf.int8),
                    (TensorProto.UINT16, tf.uint16),
                    (TensorProto.INT16, tf.int16),
                    (TensorProto.INT32, tf.int32),
                    (TensorProto.INT64, tf.int64), (TensorProto.BOOL, tf.bool),
                    (TensorProto.FLOAT16, tf.float16),
                    (TensorProto.DOUBLE, tf.float64),
                    (TensorProto.COMPLEX64, tf.complex64),
                    (TensorProto.COMPLEX128, tf.complex128)]
      if not legacy_opset_pre_ver(9):
        test_cases.append((TensorProto.STRING, tf.string))
    for ty, tf_type in test_cases:
      node_def = helper.make_node("Cast", ["input"], ["output"], to=ty)
      vector = [2, 3]
      output = run_node(node_def, [vector])
      np.testing.assert_equal(output["output"].dtype, tf_type)

    if not legacy_opset_pre_ver(9):
      test_cases2 = [(TensorProto.FLOAT, tf.float32),
                     (TensorProto.INT32, tf.int32),
                     (TensorProto.INT64, tf.int64),
                     (TensorProto.DOUBLE, tf.float64)]
      for ty, tf_type in test_cases2:
        node_def = helper.make_node("Cast", ["input"], ["output"], to=ty)
        vector = ['2', '3']
        output = run_node(node_def, [vector])
        np.testing.assert_equal(output["output"].dtype, tf_type) 
Example #24
Source File: metrics_test.py    From larq with Apache License 2.0
def test_config():
    mcv = metrics.FlipRatio(values_dtype="int16", name="mcv", dtype=tf.float16)
    assert mcv.name == "mcv"
    assert mcv.stateful
    assert mcv.dtype == tf.float16
    assert mcv.values_dtype == tf.int16

    mcv2 = metrics.FlipRatio.from_config(mcv.get_config())
    assert mcv2.name == "mcv"
    assert mcv2.stateful
    assert mcv2.dtype == tf.float16
    assert mcv2.values_dtype == tf.int16 
Example #25
Source File: model.py    From nucleus7 with Mozilla Public License 2.0
def _cast_plugin_inputs_for_mixed_precision(plugin: ModelPlugin,
                                            nucleotide_inputs: dict):
    if not plugin.allow_mixed_precision:
        cast_dtypes = {tf.float16: tf.float32}
    else:
        cast_dtypes = {tf.float32: tf.float16}
    nucleotide_inputs_casted = tf_ops.maybe_cast_dtype(
        nucleotide_inputs, cast_dtypes)
    return nucleotide_inputs_casted


# pylint: disable=unused-argument
# nucleotide is needed here for further use as a factory with this signature
Example #26
Source File: ops.py    From HyperGAN with MIT License
def parse_dtype(self, dtype):
        if dtype == 'float32':
            return tf.float32
        elif dtype == 'float16':
            return tf.float16
        else:
            raise Exception("dtype not defined: "+str(dtype)) 
Example #27
Source File: diet.py    From BERT with Apache License 2.0
def make_diet_var_getter(params):
  """Create a custom variable getter for diet variables according to params."""

  def diet_var_initializer(shape, dtype, partition_info=None):
    """Initializer for a diet variable."""
    del dtype
    del partition_info

    with common_layers.fn_device_dependency("diet_init") as out_deps:
      float_range = math.sqrt(3)
      ret = tf.random_uniform(shape, -float_range, float_range)
      if params.quantize:
        ret = _quantize(ret, params, randomize=False)
      out_deps.append(ret)
      return ret

  def diet_var_getter(getter, **kwargs):
    """Get diet variable and return it dequantized."""
    if params.quantize:
      kwargs["dtype"] = tf.float16
    kwargs["initializer"] = diet_var_initializer
    kwargs["trainable"] = False

    base_var = getter(**kwargs)

    dequantized = _dequantize(base_var, params)

    if not hasattr(params, "dequantized"):
      params.dequantized = defaultdict(list)
    params.dequantized[base_var.name].append(dequantized)

    return dequantized

  return diet_var_getter 
Example #28
Source File: ops.py    From HyperGAN with MIT License
def parse_dtype(self, dtype):
        if type(dtype) == Function:
            return dtype
        if dtype == 'float32':
            return tf.float32
        elif dtype == 'float16':
            return tf.float16
        else:
            raise Exception("dtype not defined: "+str(dtype)) 
Example #29
Source File: quantization.py    From BERT with Apache License 2.0
def float16_activations_var_getter(getter, *args, **kwargs):
  """A custom getter function for float32 parameters and float16 activations.

  This function ensures the following:
    1. All variables requested with type fp16 are stored as type fp32.
    2. All variables requested with type fp32 are returned as type fp16.
  See https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/
  #training_tensorflow for more information on this strategy.

  Args:
    getter: custom getter
    *args: arguments
    **kwargs: keyword arguments

  Returns:
    variables with the correct dtype.

  Raises:
    KeyError: if "dtype" is not provided as a kwarg.
  """
  requested_dtype = kwargs["dtype"]

  if requested_dtype == tf.float16:
    kwargs["dtype"] = tf.float32

  if requested_dtype == tf.float32:
    requested_dtype = tf.float16
  var = getter(*args, **kwargs)
  # This if statement is needed to guard the cast, because batch norm
  # assigns directly to the return value of this custom getter. The cast
  # makes the return value not a variable so it cannot be assigned. Batch
  # norm variables are always in fp32 so this if statement is never
  # triggered for them.
  if var.dtype.base_dtype != requested_dtype:
    var = tf.cast(var, requested_dtype)
  return var 
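This getter is intended to be installed on a variable scope, so every variable created inside follows the fp32-storage/fp16-compute convention. Below is a sketch of the typical TF 1.x wiring; the scope and variable names are illustrative.

with tf.variable_scope('body', custom_getter=float16_activations_var_getter):
  # Requesting fp16 creates an fp32 master variable and returns an fp16 cast of it.
  w = tf.get_variable('w', shape=[1024, 1024], dtype=tf.float16)
print(w.dtype)  # float16 -- the cast tensor; the stored master variable is float32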
Example #30
Source File: tf_util.py    From ASIS with MIT License
def _variable_on_cpu(name, shape, initializer, use_fp16=False, trainable=True):
  """Helper to create a Variable stored on CPU memory.
  Args:
    name: name of the variable
    shape: list of ints
    initializer: initializer for Variable
  Returns:
    Variable Tensor
  """
  with tf.device('/cpu:0'):
    dtype = tf.float16 if use_fp16 else tf.float32
    var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype, trainable=trainable)
  return var