Python tensorflow.cast() Examples

The following are 30 code examples of tensorflow.cast(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow , or try the search function .
Example #1
Source File: loss.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fprop(self, x, y, **kwargs):
        if self.attack is not None:
            x = x, self.attack(x)
        else:
            x = x,

        # Catching RuntimeError: Variable -= value not supported by tf.eager.
        try:
            y -= self.smoothing * (y - 1. / tf.cast(y.shape[-1], tf.float32))
        except RuntimeError:
            y.assign_sub(self.smoothing * (y - 1. / tf.cast(y.shape[-1],
                                                            tf.float32)))

        logits = [self.model.get_logits(x, **kwargs) for x in x]
        loss = sum(
            softmax_cross_entropy_with_logits(labels=y,
                                              logits=logit)
            for logit in logits)
        warnings.warn("LossCrossEntropy is deprecated, switch to "
                      "CrossEntropy. LossCrossEntropy may be removed on "
                      "or after 2019-03-06.")
        return loss 
Example #2
Source File: ops.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def one_hot_encoding(labels, num_classes, scope=None):
  """Transform numeric labels into onehot_labels.

  Args:
    labels: [batch_size] target labels.
    num_classes: total number of classes.
    scope: Optional scope for name_scope.
  Returns:
    one hot encoding of the labels.
  """
  with tf.name_scope(scope, 'OneHotEncoding', [labels]):
    batch_size = labels.get_shape()[0]
    indices = tf.expand_dims(tf.range(0, batch_size), 1)
    labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype)
    concated = tf.concat(axis=1, values=[indices, labels])
    onehot_labels = tf.sparse_to_dense(
        concated, tf.stack([batch_size, num_classes]), 1.0, 0.0)
    onehot_labels.set_shape([batch_size, num_classes])
    return onehot_labels 
Example #3
Source File: tf_example_decoder.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def _reshape_instance_masks(self, keys_to_tensors):
    """Reshape instance segmentation masks.

    The instance segmentation masks are reshaped to [num_instances, height,
    width] and cast to boolean type to save memory.

    Args:
      keys_to_tensors: a dictionary from keys to tensors.

    Returns:
      A 3-D boolean tensor of shape [num_instances, height, width].
    """
    masks = keys_to_tensors['image/segmentation/object']
    if isinstance(masks, tf.SparseTensor):
      masks = tf.sparse_tensor_to_dense(masks)
    height = keys_to_tensors['image/height']
    width = keys_to_tensors['image/width']
    to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32)

    return tf.cast(tf.reshape(masks, to_shape), tf.bool) 
Example #4
Source File: tfutil.py    From disentangling_conditional_gans with MIT License 6 votes vote down vote up
def _create_autosummary_var(name, value_expr):
    assert not _autosummary_finalized
    v = tf.cast(value_expr, tf.float32)
    if v.shape.ndims is 0:
        v = [v, np.float32(1.0)]
    elif v.shape.ndims is 1:
        v = [tf.reduce_sum(v), tf.cast(tf.shape(v)[0], tf.float32)]
    else:
        v = [tf.reduce_sum(v), tf.reduce_prod(tf.cast(tf.shape(v), tf.float32))]
    v = tf.cond(tf.is_finite(v[0]), lambda: tf.stack(v), lambda: tf.zeros(2))
    with tf.control_dependencies(None):
        var = tf.Variable(tf.zeros(2)) # [numerator, denominator]
    update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v))
    if name in _autosummary_vars:
        _autosummary_vars[name].append(var)
    else:
        _autosummary_vars[name] = [var]
    return update_op

#----------------------------------------------------------------------------
# Call filewriter.add_summary() with all summaries in the default graph,
# automatically finalizing and merging them on the first call. 
Example #5
Source File: target_assigner.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def _create_regression_weights(self, match):
    """Set regression weight for each anchor.

    Only positive anchors are set to contribute to the regression loss, so this
    method returns a weight of 1 for every positive anchor and 0 for every
    negative anchor.

    Args:
      match: a matcher.Match object that provides a matching between anchors
        and groundtruth boxes.

    Returns:
      reg_weights: a float32 tensor with shape [num_anchors] representing
        regression weights
    """
    reg_weights = tf.cast(match.matched_column_indicator(), tf.float32)
    return reg_weights 
Example #6
Source File: keypoint_ops.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def scale(keypoints, y_scale, x_scale, scope=None):
  """Scales keypoint coordinates in x and y dimensions.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    y_scale: (float) scalar tensor
    x_scale: (float) scalar tensor
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'Scale'):
    y_scale = tf.cast(y_scale, tf.float32)
    x_scale = tf.cast(x_scale, tf.float32)
    new_keypoints = keypoints * [[[y_scale, x_scale]]]
    return new_keypoints 
Example #7
Source File: networks.py    From disentangling_conditional_gans with MIT License 6 votes vote down vote up
def minibatch_stddev_layer(x, group_size=4):
    with tf.variable_scope('MinibatchStddev'):
        group_size = tf.minimum(group_size, tf.shape(x)[0])     # Minibatch must be divisible by (or smaller than) group_size.
        s = x.shape                                             # [NCHW]  Input shape.
        y = tf.reshape(x, [group_size, -1, s[1], s[2], s[3]])   # [GMCHW] Split minibatch into M groups of size G.
        y = tf.cast(y, tf.float32)                              # [GMCHW] Cast to FP32.
        y -= tf.reduce_mean(y, axis=0, keep_dims=True)           # [GMCHW] Subtract mean over group.
        y = tf.reduce_mean(tf.square(y), axis=0)                # [MCHW]  Calc variance over group.
        y = tf.sqrt(y + 1e-8)                                   # [MCHW]  Calc stddev over group.
        y = tf.reduce_mean(y, axis=[1,2,3], keep_dims=True)      # [M111]  Take average over fmaps and pixels.
        y = tf.cast(y, x.dtype)                                 # [M111]  Cast back to original data type.
        y = tf.tile(y, [group_size, 1, s[2], s[3]])             # [N1HW]  Replicate over group and pixels.
        return tf.concat([x, y], axis=1)                        # [NCHW]  Append as new fmap.

#----------------------------------------------------------------------------
# Generator network used in the paper. 
Example #8
Source File: loss.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fprop(self, x, y, **kwargs):
        kwargs.update(self.kwargs)
        if self.attack is not None:
            x = x, self.attack(x)
        else:
            x = x,

        # Catching RuntimeError: Variable -= value not supported by tf.eager.
        try:
            y -= self.smoothing * (y - 1. / tf.cast(y.shape[-1], y.dtype))
        except RuntimeError:
            y.assign_sub(self.smoothing * (y - 1. / tf.cast(y.shape[-1],
                                                            y.dtype)))

        logits = [self.model.get_logits(x, **kwargs) for x in x]
        loss = sum(
            tf.reduce_mean(softmax_cross_entropy_with_logits(labels=y,
                                                             logits=logit))
            for logit in logits)
        return loss 
Example #9
Source File: tensor.py    From spleeter with MIT License 6 votes vote down vote up
def from_float32_to_uint8(
        tensor,
        tensor_key='tensor',
        min_key='min',
        max_key='max'):
    """

    :param tensor:
    :param tensor_key:
    :param min_key:
    :param max_key:
    :returns:
    """
    tensor_min = tf.reduce_min(tensor)
    tensor_max = tf.reduce_max(tensor)
    return {
        tensor_key: tf.cast(
            (tensor - tensor_min) / (tensor_max - tensor_min + 1e-16)
            * 255.9999, dtype=tf.uint8),
        min_key: tensor_min,
        max_key: tensor_max
    } 
Example #10
Source File: spectrogram.py    From spleeter with MIT License 6 votes vote down vote up
def time_stretch(
        spectrogram,
        factor=1.0,
        method=tf.image.ResizeMethod.BILINEAR):
    """ Time stretch a spectrogram preserving shape in tensorflow. Note that
    this is an approximation in the frequency domain.

    :param spectrogram: Input spectrogram to be time stretched as tensor.
    :param factor: (Optional) Time stretch factor, must be >0, default to 1.
    :param mehtod: (Optional) Interpolation method, default to BILINEAR.
    :returns: Time stretched spectrogram as tensor with same shape.
    """
    T = tf.shape(spectrogram)[0]
    T_ts = tf.cast(tf.cast(T, tf.float32) * factor, tf.int32)[0]
    F = tf.shape(spectrogram)[1]
    ts_spec = tf.image.resize_images(
        spectrogram,
        [T_ts, F],
        method=method,
        align_corners=True)
    return tf.image.resize_image_with_crop_or_pad(ts_spec, T, F) 
Example #11
Source File: spectrogram.py    From spleeter with MIT License 6 votes vote down vote up
def pitch_shift(
        spectrogram,
        semitone_shift=0.0,
        method=tf.image.ResizeMethod.BILINEAR):
    """ Pitch shift a spectrogram preserving shape in tensorflow. Note that
    this is an approximation in the frequency domain.

    :param spectrogram: Input spectrogram to be pitch shifted as tensor.
    :param semitone_shift: (Optional) Pitch shift in semitone, default to 0.0.
    :param mehtod: (Optional) Interpolation method, default to BILINEAR.
    :returns: Pitch shifted spectrogram (same shape as spectrogram).
    """
    factor = 2 ** (semitone_shift / 12.)
    T = tf.shape(spectrogram)[0]
    F = tf.shape(spectrogram)[1]
    F_ps = tf.cast(tf.cast(F, tf.float32) * factor, tf.int32)[0]
    ps_spec = tf.image.resize_images(
        spectrogram,
        [T, F_ps],
        method=method,
        align_corners=True)
    paddings = [[0, 0], [0, tf.maximum(0, F - F_ps)], [0, 0]]
    return tf.pad(ps_spec[:, :F, :], paddings, 'CONSTANT') 
Example #12
Source File: train_eval.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def batch_of_random_bools(batch_size, n):
  """Return a batch of random "boolean" numbers.

  Args:
    batch_size:  Batch size dimension of returned tensor.
    n:  number of entries per batch.

  Returns:
    A [batch_size, n] tensor of "boolean" numbers, where each number is
    preresented as -1 or 1.
  """

  as_int = tf.random_uniform(
      [batch_size, n], minval=0, maxval=2, dtype=tf.int32)
  expanded_range = (as_int * 2) - 1
  return tf.cast(expanded_range, tf.float32) 
Example #13
Source File: decoder.py    From neural-combinatorial-optimization-rl-tensorflow with MIT License 6 votes vote down vote up
def loop_decode(self):
        # decoder_initial_state: Tuple Tensor (c,h) of size [batch_size x cell.state_size]
        # decoder_first_input: Tensor [batch_size x cell.state_size]

        # Loop the decoding process and collect results
        s,i = self.decoder_initial_state,  tf.cast(self.decoder_first_input,tf.float32)
        for step in range(self.seq_length):
            s, i = self.decode(s,i,step)

        # Return to start
        self.positions.append(self.first_city)

        # Stack visited indices
        self.positions=tf.stack(self.positions,axis=1)  # [Batch,seq_length+1]

        # Sum log_softmax over output steps
        self.log_softmax=tf.add_n(self.log_softmax)  # [Batch,seq_length]

        # Stack attending & pointing distribution
        self.attending=tf.stack(self.attending,axis=1) # [Batch,seq_length,seq_length]
        self.pointing=tf.stack(self.pointing,axis=1) # [Batch,seq_length,seq_length]
        
        # Return stacked lists of visited_indices and log_softmax for backprop
        return self.positions,self.log_softmax 
Example #14
Source File: model.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def compute_first_or_last(self, select, first=True):
    #perform first ot last operation on row select with probabilistic row selection
    answer = tf.zeros_like(select)
    running_sum = tf.zeros([self.batch_size, 1], self.data_type)
    for i in range(self.max_elements):
      if (first):
        current = tf.slice(select, [0, i], [self.batch_size, 1])
      else:
        current = tf.slice(select, [0, self.max_elements - 1 - i],
                           [self.batch_size, 1])
      curr_prob = current * (1 - running_sum)
      curr_prob = curr_prob * tf.cast(curr_prob >= 0.0, self.data_type)
      running_sum += curr_prob
      temp_ans = []
      curr_prob = tf.expand_dims(tf.reshape(curr_prob, [self.batch_size]), 0)
      for i_ans in range(self.max_elements):
        if (not (first) and i_ans == self.max_elements - 1 - i):
          temp_ans.append(curr_prob)
        elif (first and i_ans == i):
          temp_ans.append(curr_prob)
        else:
          temp_ans.append(tf.zeros_like(curr_prob))
      temp_ans = tf.transpose(tf.concat(axis=0, values=temp_ans))
      answer += temp_ans
    return answer 
Example #15
Source File: model.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def char_predictions(self, chars_logit):
    """Returns confidence scores (softmax values) for predicted characters.

    Args:
      chars_logit: chars logits, a tensor with shape
        [batch_size x seq_length x num_char_classes]

    Returns:
      A tuple (ids, log_prob, scores), where:
        ids - predicted characters, a int32 tensor with shape
          [batch_size x seq_length];
        log_prob - a log probability of all characters, a float tensor with
          shape [batch_size, seq_length, num_char_classes];
        scores - corresponding confidence scores for characters, a float
        tensor
          with shape [batch_size x seq_length].
    """
    log_prob = utils.logits_to_log_prob(chars_logit)
    ids = tf.to_int32(tf.argmax(log_prob, dimension=2), name='predicted_chars')
    mask = tf.cast(
        slim.one_hot_encoding(ids, self._params.num_char_classes), tf.bool)
    all_scores = tf.nn.softmax(chars_logit)
    selected_scores = tf.boolean_mask(all_scores, mask, name='char_scores')
    scores = tf.reshape(selected_scores, shape=(-1, self._params.seq_length))
    return ids, log_prob, scores 
Example #16
Source File: cifar10.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def loss(logits, labels):
  """Add L2Loss to all the trainable variables.

  Add summary for "Loss" and "Loss/avg".
  Args:
    logits: Logits from inference().
    labels: Labels from distorted_inputs or inputs(). 1-D tensor
            of shape [batch_size]

  Returns:
    Loss tensor of type float.
  """
  # Calculate the average cross entropy loss across the batch.
  labels = tf.cast(labels, tf.int64)
  cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits, name='cross_entropy_per_example')
  cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
  tf.add_to_collection('losses', cross_entropy_mean)

  # The total loss is defined as the cross entropy loss plus all of the weight
  # decay terms (L2 loss).
  return tf.add_n(tf.get_collection('losses'), name='total_loss') 
Example #17
Source File: cifar10.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def inputs(eval_data):
  """Construct input for CIFAR evaluation using the Reader ops.

  Args:
    eval_data: bool, indicating if one should use the train or eval data set.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.

  Raises:
    ValueError: If no data_dir
  """
  if not FLAGS.data_dir:
    raise ValueError('Please supply a data_dir')
  data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
  images, labels = cifar10_input.inputs(eval_data=eval_data,
                                        data_dir=data_dir,
                                        batch_size=FLAGS.batch_size)
  if FLAGS.use_fp16:
    images = tf.cast(images, tf.float16)
    labels = tf.cast(labels, tf.float16)
  return images, labels 
Example #18
Source File: cifar10.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def distorted_inputs():
  """Construct distorted input for CIFAR training using the Reader ops.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.

  Raises:
    ValueError: If no data_dir
  """
  if not FLAGS.data_dir:
    raise ValueError('Please supply a data_dir')
  data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
  images, labels = cifar10_input.distorted_inputs(data_dir=data_dir,
                                                  batch_size=FLAGS.batch_size)
  if FLAGS.use_fp16:
    images = tf.cast(images, tf.float16)
    labels = tf.cast(labels, tf.float16)
  return images, labels 
Example #19
Source File: accountant.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def accumulate_privacy_spending(self, eps_delta, unused_sigma,
                                  num_examples):
    """Accumulate the privacy spending.

    Currently only support approximate privacy. Here we assume we use Gaussian
    noise on randomly sampled batch so we get better composition: 1. the per
    batch privacy is computed using privacy amplication via sampling bound;
    2. the composition is done using the composition with Gaussian noise.
    TODO(liqzhang) Add a link to a document that describes the bounds used.

    Args:
      eps_delta: EpsDelta pair which can be tensors.
      unused_sigma: the noise sigma. Unused for this accountant.
      num_examples: the number of examples involved.
    Returns:
      a TensorFlow operation for updating the privacy spending.
    """

    eps, delta = eps_delta
    with tf.control_dependencies(
        [tf.Assert(tf.greater(delta, 0),
                   ["delta needs to be greater than 0"])]):
      amortize_ratio = (tf.cast(num_examples, tf.float32) * 1.0 /
                        self._total_examples)
      # Use privacy amplification via sampling bound.
      # See Lemma 2.2 in http://arxiv.org/pdf/1405.7085v2.pdf
      # TODO(liqzhang) Add a link to a document with formal statement
      # and proof.
      amortize_eps = tf.reshape(tf.log(1.0 + amortize_ratio * (
          tf.exp(eps) - 1.0)), [1])
      amortize_delta = tf.reshape(amortize_ratio * delta, [1])
      return tf.group(*[tf.assign_add(self._eps_squared_sum,
                                      tf.square(amortize_eps)),
                        tf.assign_add(self._delta_sum, amortize_delta)]) 
Example #20
Source File: vgslspecs.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def AddReShape(self, prev_layer, index):
    """Reshapes the input tensor by moving each (x_scale,y_scale) rectangle to.

       the depth dimension. NOTE that the TF convention is that inputs are
       [batch, y, x, depth].

    Args:
      prev_layer: Input tensor.
      index:      Position in model_str to start parsing

    Returns:
      Output tensor, end index in model_str.
    """
    pattern = re.compile(R'(S)(?:{(\w)})?(\d+)\((\d+)x(\d+)\)(\d+),(\d+)')
    m = pattern.match(self.model_str, index)
    if m is None:
      return None, None
    name = self._GetLayerName(m.group(0), index, m.group(2))
    src_dim = int(m.group(3))
    part_a = int(m.group(4))
    part_b = int(m.group(5))
    dest_dim_a = int(m.group(6))
    dest_dim_b = int(m.group(7))
    if part_a == 0:
      part_a = -1
    if part_b == 0:
      part_b = -1
    prev_shape = tf.shape(prev_layer)
    layer = shapes.transposing_reshape(
        prev_layer, src_dim, part_a, part_b, dest_dim_a, dest_dim_b, name=name)
    # Compute scale factors.
    result_shape = tf.shape(layer)
    for i in xrange(len(self.reduction_factors)):
      if self.reduction_factors[i] is not None:
        factor1 = tf.cast(self.reduction_factors[i], tf.float32)
        factor2 = tf.cast(prev_shape[i], tf.float32)
        divisor = tf.cast(result_shape[i], tf.float32)
        self.reduction_factors[i] = tf.div(tf.multiply(factor1, factor2), divisor)
    return layer, m.end() 
Example #21
Source File: model.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def compute_lookup_error(self, val):
    #computes lookup error.
    cond = tf.equal(self.batch_print_answer, val)
    inter = tf.where(
        cond, self.init_print_error,
        tf.tile(
            tf.reshape(tf.constant(1e10, self.data_type), [1, 1, 1]), [
                self.batch_size, self.utility.FLAGS.max_word_cols +
                self.utility.FLAGS.max_number_cols,
                self.utility.FLAGS.max_elements
            ]))
    return tf.reduce_min(tf.reduce_min(inter, 1), 1) * tf.cast(
        tf.greater(
            tf.reduce_sum(tf.reduce_sum(tf.cast(cond, self.data_type), 1), 1),
            0.0), self.data_type) 
Example #22
Source File: model.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def compute_max_or_min(self, select, maxi=True):
    #computes the argmax and argmin of a column with probabilistic row selection
    answer = tf.zeros([
        self.batch_size, self.num_cols + self.num_word_cols, self.max_elements
    ], self.data_type)
    sum_prob = tf.zeros([self.batch_size, self.num_cols + self.num_word_cols],
                        self.data_type)
    for j in range(self.max_elements):
      if (maxi):
        curr_pos = j
      else:
        curr_pos = self.max_elements - 1 - j
      select_index = tf.slice(self.full_processed_sorted_index_column,
                              [0, 0, curr_pos], [self.batch_size, -1, 1])
      select_mask = tf.equal(
          tf.tile(
              tf.expand_dims(
                  tf.tile(
                      tf.expand_dims(tf.range(self.max_elements), 0),
                      [self.batch_size, 1]), 1),
              [1, self.num_cols + self.num_word_cols, 1]), select_index)
      curr_prob = tf.expand_dims(select, 1) * tf.cast(
          select_mask, self.data_type) * self.select_bad_number_mask
      curr_prob = curr_prob * tf.expand_dims((1 - sum_prob), 2)
      curr_prob = curr_prob * tf.expand_dims(
          tf.cast((1 - sum_prob) > 0.0, self.data_type), 2)
      answer = tf.where(select_mask, curr_prob, answer)
      sum_prob += tf.reduce_sum(curr_prob, 2)
    return answer 
Example #23
Source File: model.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def make_hard_softmax(self, softmax):
    #converts soft selection to hard selection. used at test time
    cond = tf.equal(
        softmax, tf.reshape(tf.reduce_max(softmax, 1), [self.batch_size, 1]))
    softmax = tf.where(
        cond, tf.fill(tf.shape(softmax), 1.0), tf.fill(tf.shape(softmax), 0.0))
    softmax = tf.cast(softmax, self.data_type)
    return softmax 
Example #24
Source File: ops.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def padded_one_hot_encoding(indices, depth, left_pad):
  """Returns a zero padded one-hot tensor.

  This function converts a sparse representation of indices (e.g., [4]) to a
  zero padded one-hot representation (e.g., [0, 0, 0, 0, 1] with depth = 4 and
  left_pad = 1). If `indices` is empty, the result will simply be a tensor of
  shape (0, depth + left_pad). If depth = 0, then this function just returns
  `None`.

  Args:
    indices: an integer tensor of shape [num_indices].
    depth: depth for the one-hot tensor (integer).
    left_pad: number of zeros to left pad the one-hot tensor with (integer).

  Returns:
    padded_onehot: a tensor with shape (num_indices, depth + left_pad). Returns
      `None` if the depth is zero.

  Raises:
    ValueError: if `indices` does not have rank 1 or if `left_pad` or `depth are
      either negative or non-integers.

  TODO: add runtime checks for depth and indices.
  """
  if depth < 0 or not isinstance(depth, (int, long) if six.PY2 else int):
    raise ValueError('`depth` must be a non-negative integer.')
  if left_pad < 0 or not isinstance(left_pad, (int, long) if six.PY2 else int):
    raise ValueError('`left_pad` must be a non-negative integer.')
  if depth == 0:
    return None
  if len(indices.get_shape().as_list()) != 1:
    raise ValueError('`indices` must have rank 1')

  def one_hot_and_pad():
    one_hot = tf.cast(tf.one_hot(tf.cast(indices, tf.int64), depth,
                                 on_value=1, off_value=0), tf.float32)
    return tf.pad(one_hot, [[0, 0], [left_pad, 0]], mode='CONSTANT')
  result = tf.cond(tf.greater(tf.size(indices), 0), one_hot_and_pad,
                   lambda: tf.zeros((depth + left_pad, 0)))
  return tf.reshape(result, [-1, depth + left_pad]) 
Example #25
Source File: metrics.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def sequence_accuracy(predictions, targets, rej_char, streaming=False):
  """Computes sequence level accuracy.

  Both input tensors should have the same shape: [batch_size x seq_length].

  Args:
    predictions: predicted character classes.
    targets: ground truth character classes.
    rej_char: the character id used to mark empty element (end of sequence).
    streaming: if True, uses the streaming mean from the slim.metric module.

  Returns:
    a update_ops for execution and value tensor whose value on evaluation
    returns the total sequence accuracy.
  """

  with tf.variable_scope('SequenceAccuracy'):
    predictions.get_shape().assert_is_compatible_with(targets.get_shape())

    targets = tf.to_int32(targets)
    const_rej_char = tf.constant(
        rej_char, shape=targets.get_shape(), dtype=tf.int32)
    include_mask = tf.not_equal(targets, const_rej_char)
    include_predictions = tf.to_int32(
        tf.where(include_mask, predictions,
                 tf.zeros_like(predictions) + rej_char))
    correct_chars = tf.to_float(tf.equal(include_predictions, targets))
    correct_chars_counts = tf.cast(
        tf.reduce_sum(correct_chars, reduction_indices=[1]), dtype=tf.int32)
    target_length = targets.get_shape().dims[1].value
    target_chars_counts = tf.constant(
        target_length, shape=correct_chars_counts.get_shape())
    accuracy_per_example = tf.to_float(
        tf.equal(correct_chars_counts, target_chars_counts))
    if streaming:
      return tf.contrib.metrics.streaming_mean(accuracy_per_example)
    else:
      return tf.reduce_mean(accuracy_per_example) 
Example #26
Source File: faster_rcnn_meta_arch.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def _sample_box_classifier_minibatch(self,
                                       proposal_boxlist,
                                       groundtruth_boxlist,
                                       groundtruth_classes_with_background):
    """Samples a mini-batch of proposals to be sent to the box classifier.

    Helper function for self._postprocess_rpn.

    Args:
      proposal_boxlist: A BoxList containing K proposal boxes in absolute
        coordinates.
      groundtruth_boxlist: A Boxlist containing N groundtruth object boxes in
        absolute coordinates.
      groundtruth_classes_with_background: A tensor with shape
        `[N, self.num_classes + 1]` representing groundtruth classes. The
        classes are assumed to be k-hot encoded, and include background as the
        zero-th class.

    Returns:
      a BoxList contained sampled proposals.
    """
    (cls_targets, cls_weights, _, _, _) = self._detector_target_assigner.assign(
        proposal_boxlist, groundtruth_boxlist,
        groundtruth_classes_with_background)
    # Selects all boxes as candidates if none of them is selected according
    # to cls_weights. This could happen as boxes within certain IOU ranges
    # are ignored. If triggered, the selected boxes will still be ignored
    # during loss computation.
    cls_weights += tf.to_float(tf.equal(tf.reduce_sum(cls_weights), 0))
    positive_indicator = tf.greater(tf.argmax(cls_targets, axis=1), 0)
    sampled_indices = self._second_stage_sampler.subsample(
        tf.cast(cls_weights, tf.bool),
        self._second_stage_batch_size,
        positive_indicator)
    return box_list_ops.boolean_mask(proposal_boxlist, sampled_indices) 
Example #27
Source File: tf_logits.py    From Black-Box-Audio with MIT License 5 votes vote down vote up
def compute_mfcc(audio, **kwargs):
    """
    Compute the MFCC for a given audio waveform. This is
    identical to how DeepSpeech does it, but does it all in
    TensorFlow so that we can differentiate through it.
    """

    batch_size, size = audio.get_shape().as_list()
    audio = tf.cast(audio, tf.float32)

    # 1. Pre-emphasizer, a high-pass filter
    audio = tf.concat((audio[:, :1], audio[:, 1:] - 0.97*audio[:, :-1], np.zeros((batch_size,1000),dtype=np.float32)), 1)

    # 2. windowing into frames of 320 samples, overlapping
    windowed = tf.stack([audio[:, i:i+400] for i in range(0,size-320,160)],1)

    # 3. Take the FFT to convert to frequency space
    ffted = tf.spectral.rfft(windowed, [512])
    ffted = 1.0 / 512 * tf.square(tf.abs(ffted))

    # 4. Compute the Mel windowing of the FFT
    energy = tf.reduce_sum(ffted,axis=2)+1e-30
    filters = np.load("filterbanks.npy").T
    feat = tf.matmul(ffted, np.array([filters]*batch_size,dtype=np.float32))+1e-30

    # 5. Take the DCT again, because why not
    feat = tf.log(feat)
    feat = tf.spectral.dct(feat, type=2, norm='ortho')[:,:,:26]

    # 6. Amplify high frequencies for some reason
    _,nframes,ncoeff = feat.get_shape().as_list()
    n = np.arange(ncoeff)
    lift = 1 + (22/2.)*np.sin(np.pi*n/22)
    feat = lift*feat
    width = feat.get_shape().as_list()[1]

    # 7. And now stick the energy next to the features
    feat = tf.concat((tf.reshape(tf.log(energy),(-1,width,1)), feat[:, :, 1:]), axis=2)
    
    return feat 
Example #28
Source File: cifar10.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def parser(self, value):
    """Parse a Cifar10 record from value.

    Output images are in [height, width, depth] layout.
    """
    # Dimensions of the images in the CIFAR-10 dataset.
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    label_bytes = 1
    image_bytes = HEIGHT * WIDTH * DEPTH
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    record_bytes = label_bytes + image_bytes

    # Convert from a string to a vector of uint8 that is record_bytes long.
    record_as_bytes = tf.decode_raw(value, tf.uint8)

    # The first bytes represent the label, which we convert from
    # uint8->int32.
    label = tf.cast(
        tf.strided_slice(record_as_bytes, [0], [label_bytes]), tf.int32)

    label.set_shape([1])

    # The remaining bytes after the label represent the image, which
    # we reshape from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(
        tf.strided_slice(record_as_bytes, [label_bytes], [record_bytes]),
        [3, 32, 32])
    # Convert from [depth, height, width] to [height, width, depth].
    # This puts data in a compatible layout with TF image preprocessing APIs.
    image = tf.transpose(depth_major, [1, 2, 0])

    # Do custom preprocessing here.
    image = self.preprocess(image)

    return image, label 
Example #29
Source File: word2vec.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def optimize(self, loss):
    """Build the graph to optimize the loss function."""

    # Optimizer nodes.
    # Linear learning rate decay.
    opts = self._options
    words_to_train = float(opts.words_per_epoch * opts.epochs_to_train)
    lr = opts.learning_rate * tf.maximum(
        0.0001, 1.0 - tf.cast(self._words, tf.float32) / words_to_train)
    self._lr = lr
    optimizer = tf.train.GradientDescentOptimizer(lr)
    train = optimizer.minimize(loss,
                               global_step=self.global_step,
                               gate_gradients=optimizer.GATE_NONE)
    self._train = train 
Example #30
Source File: tf_utils.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def inverse_sigmoid_decay(k, global_step_op):
  with tf.name_scope('inverse_sigmoid_decay'):
    k = tf.constant(k, dtype=tf.float32)
    tmp = k*tf.exp(-tf.cast(global_step_op, tf.float32)/k)
    tmp = tmp / (1. + tmp)
  return tmp