Python tensorflow.unique() Examples

The following are 30 code examples of tensorflow.unique(), collected from open-source projects. Each example notes its source file, the project it comes from, and that project's license.
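In short, tf.unique(x) returns the distinct values of a 1-D tensor x together with an index tensor that maps every element of x back to its position in the deduplicated output. A minimal sketch before the examples, assuming TensorFlow 2.x eager execution and made-up input values:

import tensorflow as tf

x = tf.constant([1, 1, 2, 4, 4, 4, 7, 8, 8])
y, idx = tf.unique(x)

print(y.numpy())    # [1 2 4 7 8]         distinct values, in order of first appearance
print(idx.numpy())  # [0 0 1 2 2 2 3 4 4] index of each element of x within y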
Example #1
Source File: tf_utils.py    From transform with Apache License 2.0
def lookup_key(key, key_vocab):
  """Look up the index of a key.

  Args:
    key: A `Tensor`.
    key_vocab: A `Tensor` of unique keys that can be converted to a hash table.

  Returns:
    The index of each element of `key`, determined by its position in `key_vocab` (-1 if the key is not found).
  """
  initializer = tf.lookup.KeyValueTensorInitializer(
      keys=key_vocab,
      values=tf.cast(tf.range(tf.size(key_vocab)), tf.int64),
      key_dtype=key_vocab.dtype,
      value_dtype=tf.int64)
  table = tf.lookup.StaticHashTable(initializer, default_value=-1)
  return table.lookup(key) 
Example #2
Source File: train_utils.py    From DeepPavlov with Apache License 2.0
def _deduplicate_indexed_slices(values, indices):
    """Sums `values` associated with any non-unique `indices`.
    Args:
      values: A `Tensor` with rank >= 1.
      indices: A one-dimensional integer `Tensor`, indexing into the first
      dimension of `values` (as in an IndexedSlices object).
    Returns:
      A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a
      de-duplicated version of `indices` and `summed_values` contains the sum of
      `values` slices associated with each unique index.
    """
    unique_indices, new_index_positions = tf.unique(indices)
    summed_values = tf.unsorted_segment_sum(values,
                                            new_index_positions,
                                            tf.shape(unique_indices)[0])
    return (summed_values, unique_indices) 
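The pattern above, tf.unique() to collapse repeated indices followed by an unsorted segment sum over the remapped positions, merges duplicate rows of a sparse gradient. A small hypothetical walk-through (TF 2.x eager execution, illustrative numbers only):

import tensorflow as tf

values = tf.constant([[1., 2.], [10., 20.], [100., 200.]])
indices = tf.constant([5, 5, 9])

unique_indices, new_index_positions = tf.unique(indices)
summed_values = tf.math.unsorted_segment_sum(
    values, new_index_positions, tf.shape(unique_indices)[0])

print(unique_indices.numpy())  # [5 9]
print(summed_values.numpy())   # [[ 11.  22.]   rows for index 5 are summed
                               #  [100. 200.]]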
Example #3
Source File: training.py    From embedding with MIT License
def _deduplicate_indexed_slices(values, indices):
    """Sums `values` associated with any non-unique `indices`.
    Args:
      values: A `Tensor` with rank >= 1.
      indices: A one-dimensional integer `Tensor`, indexing into the first
      dimension of `values` (as in an IndexedSlices object).
    Returns:
      A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a
      de-duplicated version of `indices` and `summed_values` contains the sum of
      `values` slices associated with each unique index.
    """
    unique_indices, new_index_positions = tf.unique(indices)
    summed_values = tf.unsorted_segment_sum(
      values, new_index_positions,
      tf.shape(unique_indices)[0])
    return (summed_values, unique_indices) 
Example #4
Source File: nar_model.py    From chameleon_recsys with MIT License
def get_neg_items_click(self, valid_samples_session, num_neg_samples):
        # Shuffle neg. samples for each click
        valid_samples_shuffled = tf.random.shuffle(valid_samples_session)

        samples_unique_vals, samples_unique_idx = tf.unique(valid_samples_shuffled)

        # Return the first N unique items (to avoid repetition)
        first_unique_items = tf.unsorted_segment_min(
            data=valid_samples_shuffled,
            segment_ids=samples_unique_idx,
            num_segments=tf.shape(samples_unique_vals)[0])[:num_neg_samples]

        # Pad if necessary to keep the number of neg. samples constant (e.g. first batch)
        first_unique_items_padded_if_needed = tf.concat(
            [first_unique_items,
             tf.zeros(num_neg_samples - tf.shape(first_unique_items)[0], tf.int64)],
            axis=0)

        return first_unique_items_padded_if_needed 
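Because every entry within a segment equals that segment's value, the unsorted_segment_min above simply yields the unique items in order of first appearance; the slice and padding then fix the sample count. A hypothetical illustration (TF 2.x eager execution, made-up item ids):

import tensorflow as tf

samples = tf.constant([9, 4, 9, 2, 4], dtype=tf.int64)   # shuffled neg. samples with repeats
num_neg_samples = 2

vals, idx = tf.unique(samples)                           # vals = [9 4 2], idx = [0 1 0 2 1]
firsts = tf.math.unsorted_segment_min(
    samples, idx, tf.shape(vals)[0])[:num_neg_samples]   # [9 4]

# Pad with zeros if fewer than num_neg_samples unique items were found.
padded = tf.concat(
    [firsts, tf.zeros(num_neg_samples - tf.shape(firsts)[0], tf.int64)], axis=0)
print(padded.numpy())                                    # [9 4]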
Example #5
Source File: training.py    From bilm-tf with Apache License 2.0
def _deduplicate_indexed_slices(values, indices):
    """Sums `values` associated with any non-unique `indices`.
    Args:
      values: A `Tensor` with rank >= 1.
      indices: A one-dimensional integer `Tensor`, indexing into the first
      dimension of `values` (as in an IndexedSlices object).
    Returns:
      A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a
      de-duplicated version of `indices` and `summed_values` contains the sum of
      `values` slices associated with each unique index.
    """
    unique_indices, new_index_positions = tf.unique(indices)
    summed_values = tf.unsorted_segment_sum(
      values, new_index_positions,
      tf.shape(unique_indices)[0])
    return (summed_values, unique_indices) 
Example #6
Source File: all_transformer.py    From CalibNet with MIT License
def get_pixel_value(img, x, y):
    """Cantor pairing for removing non-unique updates and indices.

    At the time of implementation, an unresolved issue with scatter_nd caused
    problems with int32 update values; until that is fixed, this runs on the CPU.
    """

    with tf.device('/cpu:0'):
        indices = tf.stack([y, x], 2)
        indices = tf.reshape(indices, (375*1242, 2))
        values = tf.reshape(img, [-1])

        Y = indices[:,0]
        X = indices[:,1]
        Z = (X + Y)*(X + Y + 1)/2 + Y

        filtered, idx = tf.unique(tf.squeeze(Z))
        updated_values  = tf.unsorted_segment_max(values, idx, tf.shape(filtered)[0])

        # updated_indices = tf.map_fn(fn=lambda i: reverse(i), elems=filtered, dtype=tf.float32)
        updated_indices = reverse_all(filtered)
        updated_indices = tf.cast(updated_indices, 'int32')
        resolved_map = tf.scatter_nd(updated_indices, updated_values, img.shape)

        return resolved_map 
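The reverse_all helper is not shown in this snippet. For reference, the closed-form inverse of the Cantor pairing it would need is sketched below; this is a hypothetical stand-in (TF 2.x), not the CalibNet implementation:

import tensorflow as tf

def inverse_cantor_pairing(z):
    """Recover the original (y, x) index pairs from Cantor-paired values z."""
    z = tf.cast(z, tf.float64)                        # float64 keeps large pairings exact
    w = tf.floor((tf.sqrt(8.0 * z + 1.0) - 1.0) / 2.0)
    t = w * (w + 1.0) / 2.0
    y = z - t                                         # since z = w(w + 1)/2 + y with w = x + y
    x = w - y
    return tf.cast(tf.stack([y, x], axis=1), tf.int32)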
Example #7
Source File: training.py    From nlp_research with MIT License
def _deduplicate_indexed_slices(values, indices):
    """Sums `values` associated with any non-unique `indices`.
    Args:
      values: A `Tensor` with rank >= 1.
      indices: A one-dimensional integer `Tensor`, indexing into the first
      dimension of `values` (as in an IndexedSlices object).
    Returns:
      A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a
      de-duplicated version of `indices` and `summed_values` contains the sum of
      `values` slices associated with each unique index.
    """
    unique_indices, new_index_positions = tf.unique(indices)
    summed_values = tf.unsorted_segment_sum(
      values, new_index_positions,
      tf.shape(unique_indices)[0])
    return (summed_values, unique_indices) 
Example #8
Source File: protocol.py    From AmpliGraph with Apache License 2.0
def create_mappings(X):
    """Create string-IDs mappings for entities and relations.

    Entities and relations are assigned incremental, unique integer IDs.
    Mappings are preserved in two distinct dictionaries,
    and counters are separated for entities and relations mappings.

    Parameters
    ----------
    X : ndarray, shape [n, 3]
        The triples from which to extract the mappings.

    Returns
    -------
    rel_to_idx : dict
        The relation-to-internal-id associations.
    ent_to_idx: dict
        The entity-to-internal-id associations.

    """
    logger.debug('Creating mappings for entities and relations.')
    unique_ent = np.unique(np.concatenate((X[:, 0], X[:, 2])))
    unique_rel = np.unique(X[:, 1])
    return _create_unique_mappings(unique_ent, unique_rel) 
Example #9
Source File: training.py    From ELMo_Chin with Apache License 2.0
def _deduplicate_indexed_slices(values, indices):
    """Sums `values` associated with any non-unique `indices`.
    Args:
      values: A `Tensor` with rank >= 1.
      indices: A one-dimensional integer `Tensor`, indexing into the first
      dimension of `values` (as in an IndexedSlices object).
    Returns:
      A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a
      de-duplicated version of `indices` and `summed_values` contains the sum of
      `values` slices associated with each unique index.
    """
    unique_indices, new_index_positions = tf.unique(indices)
    summed_values = tf.unsorted_segment_sum(
      values, new_index_positions,
      tf.shape(unique_indices)[0])
    return (summed_values, unique_indices) 
Example #10
Source File: common_utils.py    From SegSort with MIT License
def prepare_prototype_labels(semantic_labels, instance_labels, offset=256):
  """Prepares prototype labels from semantic and instance labels.

  This function generates unique prototype labels from semantic and instance
  labels. Note that instance labels sometimes can be cluster labels.

  Args:
    semantic_labels: A 1-D integer tensor for semantic labels.
    instance_labels: A 1-D integer tensor for instance labels.
    offset: An integer for instance offset.

  Returns:
    unique_instance_labels: A 1-D integer tensor for unique instance labels with
      the same length as the input semantic labels.
    prototype_labels: A 1-D integer tensor for the semantic labels of
      prototypes with length as the number of unique instances.
  """
  instance_labels = tf.cast(instance_labels, tf.int64)
  semantic_labels = tf.cast(semantic_labels, tf.int64)
  prototype_labels, unique_instance_labels = tf.unique(
      tf.reshape(semantic_labels + instance_labels * offset, [-1]))

  unique_instance_labels = tf.cast(unique_instance_labels, tf.int32)
  prototype_labels = tf.cast(prototype_labels % offset, tf.int32)
  return unique_instance_labels, prototype_labels 
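A brief hypothetical walk-through of the encoding above (TF 2.x eager execution, offset=256, made-up labels): each pixel's semantic and instance labels are fused into one integer, tf.unique() assigns one prototype per distinct pair, and the modulo recovers each prototype's semantic class.

import tensorflow as tf

semantic = tf.constant([1, 1, 2, 2, 2], dtype=tf.int64)
instance = tf.constant([0, 0, 0, 1, 1], dtype=tf.int64)

fused = semantic + instance * 256                  # [1 1 2 258 258]
prototype_labels, unique_instance_labels = tf.unique(fused)

print(unique_instance_labels.numpy())              # [0 0 1 2 2]  per-pixel prototype id
print((prototype_labels % 256).numpy())            # [1 2 2]      semantic class of each prototype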
Example #11
Source File: run_pretraining_gpu.py    From BERT-multi-gpu with Apache License 2.0
def _deduplicate_indexed_slices(values, indices):
    """Sums `values` associated with any non-unique `indices`.
    Args:
      values: A `Tensor` with rank >= 1.
      indices: A one-dimensional integer `Tensor`, indexing into the first
      dimension of `values` (as in an IndexedSlices object).
    Returns:
      A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a
      de-duplicated version of `indices` and `summed_values` contains the sum of
      `values` slices associated with each unique index.
    """
    unique_indices, new_index_positions = tf.unique(indices)
    summed_values = tf.unsorted_segment_sum(
        values, new_index_positions,
        tf.shape(unique_indices)[0])
    return (summed_values, unique_indices) 
Example #12
Source File: training.py    From delft with Apache License 2.0
def _deduplicate_indexed_slices(values, indices):
    """Sums `values` associated with any non-unique `indices`.
    Args:
      values: A `Tensor` with rank >= 1.
      indices: A one-dimensional integer `Tensor`, indexing into the first
      dimension of `values` (as in an IndexedSlices object).
    Returns:
      A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a
      de-duplicated version of `indices` and `summed_values` contains the sum of
      `values` slices associated with each unique index.
    """
    unique_indices, new_index_positions = tf.unique(indices)
    summed_values = tf.unsorted_segment_sum(
      values, new_index_positions,
      tf.shape(unique_indices)[0])
    return (summed_values, unique_indices) 
Example #13
Source File: span_prediction.py    From document-qa with Apache License 2.0
def predict(self, answer, start_logits, end_logits, mask) -> Prediction:
        masked_start_logits = exp_mask(start_logits, mask)
        masked_end_logits = exp_mask(end_logits, mask)

        if len(answer) == 3:
            group_ids = answer[2]
            # Turn the ids into segment ids using tf.unique
            _, group_segments = tf.unique(group_ids, out_idx=tf.int32)

            losses = []
            for answer_mask, logits in zip(answer, [masked_start_logits, masked_end_logits]):
                group_norms = segment_logsumexp(logits, group_segments)
                if self.aggregate == "sum":
                    log_score = segment_logsumexp(logits + VERY_NEGATIVE_NUMBER * (1 - tf.cast(answer_mask, tf.float32)),
                                                  group_segments)
                else:
                    raise ValueError()
                losses.append(tf.reduce_mean(-(log_score - group_norms)))
            loss = tf.add_n(losses)
        else:
            raise NotImplementedError()
        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
        return BoundaryPrediction(tf.nn.softmax(masked_start_logits),
                                  tf.nn.softmax(masked_end_logits),
                                  masked_start_logits, masked_end_logits, mask) 
Example #14
Source File: det_utils.py    From MobileNet with Apache License 2.0
def find_dup(a):
  """ Find the duplicated elements in 1-D a tensor.
  Args:
    a: 1-D tensor.
    
  Return:
    more_than_one_vals: duplicated value in a.
    indexes_in_a: duplicated value's index in a.
    dups_in_a: duplicated value with duplicate in a.
  """
  unique_a_vals, unique_idx = tf.unique(a)
  count_a_unique = tf.unsorted_segment_sum(tf.ones_like(a),
                                           unique_idx,
                                           tf.shape(a)[0])

  more_than_one = tf.greater(count_a_unique, 1)
  more_than_one_idx = tf.squeeze(tf.where(more_than_one))
  more_than_one_vals = tf.squeeze(tf.gather(unique_a_vals, more_than_one_idx))

  not_duplicated, _ = tf.setdiff1d(a, more_than_one_vals)
  dups_in_a, indexes_in_a = tf.setdiff1d(a, not_duplicated)

  return more_than_one_vals, indexes_in_a, dups_in_a 
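For intuition, a hypothetical input and the outputs find_dup would produce for it (values are illustrative; the function relies on TF 1.x APIs such as tf.setdiff1d):

# Hypothetical call, assuming a TF 1.x graph/session environment:
#
#   a = tf.constant([1, 3, 3, 5, 7, 7, 7])
#   more_than_one_vals, indexes_in_a, dups_in_a = find_dup(a)
#
#   more_than_one_vals -> [3, 7]            values appearing more than once
#   indexes_in_a       -> [1, 2, 4, 5, 6]   positions of the duplicated entries in a
#   dups_in_a          -> [3, 3, 7, 7, 7]   the duplicated entries themselves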
Example #15
Source File: bulk_component.py    From multilabel-image-classification-tensorflow with MIT License
def extract_fixed_feature_ids(comp, state, stride):
  """Extracts fixed feature IDs.

  Args:
    comp: Component whose fixed feature IDs we wish to extract.
    state: Live MasterState object for the component.
    stride: Tensor containing current batch * beam size.

  Returns:
    state handle: Updated state handle to be used after this call.
    ids: List of [stride * num_steps, 1] feature IDs per channel.  Missing IDs
         (e.g., due to batch padding) are set to -1.
  """
  num_channels = len(comp.spec.fixed_feature)
  if not num_channels:
    return state.handle, []

  for feature_spec in comp.spec.fixed_feature:
    check.Eq(feature_spec.size, 1, 'All features must have size=1')
    check.Lt(feature_spec.embedding_dim, 0, 'All features must be non-embedded')

  state.handle, indices, ids, _, num_steps = dragnn_ops.bulk_fixed_features(
      state.handle, component=comp.name, num_channels=num_channels)
  size = stride * num_steps

  fixed_ids = []
  for channel, feature_spec in enumerate(comp.spec.fixed_feature):
    tf.logging.info('[%s] Adding fixed feature IDs "%s"', comp.name,
                    feature_spec.name)

    # The +1 and -1 increments ensure that missing IDs default to -1.
    #
    # TODO(googleuser): This formula breaks if multiple IDs are extracted at some
    # step.  Try using tf.unique() to enforce the unique-IDS precondition.
    sums = tf.unsorted_segment_sum(ids[channel] + 1, indices[channel], size) - 1
    sums = tf.expand_dims(sums, axis=1)
    fixed_ids.append(network_units.NamedTensor(sums, feature_spec.name, dim=1))
  return state.handle, fixed_ids 
Example #16
Source File: train_utils.py    From SegSort with MIT License
def add_unsupervised_segsort_loss(embedding,
                                  concentration,
                                  cluster_labels,
                                  num_banks=0,
                                  loss_scope=None):
  with tf.name_scope(loss_scope, 'unsupervised_segsort_loss',
                     (embedding, concentration, cluster_labels,
                      num_banks)):

    # Normalize embedding.
    embedding = common_utils.normalize_embedding(embedding)
    shape = embedding.get_shape().as_list()
    batch_size = shape[0]
    embedding_dim = shape[3]

    # Add offset to cluster labels.
    max_clusters = 256
    offset = tf.range(0, max_clusters * batch_size, max_clusters)
    cluster_labels += tf.reshape(offset, [-1, 1, 1, 1])
    _, cluster_labels = tf.unique(tf.reshape(cluster_labels, [-1]))

    # Calculate prototypes.
    embedding = tf.reshape(embedding, [-1, embedding_dim])
    prototypes = common_utils.calculate_prototypes_from_labels(
      embedding, cluster_labels)

    similarities = _calculate_similarities(embedding, prototypes,
                                           concentration, batch_size)

    # Calculate the unsupervised loss.
    self_indices = tf.concat(
      [tf.expand_dims(tf.range(tf.shape(similarities)[0]), 1),
       tf.expand_dims(cluster_labels, 1)], axis=1)
    numerator = tf.reshape(tf.gather_nd(similarities, self_indices), [-1])
    denominator = tf.reduce_sum(similarities, axis=1)

    probabilities = tf.divide(numerator, denominator)
    return tf.reduce_mean(-tf.log(probabilities)) 
Example #17
Source File: linear.py    From estimator with Apache License 2.0
def _prune_and_unique_sparse_ids(self, id_weight_pair):
    """Remove duplicate and negative ids in a sparse tendor."""

    id_tensor = id_weight_pair.id_tensor
    if id_weight_pair.weight_tensor:
      weight_tensor = id_weight_pair.weight_tensor.values
    else:
      weight_tensor = tf.ones([tf.compat.v1.shape(id_tensor.indices)[0]],
                              tf.dtypes.float32)

    example_ids = tf.reshape(id_tensor.indices[:, 0], [-1])
    flat_ids = tf.cast(
        tf.reshape(id_tensor.values, [-1]), dtype=tf.dtypes.int64)
    # Prune invalid IDs (< 0) from the flat_ids, example_ids, and
    # weight_tensor.  These can come from looking up an OOV entry in the
    # vocabulary (default value being -1).
    is_id_valid = tf.math.greater_equal(flat_ids, 0)
    flat_ids = tf.compat.v1.boolean_mask(flat_ids, is_id_valid)
    example_ids = tf.compat.v1.boolean_mask(example_ids, is_id_valid)
    weight_tensor = tf.compat.v1.boolean_mask(weight_tensor, is_id_valid)

    projection_length = tf.math.reduce_max(flat_ids) + 1
    # project ids based on example ids so that we can dedup ids that
    # occur multiple times for a single example.
    projected_ids = projection_length * example_ids + flat_ids

    # Remove any redundant ids.
    ids, idx = tf.unique(projected_ids)
    # Keep only one example id per duplicated ids.
    example_ids_filtered = tf.math.unsorted_segment_min(
        example_ids, idx,
        tf.compat.v1.shape(ids)[0])

    # Reproject ids back into the feature-id space.
    reproject_ids = (ids - projection_length * example_ids_filtered)

    weights = tf.reshape(
        tf.math.unsorted_segment_sum(weight_tensor, idx,
                                     tf.compat.v1.shape(ids)[0]), [-1])
    return sdca_ops._SparseFeatureColumn(  # pylint: disable=protected-access
        example_ids_filtered, reproject_ids, weights) 
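The projection above folds each (example_id, feature_id) pair into a single integer so that one tf.unique() call can deduplicate features per example; a small hypothetical illustration (TF 2.x eager execution, made-up ids):

import tensorflow as tf

example_ids = tf.constant([0, 0, 1, 1], dtype=tf.int64)
flat_ids = tf.constant([3, 3, 3, 7], dtype=tf.int64)            # feature 3 repeats within example 0

projection_length = tf.reduce_max(flat_ids) + 1                 # 8
projected_ids = projection_length * example_ids + flat_ids      # [3 3 11 15]

ids, idx = tf.unique(projected_ids)                             # ids = [3 11 15]
example_ids_filtered = tf.math.unsorted_segment_min(
    example_ids, idx, tf.shape(ids)[0])                         # [0 1 1]
reproject_ids = ids - projection_length * example_ids_filtered  # back to [3 3 7]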
Example #18
Source File: utils.py    From multilabel-image-classification-tensorflow with MIT License
def accumulate_sparse_gradients(grad):
  """Accumulates repeated indices of a sparse gradient update.

  Args:
    grad: a tf.IndexedSlices gradient

  Returns:
    grad_indices: unique indices
    grad_values: gradient values corresponding to the indices
  """

  grad_indices, grad_segments = tf.unique(grad.indices)
  grad_values = tf.unsorted_segment_sum(grad.values, grad_segments,
                                        tf.shape(grad_indices)[0])
  return grad_indices, grad_values 
Example #19
Source File: embedding_utils.py    From models with Apache License 2.0
def create_initial_softmax_from_labels(last_frame_labels, reference_labels,
                                       decoder_output_stride, reduce_labels):
  """Creates initial softmax predictions from last frame labels.

  Args:
    last_frame_labels: last frame labels of shape [1, height, width, 1].
    reference_labels: reference frame labels of shape [1, height, width, 1].
    decoder_output_stride: Integer, the stride of the decoder. Can be None, in
      this case it's assumed that the last_frame_labels and reference_labels
      are already scaled to the decoder output resolution.
    reduce_labels: Boolean, whether to reduce the depth of the softmax one_hot
      encoding to the actual number of labels present in the reference frame
      (otherwise the depth will be the highest label index + 1).

  Returns:
    init_softmax: the initial softmax predictions.
  """
  if decoder_output_stride is None:
    labels_output_size = last_frame_labels
    reference_labels_output_size = reference_labels
  else:
    h = tf.shape(last_frame_labels)[1]
    w = tf.shape(last_frame_labels)[2]
    h_sub = model.scale_dimension(h, 1.0 / decoder_output_stride)
    w_sub = model.scale_dimension(w, 1.0 / decoder_output_stride)
    labels_output_size = tf.image.resize_nearest_neighbor(
        last_frame_labels, [h_sub, w_sub], align_corners=True)
    reference_labels_output_size = tf.image.resize_nearest_neighbor(
        reference_labels, [h_sub, w_sub], align_corners=True)
  if reduce_labels:
    unique_labels, _ = tf.unique(tf.reshape(reference_labels_output_size, [-1]))
    depth = tf.size(unique_labels)
  else:
    depth = tf.reduce_max(reference_labels_output_size) + 1
  one_hot_assertion = tf.assert_less(tf.reduce_max(labels_output_size), depth)
  with tf.control_dependencies([one_hot_assertion]):
    init_softmax = tf.one_hot(tf.squeeze(labels_output_size,
                                         axis=-1),
                              depth=depth,
                              dtype=tf.float32)
  return init_softmax 
Example #20
Source File: utils.py    From object_detection_with_tensorflow with MIT License
def accumulate_sparse_gradients(grad):
  """Accumulates repeated indices of a sparse gradient update.

  Args:
    grad: a tf.IndexedSlices gradient

  Returns:
    grad_indices: unique indices
    grad_values: gradient values corresponding to the indices
  """

  grad_indices, grad_segments = tf.unique(grad.indices)
  grad_values = tf.unsorted_segment_sum(grad.values, grad_segments,
                                        tf.shape(grad_indices)[0])
  return grad_indices, grad_values 
Example #21
Source File: embedder.py    From document-qa with Apache License 2.0
def query_once(self) -> bool:
        """
        Should the embedder be queried once for each unique word in the input, or once for every word occurrence?
        Intended to support placeholders, although I ended up not experimenting much with that route.
        """
        return False 
Example #22
Source File: utils.py    From models with Apache License 2.0
def accumulate_sparse_gradients(grad):
  """Accumulates repeated indices of a sparse gradient update.

  Args:
    grad: a tf.IndexedSlices gradient

  Returns:
    grad_indices: unique indices
    grad_values: gradient values corresponding to the indices
  """

  grad_indices, grad_segments = tf.unique(grad.indices)
  grad_values = tf.unsorted_segment_sum(grad.values, grad_segments,
                                        tf.shape(grad_indices)[0])
  return grad_indices, grad_values 
Example #23
Source File: utils.py    From object_detection_kitti with Apache License 2.0
def accumulate_sparse_gradients(grad):
  """Accumulates repeated indices of a sparse gradient update.

  Args:
    grad: a tf.IndexedSlices gradient

  Returns:
    grad_indices: unique indices
    grad_values: gradient values corresponding to the indices
  """

  grad_indices, grad_segments = tf.unique(grad.indices)
  grad_values = tf.unsorted_segment_sum(grad.values, grad_segments,
                                        tf.shape(grad_indices)[0])
  return grad_indices, grad_values 
Example #24
Source File: bulk_component.py    From object_detection_kitti with Apache License 2.0
def extract_fixed_feature_ids(comp, state, stride):
  """Extracts fixed feature IDs.

  Args:
    comp: Component whose fixed feature IDs we wish to extract.
    state: Live MasterState object for the component.
    stride: Tensor containing current batch * beam size.

  Returns:
    state handle: Updated state handle to be used after this call.
    ids: List of [stride * num_steps, 1] feature IDs per channel.  Missing IDs
         (e.g., due to batch padding) are set to -1.
  """
  num_channels = len(comp.spec.fixed_feature)
  if not num_channels:
    return state.handle, []

  for feature_spec in comp.spec.fixed_feature:
    check.Eq(feature_spec.size, 1, 'All features must have size=1')
    check.Lt(feature_spec.embedding_dim, 0, 'All features must be non-embedded')

  state.handle, indices, ids, _, num_steps = dragnn_ops.bulk_fixed_features(
      state.handle, component=comp.name, num_channels=num_channels)
  size = stride * num_steps

  fixed_ids = []
  for channel, feature_spec in enumerate(comp.spec.fixed_feature):
    tf.logging.info('[%s] Adding fixed feature IDs "%s"', comp.name,
                    feature_spec.name)

    # The +1 and -1 increments ensure that missing IDs default to -1.
    #
    # TODO(googleuser): This formula breaks if multiple IDs are extracted at some
    # step.  Try using tf.unique() to enforce the unique-IDS precondition.
    sums = tf.unsorted_segment_sum(ids[channel] + 1, indices[channel], size) - 1
    sums = tf.expand_dims(sums, axis=1)
    fixed_ids.append(network_units.NamedTensor(sums, feature_spec.name, dim=1))
  return state.handle, fixed_ids 
Example #25
Source File: bulk_component.py    From hands-detection with MIT License
def extract_fixed_feature_ids(comp, state, stride):
  """Extracts fixed feature IDs.

  Args:
    comp: Component whose fixed feature IDs we wish to extract.
    state: Live MasterState object for the component.
    stride: Tensor containing current batch * beam size.

  Returns:
    state handle: Updated state handle to be used after this call.
    ids: List of [stride * num_steps, 1] feature IDs per channel.  Missing IDs
         (e.g., due to batch padding) are set to -1.
  """
  num_channels = len(comp.spec.fixed_feature)
  if not num_channels:
    return state.handle, []

  for feature_spec in comp.spec.fixed_feature:
    check.Eq(feature_spec.size, 1, 'All features must have size=1')
    check.Lt(feature_spec.embedding_dim, 0, 'All features must be non-embedded')

  state.handle, indices, ids, _, num_steps = dragnn_ops.bulk_fixed_features(
      state.handle, component=comp.name, num_channels=num_channels)
  size = stride * num_steps

  fixed_ids = []
  for channel, feature_spec in enumerate(comp.spec.fixed_feature):
    tf.logging.info('[%s] Adding fixed feature IDs "%s"', comp.name,
                    feature_spec.name)

    # The +1 and -1 increments ensure that missing IDs default to -1.
    #
    # TODO(googleuser): This formula breaks if multiple IDs are extracted at some
    # step.  Try using tf.unique() to enforce the unique-IDS precondition.
    sums = tf.unsorted_segment_sum(ids[channel] + 1, indices[channel], size) - 1
    sums = tf.expand_dims(sums, axis=1)
    fixed_ids.append(network_units.NamedTensor(sums, feature_spec.name, dim=1))
  return state.handle, fixed_ids 
Example #26
Source File: expert_utils.py    From ASR with Apache License 2.0
def ParallelEmbeddingLookup(params, ids, data_parallelism):
  """Mod-sharded embedding lookup with multiple datashards.

  TODO(noam): does this work when vocab_size is not a multiple of `num_shards`?

  Args:
    params:  A list of `num_shards` `Tensors`, each with shapes
       `[vocab_size / num_params, depth]`.
    ids: A list of `num_datashards` one-dimensional integer `Tensors`,
       with shapes `[batch_size[i]]`
    data_parallelism: A Parallelism object.

  Returns:
    a list of `num_datashards` `Tensors`, each with shape
       `[batch_size[i], depth]`.
  """
  param_devices = [x.device for x in params]
  model_parallelism = Parallelism(param_devices)
  num_shards = len(param_devices)
  # pylint: disable=unbalanced-tuple-unpacking
  ids, unique_idx = data_parallelism(tf.unique, ids)
  # pylint: enable=unbalanced-tuple-unpacking
  gates = data_parallelism(tf.mod, ids, num_shards)
  ids_div = data_parallelism(tf.div, ids, num_shards)
  dispatcher = DistributedSingleDispatcher(data_parallelism, model_parallelism,
                                           gates)
  x_ids_div = dispatcher.Dispatch(ids_div)
  params = model_parallelism(ConvertGradientToTensor, params)
  x_emb = model_parallelism(tf.gather, params, x_ids_div)
  r_emb = dispatcher.Combine(x_emb)
  r_emb = data_parallelism(tf.gather, r_emb, unique_idx)
  return r_emb 
Example #27
Source File: latent_factor.py    From openrec with Apache License 2.0
def censor(self, censor_id):
        
        unique_censor_id, _ = tf.unique(censor_id)
        embedding_gather = tf.gather(self.variables[0], indices=unique_censor_id)
        norm = tf.norm(embedding_gather, axis=1, keepdims=True)
        return self.variables[0].scatter_nd_update(indices=tf.expand_dims(unique_censor_id, 1), 
                                                   updates=embedding_gather / tf.math.maximum(norm, 0.1)) 
Example #28
Source File: cml.py    From openrec with Apache License 2.0
def _build_post_training_ops(self):
        unique_user_id, _ = tf.unique(self._get_input('user_id'))
        unique_item_id, _ = tf.unique(tf.concat([self._get_input('p_item_id'), self._get_input('n_item_id')], axis=0))
        return [self._get_module('user_vec').censor_l2_norm_op(censor_id_list=unique_user_id),
                self._get_module('p_item_vec').censor_l2_norm_op(censor_id_list=unique_item_id)] 
Example #29
Source File: wcml.py    From openrec with Apache License 2.0
def _build_post_training_ops(self):
        unique_user_id, _ = tf.unique(self._get_input('user_id'))
        unique_item_id, _ = tf.unique(tf.concat([self._get_input('p_item_id'), tf.reshape(self._get_input('n_item_id'), [-1])], axis=0))
        return [self._get_module('user_vec').censor_l2_norm_op(censor_id_list=unique_user_id),
                self._get_module('p_item_vec').censor_l2_norm_op(censor_id_list=unique_item_id)] 
Example #30
Source File: temporal_latent_factor.py    From openrec with Apache License 2.0
def _build_shared_graph(self):

        with tf.variable_scope(self._scope, reuse=self._reuse):
            
            self._embedding = tf.get_variable('embedding', dtype=tf.float32, shape=self._shape, trainable=False,
                                      initializer=self._initializer)
            
            self._flag = tf.get_variable('flag', dtype=tf.bool, shape=[self._shape[0]], trainable=False,
                                    initializer=tf.constant_initializer(value=False, dtype=tf.bool))
            unique_ids, _ = tf.unique(self._ids)

            with tf.control_dependencies([tf.scatter_update(self._flag, unique_ids, 
                                                            tf.ones_like(unique_ids, dtype=tf.bool))]):
                trans_embedding = MultiLayerFC(in_tensor=tf.nn.embedding_lookup(self._embedding, self._ids), 
                                               dims=self._mlp_dims, 
                                               batch_norm=True, 
                                               scope=self._scope+'/MLP', 
                                               train=self._train,
                                               reuse=self._reuse,
                                               l2_reg=self._l2_reg,
                                               relu_out=True)

            self._outputs += trans_embedding.get_outputs()
            self._loss += trans_embedding.get_loss()
            
            update_ids = tf.reshape(tf.where(self._flag), [-1])
            update_embedding = MultiLayerFC(in_tensor=tf.nn.embedding_lookup(self._embedding, update_ids), 
                                           dims=self._mlp_dims, 
                                           batch_norm=True, 
                                           scope=self._scope+'/MLP', 
                                           train=False,
                                           reuse=True,
                                           l2_reg=self._l2_reg,
                                           relu_out=True)
            self._update_node = tf.scatter_update(self._embedding, update_ids, update_embedding.get_outputs()[0])
            self._clear_flag = tf.scatter_update(self._flag, update_ids, tf.zeros_like(update_ids, dtype=tf.bool))