Python numpy.argpartition() Examples

The following are 30 code examples of numpy.argpartition(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the numpy module, or try the search function.
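Most of the examples below follow the same idiom: use numpy.argpartition to locate the k largest (or smallest) entries without sorting the whole array, then apply numpy.argsort to just those k candidates when an exact ordering is needed. A minimal sketch of that pattern (the array and k below are illustrative, not taken from any of the examples):

import numpy as np

scores = np.array([0.1, 0.7, 0.3, 0.9, 0.5, 0.2])
k = 3

# Indices of the k largest values, in no particular order
top_k_unsorted = np.argpartition(scores, -k)[-k:]

# Sort only those k candidates to get descending order of score
top_k = top_k_unsorted[np.argsort(-scores[top_k_unsorted])]

print(top_k)          # [3 1 4]
print(scores[top_k])  # [0.9 0.7 0.5]

# Row-wise top-k for a 2-D array: each row is partitioned independently
mat = np.random.rand(4, 10)
rows_top_k = np.argpartition(mat, -k, axis=1)[:, -k:]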
Example #1
Source File: inference-sample-error-analysis.py    From youtube-8m with Apache License 2.0
def format_lines(video_ids, predictions, labels, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    n_recall = max(int(numpy.sum(labels[video_index])), 1)
    # labels
    label_indices = numpy.argpartition(labels[video_index], -n_recall)[-n_recall:]
    label_predictions = [(class_index, predictions[video_index][class_index]) 
                           for class_index in label_indices]
    label_predictions = sorted(label_predictions, key=lambda p: -p[1])
    label_str = "\t".join(["%d\t%f"%(x,y) for x,y in label_predictions])
    # predictions
    top_k_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    top_k_predictions = [(class_index, predictions[video_index][class_index])
                         for class_index in top_k_indices]
    top_k_predictions = sorted(top_k_predictions, key=lambda p: -p[1])
    top_k_str = "\t".join(["%d\t%f"%(x,y) for x,y in top_k_predictions])
    # compute PERR
    top_n_indices = numpy.argpartition(predictions[video_index], -n_recall)[-n_recall:]
    positives = [labels[video_index][class_index] 
                 for class_index in top_n_indices]
    perr = sum(positives) / float(n_recall)
    # URL
    url = "https://www.youtube.com/watch?v=" + video_ids[video_index].decode('utf-8')
    yield url + "\t" + str(1-perr) + "\t" + top_k_str + "\t" + label_str + "\n" 
Example #2
Source File: misc.py    From pyxclib with MIT License
def _update_predicted(start_idx, predicted_batch_labels, 
                      predicted_labels, top_k=10):
    """
        Update the predicted answers for the batch
        Args:
            predicted_batch_labels
            predicted_labels
    """
    def _select_topk(vec, k):
        batch_size = vec.shape[0]
        top_ind = np.argpartition(vec, -k)[:, -k:]
        ind = np.zeros((k*batch_size, 2), dtype=int)
        ind[:, 0] = np.repeat(np.arange(0, batch_size, 1), [k]*batch_size)
        ind[:, 1] = top_ind.flatten('C')
        return top_ind.flatten('C'), vec[ind[:, 0], ind[:, 1]]
    batch_size = predicted_batch_labels.shape[0]
    top_indices, top_vals = _select_topk(predicted_batch_labels, k=top_k)
    ind = np.zeros((top_k*batch_size, 2), dtype=int)
    ind[:, 0] = np.repeat(
        np.arange(start_idx, start_idx+batch_size, 1), [top_k]*batch_size)
    ind[:, 1] = top_indices
    predicted_labels[ind[:, 0], ind[:, 1]] = top_vals 
Example #3
Source File: top_terms.py    From xam with MIT License
def fit(self, X, y=None, **fit_params):

        # scikit-learn checks
        X, y = utils.check_X_y(X, y, accept_sparse='csr', order='C')

        n_terms = min(self.n_terms, X.shape[1])

        # Get a list of unique labels from y
        labels = np.unique(y)

        # Determine the n top terms per class
        self.top_terms_per_class_ = {
            c: set(np.argpartition(np.sum(X[y == c], axis=0), -n_terms)[-n_terms:])
            for c in labels
        }

        # Return the classifier
        return self 
Example #4
Source File: resnet_as_a_service.py    From iAI with MIT License
def analyze(output_data):
    #Results from the engine are returned as a list of 5D numpy arrays: 
    #        (Number of Batches x Batch Size x C x H x W)
    output = output_data.reshape(len(LABELS))
    
    # Get result
    top = np.argmax(output)
    top = LABELS[top]
    
    # Get top5
    top5 = np.argpartition(output, -5, axis=-1)[-5:]
    top5 = top5[np.argsort(output[top5])][::-1]
    top5_classes = []
    for i in top5:
        top5_classes.append((LABELS[i], output[i]))
        
    return [top, top5_classes]

#Arguments to create lite engine 
Example #5
Source File: test_shape_base.py    From recruit with Apache License 2.0
def test_argequivalent(self):
        """ Test it translates from arg<func> to <func> """
        from numpy.random import rand
        a = rand(3, 4, 5)

        funcs = [
            (np.sort, np.argsort, dict()),
            (_add_keepdims(np.min), _add_keepdims(np.argmin), dict()),
            (_add_keepdims(np.max), _add_keepdims(np.argmax), dict()),
            (np.partition, np.argpartition, dict(kth=2)),
        ]

        for func, argfunc, kwargs in funcs:
            for axis in list(range(a.ndim)) + [None]:
                a_func = func(a, axis=axis, **kwargs)
                ai_func = argfunc(a, axis=axis, **kwargs)
                assert_equal(a_func, take_along_axis(a, ai_func, axis=axis)) 
Example #6
Source File: resnet_as_a_service.py    From iAI with MIT License
def analyze(output_data):
    #Results from the engine are returned as a list of 5D numpy arrays:
    #        (Number of Batches x Batch Size x C x H x W)
    output = output_data.reshape(len(LABELS))

    # Get result
    top = np.argmax(output)
    top = LABELS[top]

    # Get top5
    top5 = np.argpartition(output, -5, axis=-1)[-5:]
    top5 = top5[np.argsort(output[top5])][::-1]
    top5_classes = []
    for i in top5:
        top5_classes.append((LABELS[i], output[i]))

    return [top, top5_classes]

#Arguments to create lite engine 
Example #7
Source File: direct_spin1.py    From pyscf with Apache License 2.0
def _get_init_guess(na, nb, nroots, hdiag):
    '''Initial guess is the single Slater determinant
    '''
    # The "nroots" lowest determinants based on energy expectation value.
    ci0 = []
    try:
        addrs = numpy.argpartition(hdiag, nroots-1)[:nroots]
    except AttributeError:
        addrs = numpy.argsort(hdiag)[:nroots]
    for addr in addrs:
        x = numpy.zeros((na*nb))
        x[addr] = 1
        ci0.append(x.ravel())

    # Add noise
    ci0[0][0 ] += 1e-5
    ci0[0][-1] -= 1e-5
    return ci0 
Example #8
Source File: replay_buffer.py    From yolo_v2 with Apache License 2.0
def remove_n(self, n):
    """Get n items for removal."""
    assert self.init_length + n <= self.cur_size

    if self.eviction_strategy == 'rand':
      # random removal
      idxs = random.sample(xrange(self.init_length, self.cur_size), n)
    elif self.eviction_strategy == 'fifo':
      # overwrite elements in cyclical fashion
      idxs = [
          self.init_length +
          (self.remove_idx + i) % (self.max_size - self.init_length)
          for i in xrange(n)]
      self.remove_idx = idxs[-1] + 1 - self.init_length
    elif self.eviction_strategy == 'rank':
      # remove lowest-priority indices
      idxs = np.argpartition(self.priorities, n)[:n]

    return idxs 
Example #9
Source File: search.py    From hred-latent-piecewise with GNU General Public License v3.0
def select_next_words(self, next_costs, next_probs, step_num, how_many):
        # Pick only on the first line (for the beginning of sampling)
        # This will avoid duplicate <q> token.
        if step_num == 0:
            flat_next_costs = next_costs[:1, :].flatten()
        else:
            # Set the next cost to infinite for finished utterances (they will
            # be replaced by other utterances in the beam)
            flat_next_costs = next_costs.flatten()
         
        voc_size = next_costs.shape[1]
         
        args = numpy.argpartition(flat_next_costs, how_many)[:how_many]
        args = args[numpy.argsort(flat_next_costs[args])]
        
        return numpy.unravel_index(args, next_costs.shape), flat_next_costs[args] 
Example #10
Source File: tfidf_doc_ranker.py    From OpenQA with MIT License
def closest_docs(self, query, k=1):
        """Closest docs by dot product between query and documents
        in tfidf weighted word vector space.
        """
        spvec = self.text2spvec(query)
        res = spvec * self.doc_mat

        if len(res.data) <= k:
            o_sort = np.argsort(-res.data)
        else:
            o = np.argpartition(-res.data, k)[0:k]
            o_sort = o[np.argsort(-res.data[o])]

        doc_scores = res.data[o_sort]
        doc_ids = [self.get_doc_id(i) for i in res.indices[o_sort]]
        return doc_ids, doc_scores 
Example #11
Source File: codebook.py    From AugmentedAutoencoder with MIT License
def nearest_rotation(self, session, x, top_n=1, upright=False, return_idcs=False):
        #R_model2cam

        if x.dtype == 'uint8':
            x = x/255.
        if x.ndim == 3:
            x = np.expand_dims(x, 0)
        
        cosine_similarity = session.run(self.cos_similarity, {self._encoder.x: x})
        if top_n == 1:
            if upright:
                idcs = np.argmax(cosine_similarity[:,::int(self._dataset._kw['num_cyclo'])], axis=1)*int(self._dataset._kw['num_cyclo'])
            else:
                idcs = np.argmax(cosine_similarity, axis=1)
        else:
            unsorted_max_idcs = np.argpartition(-cosine_similarity.squeeze(), top_n)[:top_n]
            idcs = unsorted_max_idcs[np.argsort(-cosine_similarity.squeeze()[unsorted_max_idcs])]
        if return_idcs:
            return idcs
        else:
            return self._dataset.viewsphere_for_embedding[idcs].squeeze() 
Example #12
Source File: risk.py    From cryptotrader with MIT License
def polar_returns(ret, k):
    """
    Calculate polar returns.
    :param ret: array of returns, one observation per row
    :param k: fraction of observations to keep
    :return: return radius, return angles
    """
    ret = np.mat(ret)
    # Find the radius and the angle decomposition on price relative vectors
    radius = np.linalg.norm(ret, ord=1, axis=1)
    angle = np.divide(ret, np.mat(radius).T)

    # Select the largest radius values (the top fraction k of the observations)
    index = np.argpartition(radius, -(int(ret.shape[0] * k) + 1))[-(int(ret.shape[0] * k) + 1):]
    index = index[np.argsort(radius[index])]

    # Return the radius and the angle for extreme found values
    return radius[index][::-1], angle[index][::-1]


# Pareto Extreme Risk Index 
Example #13
Source File: test_shape_base.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_argequivalent(self):
        """ Test it translates from arg<func> to <func> """
        from numpy.random import rand
        a = rand(3, 4, 5)

        funcs = [
            (np.sort, np.argsort, dict()),
            (_add_keepdims(np.min), _add_keepdims(np.argmin), dict()),
            (_add_keepdims(np.max), _add_keepdims(np.argmax), dict()),
            (np.partition, np.argpartition, dict(kth=2)),
        ]

        for func, argfunc, kwargs in funcs:
            for axis in list(range(a.ndim)) + [None]:
                a_func = func(a, axis=axis, **kwargs)
                ai_func = argfunc(a, axis=axis, **kwargs)
                assert_equal(a_func, take_along_axis(a, ai_func, axis=axis)) 
Example #14
Source File: replay_buffer.py    From DOTA_models with Apache License 2.0
def remove_n(self, n):
    """Get n items for removal."""
    assert self.init_length + n <= self.cur_size

    if self.eviction_strategy == 'rand':
      # random removal
      idxs = random.sample(xrange(self.init_length, self.cur_size), n)
    elif self.eviction_strategy == 'fifo':
      # overwrite elements in cyclical fashion
      idxs = [
          self.init_length +
          (self.remove_idx + i) % (self.max_size - self.init_length)
          for i in xrange(n)]
      self.remove_idx = idxs[-1] + 1 - self.init_length
    elif self.eviction_strategy == 'rank':
      # remove lowest-priority indices
      idxs = np.argpartition(self.priorities, n)[:n]

    return idxs 
Example #15
Source File: test_multiarray.py    From Computable with MIT License
def test_partition_cdtype(self):
        d = array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
                           tgt)
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
                           tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
                        tgt[k])
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
                         tgt[k])

        d = array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k]) 
Example #16
Source File: utils.py    From BrainSpace with BSD 3-Clause "New" or "Revised" License
def _dominant_set_sparse(s, k, is_thresh=False, norm=False):
    """Compute dominant set for a sparse matrix."""
    if is_thresh:
        mask = s > k
        idx, data = np.where(mask), s[mask]
        s = ssp.coo_matrix((data, idx), shape=s.shape)

    else:  # keep top k
        nr, nc = s.shape
        idx = np.argpartition(s, nc - k, axis=1)
        col = idx[:, -k:].ravel()  # idx largest
        row = np.broadcast_to(np.arange(nr)[:, None], (nr, k)).ravel()
        data = s[row, col].ravel()
        s = ssp.coo_matrix((data, (row, col)), shape=s.shape)

    if norm:
        s.data /= s.sum(axis=1).A1[s.row]

    return s.tocsr(copy=False) 
Example #17
Source File: utils.py    From siamese-triplet with BSD 3-Clause "New" or "Revised" License
def get_pairs(self, embeddings, labels):
        if self.cpu:
            embeddings = embeddings.cpu()
        distance_matrix = pdist(embeddings)

        labels = labels.cpu().data.numpy()
        all_pairs = np.array(list(combinations(range(len(labels)), 2)))
        all_pairs = torch.LongTensor(all_pairs)
        positive_pairs = all_pairs[(labels[all_pairs[:, 0]] == labels[all_pairs[:, 1]]).nonzero()]
        negative_pairs = all_pairs[(labels[all_pairs[:, 0]] != labels[all_pairs[:, 1]]).nonzero()]

        negative_distances = distance_matrix[negative_pairs[:, 0], negative_pairs[:, 1]]
        negative_distances = negative_distances.cpu().data.numpy()
        top_negatives = np.argpartition(negative_distances, len(positive_pairs))[:len(positive_pairs)]
        top_negative_pairs = negative_pairs[torch.LongTensor(top_negatives)]

        return positive_pairs, top_negative_pairs 
Example #18
Source File: utils.py    From BrainSpace with BSD 3-Clause "New" or "Revised" License
def _dominant_set_dense(s, k, is_thresh=False, norm=False, copy=True):
    """Compute dominant set for a dense matrix."""

    if is_thresh:
        s = s.copy() if copy else s
        s[s <= k] = 0

    else:  # keep top k
        nr, nc = s.shape
        idx = np.argpartition(s, nc - k, axis=1)
        row = np.arange(nr)[:, None]
        if copy:
            col = idx[:, -k:]  # idx largest
            data = s[row, col]
            s = np.zeros_like(s)
            s[row, col] = data
        else:
            col = idx[:, :-k]  # idx smallest
            s[row, col] = 0

    if norm:
        s /= np.nansum(s, axis=1, keepdims=True)

    return s 
Example #19
Source File: _k_medoids.py    From scikit-learn-extra with BSD 3-Clause "New" or "Revised" License
def _initialize_medoids(self, D, n_clusters, random_state_):
        """Select initial medoids when beginning clustering."""

        if self.init == "random":  # Random initialization
            # Pick k random medoids as the initial ones.
            medoids = random_state_.choice(len(D), n_clusters)
        elif self.init == "k-medoids++":
            medoids = self._kpp_init(D, n_clusters, random_state_)
        elif self.init == "heuristic":  # Initialization by heuristic
            # Pick K first data points that have the smallest sum distance
            # to every other point. These are the initial medoids.
            medoids = np.argpartition(np.sum(D, axis=1), n_clusters - 1)[
                :n_clusters
            ]
        else:
            raise ValueError(f"init value '{self.init}' not recognized")

        return medoids

    # Copied from sklearn.cluster.k_means_._k_init 
Example #20
Source File: __init__.py    From DeepHash with MIT License
def get_mAPs(q_output, q_labels, db_output, db_labels, Rs, dist_type):
    dist = distance(q_output, db_output, dist_type=dist_type, pair=True)
    unsorted_ids = np.argpartition(dist, Rs - 1)[:, :Rs]
    APx = []
    for i in range(dist.shape[0]):
        label = q_labels[i, :]
        label[label == 0] = -1
        idx = unsorted_ids[i, :]
        idx = idx[np.argsort(dist[i, :][idx])]
        imatch = np.sum(np.equal(db_labels[idx[0: Rs], :], label), 1) > 0
        rel = np.sum(imatch)
        Lx = np.cumsum(imatch)
        Px = Lx.astype(float) / np.arange(1, Rs + 1, 1)
        if rel != 0:
            APx.append(np.sum(Px * imatch) / rel)
    return np.mean(np.array(APx)) 
Example #21
Source File: tfidf_doc_ranker.py    From justcopy-backend with MIT License
def closest_docs(self, query, k=1):
        """Closest docs by dot product between query and documents
        in tfidf weighted word vector space.
        """
        spvec = self.text2spvec(query)
        res = spvec * self.doc_mat

        if len(res.data) <= k:
            o_sort = np.argsort(-res.data)
        else:
            o = np.argpartition(-res.data, k)[0:k]
            o_sort = o[np.argsort(-res.data[o])]

        doc_scores = res.data[o_sort]
        doc_ids = [self.get_doc_id(i) for i in res.indices[o_sort]]
        return doc_ids, doc_scores 
Example #22
Source File: bilind.py    From otalign with GNU General Public License v3.0
def csls_sparse(X, Y, idx_x, idx_y, knn = 10):
    def mean_similarity_sparse(X, Y, seeds, knn, axis = 1, metric = 'cosine'):
        if axis == 1:
            dists = sp.spatial.distance.cdist(X[seeds,:], Y, metric=metric)
        else:
            dists = sp.spatial.distance.cdist(X, Y[seeds,:], metric=metric).T
        nghbs = np.argpartition(dists, knn, axis=1)  # per row; argpartition returns the k nearest out of order, but is efficient (it doesn't sort whole rows)
        nghbs = nghbs[:,:knn]
        nghbs_dists = np.concatenate([row[indices] for row, indices in zip(dists, nghbs)]).reshape(nghbs.shape)
        nghbs_sims  = 1 - nghbs_dists
        return nghbs_sims.mean(axis = 1)

    src_ms = mean_similarity_sparse(X, Y, idx_x, knn,  axis = 1)
    trg_ms = mean_similarity_sparse(X, Y, idx_y, knn,  axis = 0)
    sims =  1 - sp.spatial.distance.cdist(X[idx_x,:], Y[idx_y,:])
    normalized_sims = ((2*sims - trg_ms).T - src_ms).T
    print(normalized_sims)
    nn = normalized_sims.argmax(axis=1).tolist()
    return nn 
Example #23
Source File: beam_search.py    From knmt with GNU General Public License v3.0
def iterate_eos_scores(new_scores, eos_idx, existing_cases=None, beam_width=None) -> Tuple[Sequence, Sequence, Sequence]:
    """
    Return the indices and scores corresponding to the eos word.
    Meaning of returned values is the same as for iterate_best_score
    """
    nb_cases, v_size = new_scores.shape
    num_cases = np.arange(nb_cases, dtype=np.int32)
    scores = -cuda.to_cpu(new_scores[:, eos_idx])
    if existing_cases is not None:
        need_to_return = np.logical_not(np.isin(num_cases, existing_cases))
        num_cases = num_cases[need_to_return]
        scores = scores[need_to_return]

    idx_in_cases = np.full(num_cases.shape[0], eos_idx, dtype=np.int32)

    if beam_width is not None:
        if beam_width < len(scores):
            idx_to_keep = np.argpartition(scores, beam_width)[:beam_width]
            scores = scores[idx_to_keep]
            num_cases = num_cases[idx_to_keep]
            idx_in_cases = idx_in_cases[idx_to_keep]

    return num_cases, idx_in_cases, scores 
Example #24
Source File: numpy_compat.py    From hred-qs with BSD 3-Clause "New" or "Revised" License
def argpartition(a, kth, axis=-1, order=None):
            return numpy.argsort(a, axis=axis, order=order) 
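Note: this is a compatibility shim for NumPy versions older than 1.8.0, which lack numpy.argpartition. It simply falls back to a full numpy.argsort, so the kth argument is accepted but ignored; the result is still a valid (if slower) drop-in for the select-then-sort pattern used in the other examples.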
Example #25
Source File: eval_util.py    From youtube-8m with Apache License 2.0
def calculate_precision_at_equal_recall_rate(predictions, actuals):
  """Performs a local (numpy) calculation of the PERR.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.

  Returns:
    float: The average precision at equal recall rate across the entire batch.
  """
  aggregated_precision = 0.0
  num_videos = actuals.shape[0]
  for row in numpy.arange(num_videos):
    num_labels = int(numpy.sum(actuals[row]))
    top_indices = numpy.argpartition(predictions[row],
                                     -num_labels)[-num_labels:]
    item_precision = 0.0
    for label_index in top_indices:
      if predictions[row][label_index] > 0:
        item_precision += actuals[row][label_index]
    item_precision /= top_indices.size
    aggregated_precision += item_precision
  aggregated_precision /= num_videos
  return aggregated_precision 
Example #26
Source File: inference-layer.py    From youtube-8m with Apache License 2.0
def format_lines(video_ids, predictions, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    line = [(class_index, predictions[video_index][class_index])
            for class_index in top_indices]
  #  print("Type - Test :")
  #  print(type(video_ids[video_index]))
  #  print(video_ids[video_index].decode('utf-8'))
    line = sorted(line, key=lambda p: -p[1])
    yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
                                                  for pair in line) + "\n" 
Example #27
Source File: inference-sample-error.py    From youtube-8m with Apache License 2.0
def format_lines(video_ids, predictions, labels):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    # error rate
    top_k = max(int(numpy.sum(labels[video_index])), 1)
    top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    positives = [labels[video_index][class_index] for class_index in top_indices]
    perr = sum(positives) / float(top_k)
    yield video_ids[video_index].decode('utf-8') + "\t" + str(1-perr) + "\n" 
Example #28
Source File: inference-stage1.py    From youtube-8m with Apache License 2.0
def format_lines(video_ids, predictions, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    line = [(class_index, predictions[video_index][class_index])
            for class_index in top_indices]
  #  print("Type - Test :")
  #  print(type(video_ids[video_index]))
  #  print(video_ids[video_index].decode('utf-8'))
    line = sorted(line, key=lambda p: -p[1])
    yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
                                                  for pair in line) + "\n" 
Example #29
Source File: bilind.py    From otalign with GNU General Public License v3.0
def translations_from_coupling(G, src_words = None, tgt_words=None, verbose = False):
    """
        Returns pairs of matched (row, col) pairs according to some criterion
    """
    # Naive method: look for unambiguous words that are mutual nearest neighbors (NN)
    G.max(0)
    n_s, n_t = G.shape
    best_match_src = G.argmax(1) # Best match for each source word
    best_match_tgt = G.argmax(0)

    paired = []
    for i in range(n_s):
        m = best_match_src[i]
        if verbose:
            k = 10
            topk_idx = np.argpartition(G[i,:], -k)[-k:]
            topk_idx_sort = topk_idx[np.argsort(-G[i,topk_idx])] # With - to get descending order
            print('{:20s} -> {}'.format(src_words[i],','.join([tgt_words[m] for m in topk_idx_sort])))
        if best_match_tgt[m] == i:
            paired.append((i,m))

    paired_toks = []
    if src_words and tgt_words:
        paired_toks = [(src_words[i],tgt_words[j]) for (i,j) in paired]
    else:
        paired_toks = paired
    return paired_toks 
Example #30
Source File: inference.py    From youtube-8m with Apache License 2.0
def format_lines(video_ids, predictions, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    line = [(class_index, predictions[video_index][class_index])
            for class_index in top_indices]
  #  print("Type - Test :")
  #  print(type(video_ids[video_index]))
  #  print(video_ids[video_index].decode('utf-8'))
    line = sorted(line, key=lambda p: -p[1])
    yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
                                                  for pair in line) + "\n"