Python sklearn.metrics.pairwise.pairwise_distances() Examples

The following code examples show how to use sklearn.metrics.pairwise.pairwise_distances(). They are taken from open source Python projects.
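Before the examples, a minimal self-contained sketch of the basic call (shapes and metric are illustrative): with one argument the function returns the square matrix of distances between rows of X; with two arguments it returns the rectangular matrix of distances between rows of X and rows of Y.

import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

X = np.random.rand(4, 3)  # 4 samples, 3 features
Y = np.random.rand(2, 3)  # 2 samples, 3 features

D_xx = pairwise_distances(X)                      # (4, 4), euclidean by default
D_xy = pairwise_distances(X, Y, metric='cosine')  # (4, 2), cosine distance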

Example 1
Project: fmralign   Author: Parietal-INRIA   File: alignment_methods.py    BSD 3-Clause "New" or "Revised" License
def optimal_permutation(X, Y):
    """Compute the optmal permutation matrix of X toward Y

    Parameters
    ----------
    X: (n_samples, n_features) nd array
        source data
    Y: (n_samples, n_features) nd array
        target data

    Returns
    ----------
    permutation : (n_features, n_features) nd array
        transformation matrix
    """
    dist = pairwise_distances(X.T, Y.T)
    u = linear_assignment(dist)
    permutation = scipy.sparse.csr_matrix(
        (np.ones(X.shape[1]), (u[:, 0], u[:, 1]))).T
    return permutation 
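A side note on this snippet: linear_assignment here is the old scikit-learn helper that returns an (n, 2) array of matched index pairs, since removed from scikit-learn. A hedged equivalent using scipy's solver, which returns the matched row and column indices as two separate arrays:

import numpy as np
import scipy.sparse
from scipy.optimize import linear_sum_assignment
from sklearn.metrics.pairwise import pairwise_distances

def optimal_permutation_scipy(X, Y):
    # Same construction as above, with scipy's assignment solver.
    dist = pairwise_distances(X.T, Y.T)
    row_ind, col_ind = linear_sum_assignment(dist)
    permutation = scipy.sparse.csr_matrix(
        (np.ones(X.shape[1]), (row_ind, col_ind))).T
    return permutation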
Example 2
Project: kuaa   Author: rafaelwerneck   File: plugin_tfidf-cosine.py    GNU General Public License v3.0
def distance(seq1, seq2, extra={}):
    """
    Performs TF-IDF on the sequences and calculates the cosine distance
    between them.
    """
    
    idf_matrix = extra["idf_matrix"]
    
    #Seq1
    string_seq1 = " ".join(seq1)
    seq1_fv = idf_matrix.transform([string_seq1]).todense()
    
    #Seq2
    string_seq2 = " ".join(seq2)
    seq2_fv = idf_matrix.transform([string_seq2]).todense()
    
    #Distance
    return pairwise.pairwise_distances(seq1_fv, seq2_fv, 'cosine')[INDEX_ZERO][INDEX_ZERO] 
Example 3
Project: kuaa   Author: rafaelwerneck   File: plugin_tfidf.py    GNU General Public License v3.0
def distance(seq1, seq2, extra={}):
    """
    Performs TF-IDF on the sequences and calculates the euclidean distance
    between them.
    """
    
    idf_matrix = extra["idf_matrix"]
    
    #Seq1
    string_seq1 = " ".join(seq1)
    seq1_fv = idf_matrix.transform([string_seq1])
    
    #Seq2
    string_seq2 = " ".join(seq2)
    seq2_fv = idf_matrix.transform([string_seq2])
    
    #Distance
    return pairwise.pairwise_distances(seq1_fv, seq2_fv)[INDEX_ZERO][INDEX_ZERO] 
Example 4
Project: smote_variants   Author: gykovacs   File: _smote_variants.py    MIT License
def _set_reach_dist(self, point_index, processed, X, nbrs):
        P = X[point_index:point_index + 1]
        indices = nbrs.radius_neighbors(P, radius=self.max_eps,
                                        return_distance=False)[0]

        # Getting indices of neighbors that have not been processed
        unproc = np.compress((~np.take(processed, indices)).ravel(),
                             indices, axis=0)
        # Keep n_jobs = 1 in the following lines...please
        if not unproc.size:
            # Everything is already processed. Return to main loop
            return point_index

        dists = pairwise_distances(P, np.take(X, unproc, axis=0),
                                   self.metric, n_jobs=1).ravel()

        rdists = np.maximum(dists, self.core_distances_[point_index])
        new_reach = np.minimum(np.take(self.reachability_, unproc), rdists)
        self.reachability_[unproc] = new_reach

        # Define return order based on reachability distance
        return (unproc[self.quick_scan(np.take(self.reachability_, unproc),
                                  dists)]) 
Example 5
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0
def canopy(X, T1, T2, distance_metric='euclidean', filemap=None):
    canopies = dict()
    X1_dist = pairwise_distances(X, metric=distance_metric)
    canopy_points = set(range(X.shape[0]))
    while canopy_points:
        point = canopy_points.pop()
        i = len(canopies)
        canopies[i] = {"c":point, "points": list(np.where(X1_dist[point] < T2)[0])}
        canopy_points = canopy_points.difference(set(np.where(X1_dist[point] < T1)[0]))
    if filemap:
        for canopy_id in canopies.keys():
            canopy = canopies.pop(canopy_id)
            canopy2 = {"c":filemap[canopy['c']], "points":list()}
            for point in canopy['points']:
                canopy2["points"].append(filemap[point])
            canopies[canopy_id] = canopy2
    return canopies 
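A hypothetical call of the helper above (thresholds illustrative). Note that in this implementation T2 is the inclusion radius and T1 the removal radius, so choosing T1 <= T2 ensures every removed point is covered by some canopy:

import numpy as np

X = np.random.rand(100, 2)
canopies = canopy(X, T1=0.3, T2=0.5)  # maps canopy id -> center index and member indices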
Example 6
Project: progressivis   Author: jdfekete   File: test_03_pairwise.py    BSD 2-Clause "Simplified" License
def NOtest_vec_distances(self):
        s = self.scheduler()
        vec = VECLoader(get_dataset('warlogs'), scheduler=s)
#        dis=PairwiseDistances(metric='cosine',scheduler=s)
#        dis.input.df = vec.output.df
#        dis.input.array = vec.output.array
        cnt = Every(proc=self.terse, constant_time=True, scheduler=s)
#        cnt.input.df = dis.output.dist
        cnt.input.df = vec.output.table
        global times
        times = 0
        s.start()
        table = vec.table()
        #print(table)
#        computed = dis.dist()
#        self.assertEquals(computed.shape[0], len(df))
#        truth = pairwise_distances(vec.toarray(), metric=dis._metric)
#        self.assertTrue(np.allclose(truth, computed)) 
Example 7
Project: progressivis   Author: jdfekete   File: test_03_pairwise.py    BSD 2-Clause "Simplified" License
def test_csv_distances(self):
        s = self.scheduler()
        vec = CSVLoader(get_dataset('smallfile'), index_col=False, header=None, scheduler=s)
#        dis=PairwiseDistances(metric='euclidean',scheduler=s)
#        dis.input.df = vec.output.df
        cnt = Every(proc=self.terse, constant_time=True, scheduler=s)
#        cnt.input.df = dis.output.dist
        cnt.input.df = vec.output.table
        global times
        times = 0
        s.start(ten_times)
        s.join()
        table = vec.table()
        #print(repr(table))
#        computed = dis.dist()
        #self.assertEquals(computed.shape[0], len(df))

#        offset=0
#        size=offset+5000
#        truth = pairwise_distances(df.iloc[offset:size], metric=dis._metric)
#        dist = computed[offset:size,offset:size]
#        self.assertTrue(np.allclose(truth, dist,atol=1e-7)) # reduce tolerance 
Example 8
Project: scikit-feature   Author: jundongl   File: UDFS.py    GNU General Public License v2.0
def construct_M(X, k, gamma):
    """
    This function constructs the M matrix described in the paper
    """
    n_sample, n_feature = X.shape
    Xt = X.T
    D = pairwise_distances(X)
    # sort the distance matrix D in ascending order
    idx = np.argsort(D, axis=1)
    # choose the k-nearest neighbors for each instance
    idx_new = idx[:, 0:k+1]
    H = np.eye(k+1) - 1.0/(k+1) * np.ones((k+1, k+1))  # 1.0 guards against integer division
    I = np.eye(k+1)
    Mi = np.zeros((n_sample, n_sample))
    for i in range(n_sample):
        Xi = Xt[:, idx_new[i, :]]
        Xi_tilde = np.dot(Xi, H)
        Bi = np.linalg.inv(np.dot(Xi_tilde.T, Xi_tilde) + gamma*I)
        Si = np.zeros((n_sample, k+1))
        for q in range(k+1):
            Si[idx_new[i, q], q] = 1
        Mi = Mi + np.dot(np.dot(Si, np.dot(np.dot(H, Bi), H)), Si.T)
    M = np.dot(np.dot(X.T, Mi), X)
    return M 
Example 9
Project: MCSAuditing   Author: spring-epfl   File: defense_mechanisms.py    GNU General Public License v3.0
def compute_geometric_median(probabilities_input, values_input):
    # Computes the geometric median using Weiszfeld's algorithm
    probabilities = np.copy(probabilities_input)
    values = np.copy(values_input)

    values = values[probabilities > 0]  # remove entries of values with probability of 0
    probabilities = probabilities[probabilities > 0]  # remove zero entries
    geo_median_old = np.array([float("inf"), float("inf")])
    geo_median = np.dot(probabilities.transpose(), values)  # Initial estimation is the mean
    nIter = 0
    while (check_condition(geo_median, geo_median_old)) and (nIter < 200):

        distance_matrix = pairwise_distances([geo_median], values)
        distance_matrix = distance_matrix[0]
        # Return if there is a zero value in distance_matrix
        if np.any(distance_matrix == 0):
            #print "emerg brake", nIter
            return geo_median
        # print len(distance_matrix)
        geo_median_old = geo_median
        div = np.divide(probabilities, distance_matrix)
        geo_median = np.divide(np.dot(div, values), np.dot(div, np.ones_like(values)))
        nIter += 1
    return geo_median 
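A hypothetical call, assuming the external check_condition() compares successive estimates against a convergence tolerance: with uniform weights over the corners of the unit square, the iteration converges to the center.

import numpy as np

points = np.array([[0., 0.], [1., 0.], [0., 1.], [1., 1.]])
probs = np.full(4, 0.25)
median = compute_geometric_median(probs, points)  # approximately [0.5, 0.5]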
Example 10
Project: redis_stats   Author: mapado   File: stats.py    GNU General Public License v3.0
def clusterize_keys(keys_vector, dbname):
    vectorizer = DictVectorizer()
    X = vectorizer.fit_transform(keys_vector)

    if dbname == 'kmeans':
        db = cluster.KMeans(n_clusters=10)
    else:
        X = pairwise_distances(X, metric='cosine')
        db = cluster.DBSCAN(min_samples=1)

    print "Feature len: {}".format(len(vectorizer.get_feature_names()))
    db.fit(X)

    labels = db.labels_
    nb_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    print('Number of clusters found: {}'.format(nb_clusters))

    return labels 
Example 11
Project: modAL   Author: modAL-python   File: density.py    MIT License
def information_density(X: modALinput, metric: Union[str, Callable] = 'euclidean') -> np.ndarray:
    """
    Calculates the information density metric of the given data using the given metric.

    Args:
        X: The data for which the information density is to be calculated.
        metric: The metric to be used. Should take two 1d numpy.ndarrays for argument.

    Todo:
        Should work with all possible modALinput.
        Perhaps refactor the module to use some stuff from sklearn.metrics.pairwise

    Returns:
        The information density for each sample.
    """
    # inf_density = np.zeros(shape=(X.shape[0],))
    # for X_idx, X_inst in enumerate(X):
    #     inf_density[X_idx] = sum(similarity_measure(X_inst, X_j) for X_j in X)
    #
    # return inf_density/X.shape[0]

    similarity_mtx = 1/(1+pairwise_distances(X, X, metric=metric))

    return similarity_mtx.mean(axis=1) 
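A minimal sketch of calling this function (pool data illustrative); the result is one score per sample, higher for samples lying in dense regions:

import numpy as np

X_pool = np.random.rand(100, 5)
densities = information_density(X_pool, metric='cosine')  # shape (100,)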
Example 12
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_t_sne.py    Apache License 2.0
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    distances = pairwise_distances(pos_input).astype(np.float32)
    args = distances, perplexity, verbose
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64)
    pij_input = _joint_probabilities(*args)
    pij_input = squareform(pij_input).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)

    from scipy.sparse import csr_matrix
    P = csr_matrix(pij_input)

    neighbors = P.indices.astype(np.int64)
    indptr = P.indptr.astype(np.int64)

    _barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr,
                              grad_bh, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4) 
Example 13
Project: design_embeddings_jmd_2016   Author: IDEALLab   File: metrics.py    MIT License
def geo_dist_inconsistency(X, F, X_precomputed=False, verbose=0):
    ''' Geodesic distance inconsistency '''
    if X_precomputed:
        geo_X = X # geodesic distance matrix for X
    else:
        geo_X = get_geo_dist(X, verbose=verbose)
    geo_X[geo_X==0] = np.inf # if two points are not connected
    np.fill_diagonal(geo_X, 0)
    
    dist_F = pairwise.pairwise_distances(F) # distance matrix for F
    
    gdi = 1-pearsonr(geo_X.flatten(), dist_F.flatten())[0]**2
    
#    from matplotlib import pyplot as plt
#    plt.figure()
#    plt.scatter(geo_X.flatten(), dist_F.flatten())
#    plt.show()
    
#    def cost(alpha):
#        return smape(geo_X*alpha, dist_F)
#    
#    # Cost increases when min(geo_X*alpha) > max(dist_F) or max(geo_X*alpha) < min(dist_F)
#    # But min could be very close to 0 (for points close to each other), so use mean instead
#    bounds=((np.mean(dist_F)/np.max(geo_X), np.max(dist_F)/np.mean(geo_X)),)
#    res = differential_evolution(cost, bounds)
#    
#    gdi = res.fun
    
    return gdi 
Example 14
Project: ABRW   Author: houchengbin   File: utils.py    MIT License
def pairwise_similarity(mat, type='cosine'):
    # XXX: possible to integrate pairwise_similarity with top_k to enhance performance? 
    # we'll use it elsewhere. if really needed, write a new method for this purpose
    if type == 'cosine':  # supports sparse and dense mat
        from sklearn.metrics.pairwise import cosine_similarity
        result = cosine_similarity(mat, dense_output=True)
    elif type == 'jaccard':
        from sklearn.metrics import jaccard_similarity_score
        from sklearn.metrics.pairwise import pairwise_distances
        # n_jobs=-1 means using all CPUs for parallel computing
        result = pairwise_distances(mat.todense(), metric=jaccard_similarity_score, n_jobs=-1)
    elif type == 'euclidean':
        from sklearn.metrics.pairwise import euclidean_distances
        # note: similarity = - distance
        # other version: similarity = 1 - 2 / pi * arctan(distance)
        result = euclidean_distances(mat)
        result = -result
        # result = 1 - 2 / np.pi * np.arctan(result)
    elif type == 'manhattan':
        from sklearn.metrics.pairwise import manhattan_distances
        # note: similarity = - distance
        # other version: similarity = 1 - 2 / pi * arctan(distance)
        result = manhattan_distances(mat)
        result = -result
        # result = 1 - 2 / np.pi * np.arctan(result)
    else:
        print('Please choose from: cosine, jaccard, euclidean or manhattan')
        return 'Not found!'
    return result


# ---------------------------------utils for preprocessing--------------------------------
Example 15
Project: deepSVDD   Author: GSRS   File: svm.py    MIT License
def train(self, **kwargs):

        if self.data._X_train.ndim > 2:
            X_train_shape = self.data._X_train.shape
            X_train = self.data._X_train.reshape(X_train_shape[0],
                                                 np.prod(X_train_shape[1:]))
        else:
            X_train = self.data._X_train

        print("Starting training...")
        self.start_clock()

        if self.loss == 'SVC':

            if self.kernel in ('DegreeKernel', 'WeightedDegreeKernel'):
                self.get_kernel_matrix(kernel=self.kernel, which_set='train',
                                       **kwargs)
                self.svm.fit(self.K_train, self.data._y_train)
            else:
                self.svm.fit(X_train, self.data._y_train)

        if self.loss == 'OneClassSVM':

            if self.kernel in ('DegreeKernel', 'WeightedDegreeKernel'):
                self.get_kernel_matrix(kernel=self.kernel, which_set='train',
                                       **kwargs)
                self.svm.fit(self.K_train)
            else:
                # if rbf-kernel, re-initialize svm with gamma minimizing the
                # numerical error
                if self.kernel == 'rbf':
                    gamma = 1 / (np.max(pairwise_distances(X_train)) ** 2)
                    self.svm = svm.OneClassSVM(kernel='rbf', nu=Cfg.svm_nu,
                                               gamma=gamma)

                self.svm.fit(X_train)

        self.stop_clock()
        self.train_time = self.clocked 
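The gamma heuristic above is easy to check in isolation: with gamma = 1 / max(d)^2, the RBF kernel entry for the farthest pair of training points is exp(-1) (about 0.37), so the kernel matrix stays away from numerical zero. A minimal sketch (data illustrative):

import numpy as np
from sklearn.metrics.pairwise import pairwise_distances, rbf_kernel

X = np.random.rand(30, 5)
gamma = 1.0 / (np.max(pairwise_distances(X)) ** 2)
K = rbf_kernel(X, gamma=gamma)
assert np.isclose(K.min(), np.exp(-1))  # the farthest pair attains exp(-1)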
Example 16
Project: deepSVDD   Author: GSRS   File: kde.py    MIT License
def train(self, bandwidth_GridSearchCV=True):

        if self.data._X_train.ndim > 2:
            X_train_shape = self.data._X_train.shape
            X_train = self.data._X_train.reshape(X_train_shape[0], -1)
        else:
            X_train = self.data._X_train

        print("Starting training...")
        self.start_clock()

        if bandwidth_GridSearchCV:
            # use grid search cross-validation to select bandwidth
            print("Using GridSearchCV for bandwidth selection...")

            d = X_train.shape[1]
            grid = np.logspace(-9, 20, num=30, base=2)
            params = {'bandwidth': (d / (2.0 * grid)) ** 0.5}

            hyper_kde = GridSearchCV(KernelDensity(kernel=self.kernel), params,
                                     n_jobs=10, cv=20, verbose=1)
            hyper_kde.fit(X_train)

            self.bandwidth = hyper_kde.best_estimator_.bandwidth
            self.kde = hyper_kde.best_estimator_
        else:
            # if exponential kernel, re-initialize kde with bandwidth minimizing
            # the numerical error
            if self.kernel == 'exponential':
                bandwidth = np.max(pairwise_distances(X_train)) ** 2
                self.kde = KernelDensity(kernel=self.kernel,
                                         bandwidth=bandwidth)

            self.kde.fit(X_train)

        self.stop_clock()
        self.train_time = self.clocked 
Example 17
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0
def _intra_cluster_distances_block_(subX, metric, **kwds):
    distances = pairwise_distances(subX, metric=metric, **kwds)
    return distances.sum(axis=1) / (distances.shape[0] - 1) 
Example 18
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0
def _intra_cluster_distances_block(X, labels, metric, n_jobs=1, **kwds):
    """Calculate the mean intra-cluster distance for sample i.

    Parameters
    ----------
    X : array [n_samples_a, n_features]
        Feature array.

    labels : array, shape = [n_samples]
        label values for each sample

    metric : string, or callable
        The metric to use when calculating distance between instances in a
        feature array. If metric is a string, it must be one of the options
        allowed by metrics.pairwise.pairwise_distances. If X is the distance
        array itself, use "precomputed" as the metric.

    `**kwds` : optional keyword parameters
        Any further parameters are passed directly to the distance function.
        If using a scipy.spatial.distance metric, the parameters are still
        metric dependent. See the scipy docs for usage examples.

    Returns
    -------
    a : array [n_samples_a]
        Mean intra-cluster distance
    """
    intra_dist = np.zeros(labels.size, dtype=float)
    values = Parallel(n_jobs=n_jobs)(
            delayed(_intra_cluster_distances_block_)
                (X[np.where(labels == label)[0]], metric, **kwds)
                for label in np.unique(labels))
    for label, values_ in zip(np.unique(labels), values):
        intra_dist[np.where(labels == label)[0]] = values_
    return intra_dist 
Example 19
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0
def _nearest_cluster_distance_block_(subX_a, subX_b, metric, **kwds):
    dist = pairwise_distances(subX_a, subX_b, metric=metric, **kwds)
    dist_a = dist.mean(axis=1)
    dist_b = dist.mean(axis=0)
    return dist_a, dist_b 
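Examples 17-19 are the building blocks of a silhouette-style score, s = (b - a) / max(a, b), where a is the mean intra-cluster distance and b the mean nearest-cluster distance. A hedged sketch of exercising the helpers directly (toy data; the helpers, numpy, and joblib's Parallel/delayed must be in scope):

import numpy as np

X = np.random.rand(20, 3)
labels = np.repeat([0, 1], 10)

a = _intra_cluster_distances_block(X, labels, metric='euclidean')  # shape (20,)
dist_a, dist_b = _nearest_cluster_distance_block_(X[labels == 0], X[labels == 1], 'euclidean')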
Example 20
Project: geosketch   Author: brianhie   File: sketch.py    MIT License
def gs_exact(X, N, k='auto', seed=None, replace=False,
             tol=1e-3, n_iter=300, verbose=1):
    ge_idx = gs(X, N, replace=replace)
    
    dist = pairwise_distances(X, n_jobs=-1)
    
    cost = dist.max()

    iter_i = 0
    
    while iter_i < n_iter:

        if verbose:
            log('iter_i = {}'.format(iter_i))

        labels = np.argmin(dist[ge_idx, :], axis=0)

        ge_idx_new = []
        for cluster in range(N):
            cluster_idx = np.nonzero(labels == cluster)[0]
            if len(cluster_idx) == 0:
                ge_idx_new.append(ge_idx[cluster])
                continue
            X_cluster = dist[cluster_idx, :]
            X_cluster = X_cluster[:, cluster_idx]
            within_idx = np.argmin(X_cluster.max(0))
            ge_idx_new.append(cluster_idx[within_idx])
        ge_idx = ge_idx_new

        cost, prev_cost = dist[ge_idx, :].min(0).max(), cost
        assert(cost <= prev_cost)

        if prev_cost - cost < tol:
            break

        iter_i += 1

    return ge_idx 
Example 21
Project: clr_prediction   Author: Kipok   File: clr_regressors.py    MIT License
def predict(self, X):
    if self.weight_mode == 'size':
      probs = self.predict_proba(X)
      return np.argmax(probs)
    dst = cdist(self.centers_, X)
    return np.argmin(dst, axis=0) 
Example 22
Project: clr_prediction   Author: Kipok   File: clr_regressors.py    MIT License
def predict_proba(self, X):
    if self.weight_mode == 'size':
      return self.weights
    dst = cdist(self.centers_, X)
    return dst.T / np.sum(dst.T, axis=1, keepdims=True) 
Example 23
Project: DeepID2   Author: chenzeyuczy   File: feat_test.py    MIT License
def getDist(feat1, feat2, metric):
	pair_num = len(feat1)
	import sklearn.metrics.pairwise as pw
	mt = pw.pairwise_distances(feat1, feat2, metric=metric)
	distance = np.empty((pair_num,))
	for i in range(pair_num):
		distance[i] = mt[i, i]
	return distance

# Extract feature via network. 
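The loop above computes a full pair_num x pair_num matrix only to read its diagonal. A lighter, hedged alternative for the metrics paired_distances supports ('euclidean', 'manhattan', 'cosine', or a callable) computes just the row-to-row distances:

from sklearn.metrics.pairwise import paired_distances

def getDistPaired(feat1, feat2, metric):
    # Distance between feat1[i] and feat2[i] for each i, without the full matrix.
    return paired_distances(feat1, feat2, metric=metric)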
Example 24
Project: few   Author: lacava   File: evaluation.py    GNU General Public License v3.0
def fisher(yhat,y,samples=False):
    """Fisher criterion"""
    classes = np.unique(y)
    mu = np.zeros(len(classes))
    v = np.zeros(len(classes))
    # pdb.set_trace()
    for c in classes.astype(int):
        mu[c] = np.mean(yhat[y==c])
        v[c] = np.var(yhat[y==c])

    if not samples:
        fisher = 0
        for c1,c2 in pairwise(classes.astype(int)):
            fisher += np.abs(mu[c1] - mu[c2])/np.sqrt(v[c1]+v[c2])
    else:
        # lexicase version
        fisher = np.zeros(len(yhat))
        # get closest classes to each class (min mu distance)
        mu_d = pairwise_distances(mu.reshape(-1,1))
        min_mu=np.zeros(len(classes),dtype=int)
        for i in np.arange(len(min_mu)):
            min_mu[i] = np.argsort(mu_d[i])[1]
        # for c1, pairwise(classes.astype(int)):
        #     min_mu[c1] = np.argmin()
        for i,l in enumerate(yhat.astype(int)):
            fisher[i] = np.abs(l - mu[min_mu[y[i]]])/np.sqrt(v[y[i]]+v[min_mu[y[i]]])

    # pdb.set_trace()
    return fisher 
Example 25
Project: 3d-vehicle-tracking   Author: ucbdrive   File: tracking_utils.py    BSD 3-Clause "New" or "Revised" License
def compute_cos_dis(featA, featB):
    return np.exp(-skp.pairwise_distances(featA, featB)) 
Example 26
Project: nldrp   Author: etzinis   File: distance_matrix.py    MIT License
def compute_distance_matrix(X,
                            norm='euclidean',
                            n_jobs = 1
                            ):
    """!
    \brief """
    return pairwise_distances(X, Y=X, metric=norm, n_jobs=n_jobs) 
Example 27
Project: lumberjack   Author: robertwatkins   File: apache_trainer.py    GNU General Public License v3.0
def calculateDistanceMatrix(ngram_as_path):
    sample_n_gram_list_as_ids = n_gram_list_to_ids(ngram_as_path)
    return pairwise_distances(sample_n_gram_list_as_ids, metric=get_levenshtein_distance) 
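As this snippet shows, pairwise_distances also accepts a plain Python callable as the metric; it is invoked on pairs of 1-D rows and must return a float. A minimal sketch with an illustrative callable:

import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

def hamming_like(a, b):
    # a and b are 1-D rows of X; return their number of mismatches.
    return float(np.sum(a != b))

X = np.array([[1, 2, 3], [1, 2, 4], [9, 9, 9]])
D = pairwise_distances(X, metric=hamming_like)  # (3, 3) symmetric matrix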
Example 28
Project: BioCompoundML   Author: sandialabs   File: cluster.py    BSD 3-Clause "New" or "Revised" License
def pairwise_distance_matrix(self, feature_matrix, dist_matrix_type):
        pair_distance_matrix = pairwise_distances(feature_matrix,
                                                  metric=dist_matrix_type)
        return pair_distance_matrix 
Example 29
Project: BioCompoundML   Author: sandialabs   File: distance.py    BSD 3-Clause "New" or "Revised" License
def build_matrix(self, feature_dict):
        self.distance = pairwise_distances(X=feature_dict, metric=self.metric,
                                           n_jobs=self.n_jobs) 
Example 30
Project: didyprog   Author: arthurmensch   File: test_dtw.py    MIT License
def make_data():
    rng = np.random.RandomState(0)
    m, n = 2, 2
    X = rng.randn(m, 3)
    Y = rng.randn(n, 3)
    return pairwise_distances(X, Y) / 10 
Example 31
Project: Deep-SVDD   Author: ErikKratzCth   File: kde.py    MIT License
def train(self, bandwidth_GridSearchCV=True):

        if self.data._X_train.ndim > 2:
            X_train_shape = self.data._X_train.shape
            X_train = self.data._X_train.reshape(X_train_shape[0], -1)
        else:
            X_train = self.data._X_train

        print("Starting training...")
        self.start_clock()

        if bandwidth_GridSearchCV:
            # use grid search cross-validation to select bandwidth
            print("Using GridSearchCV for bandwidth selection...")

            # params = {'bandwidth': np.logspace(0.5, 5, num=10, base=2)}
            params = {'bandwidth': np.logspace(-4.5, 5, num=20, base=2)}

            hyper_kde = GridSearchCV(KernelDensity(kernel=self.kernel), params, n_jobs=-1, cv=5, verbose=0)
            hyper_kde.fit(X_train)

            self.bandwidth = hyper_kde.best_estimator_.bandwidth
            self.kde = hyper_kde.best_estimator_
        else:
            # if exponential kernel, re-initialize kde with bandwidth minimizing
            # the numerical error
            if self.kernel == 'exponential':
                bandwidth = np.max(pairwise_distances(X_train)) ** 2
                self.kde = KernelDensity(kernel=self.kernel,
                                         bandwidth=bandwidth)

            self.kde.fit(X_train)

        self.stop_clock()
        self.train_time = self.clocked 
Example 32
Project: image-classifier   Author: gustavkkk   File: eval.py    MIT License
def compare_pic(self, feature1, feature2):
	predicts = pw.pairwise_distances(feature2, feature1, 'cosine')
	#predicts = pw.cosine_similarity(feature1, feature2)
	return predicts
Example 33
Project: image-classifier   Author: gustavkkk   File: eval-all.py    MIT License
def compare_pic(self, feature1, feature2):
	predicts = pw.pairwise_distances(feature2, feature1, 'cosine')
	#predicts = pw.cosine_similarity(feature1, feature2)
	return predicts
Example 34
Project: apachecn_ml   Author: ys1305   File: sklearn-RS-demo-cf-item-test.py    GNU General Public License v3.0
def calc_similarity(self):
        # Build the user-item matrices: one for the training data and one for the test data:
        self.train_mat = np.zeros((self.n_users, self.n_items))
        for line in self.train_data.itertuples():
            self.train_mat[int(line.user_id) - 1,
                           int(line.item_id) - 1] = float(line.rating)
        self.test_mat = np.zeros((self.n_users, self.n_items))
        for line in self.test_data.itertuples():
            # print "line", line.user_id-1, line.item_id-1, line.rating
            self.test_mat[int(line.user_id) - 1,
                          int(line.item_id) - 1] = float(line.rating)

        # Use sklearn's pairwise_distances to compute cosine similarity.
        print("1:", np.shape(np.mat(self.train_mat).T))  # rows: movies, columns: users
        # movie-to-movie distances, shape (1682, 1682)
        self.item_mat_similarity = pairwise_distances(
            np.mat(self.train_mat).T, metric='cosine')
        print('item_mat_similarity=', np.shape(
            self.item_mat_similarity), file=sys.stderr)

        print('Counting the number of popular items...', file=sys.stderr)

        # Count, across all users, how many times each movie occurs
        for i_index in range(self.n_items):
            if np.sum(self.train_mat[:, i_index]) != 0:
                self.item_popular[i_index] = np.sum(
                    self.train_mat[:, i_index] != 0)
                # print "pop=", i_index, self.item_popular[i_index]

        # save the total number of items
        self.item_count = len(self.item_popular)
        print('Total number of popular items = %d' % self.item_count, file=sys.stderr)

    # @profile 
Example 35
Project: apachecn_ml   Author: ys1305   File: RS-sklearn-rating.py    GNU General Public License v3.0
def calc_similarity(n_users, n_items, train_data, test_data):
    # Build the user-item matrices: one for the training data and one for the test data:
    train_data_matrix = np.zeros((n_users, n_items))
    for line in train_data.itertuples():
        train_data_matrix[line[1] - 1, line[2] - 1] = line[3]
    test_data_matrix = np.zeros((n_users, n_items))
    for line in test_data.itertuples():
        test_data_matrix[line[1] - 1, line[2] - 1] = line[3]

    # Use sklearn's pairwise_distances to compute cosine similarity.
    print("1:", np.shape(train_data_matrix))  # rows: users, columns: movies
    print("2:", np.shape(train_data_matrix.T))  # rows: movies, columns: users

    user_similarity = pairwise_distances(train_data_matrix, metric="cosine")
    item_similarity = pairwise_distances(train_data_matrix.T, metric="cosine")

    print('Counting the number of popular items...', file=sys.stderr)
    item_popular = {}
    # Count, across all users, how many times each movie occurs
    for i_index in range(n_items):
        if np.sum(train_data_matrix[:, i_index]) != 0:
            item_popular[i_index] = np.sum(train_data_matrix[:, i_index] != 0)
            # print "pop=", i_index, self.item_popular[i_index]

    # save the total number of items
    item_count = len(item_popular)
    print('Total number of popular items = %d' % item_count, file=sys.stderr)

    return train_data_matrix, test_data_matrix, user_similarity, item_similarity, item_popular 
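One caveat on the two recommender snippets above: with metric='cosine', pairwise_distances returns the cosine distance, i.e. 1 - cosine similarity, so the *_similarity variables actually hold distances. If true similarities are intended, convert explicitly:

# cosine distance = 1 - cosine similarity, so invert when a similarity is wanted
user_similarity = 1 - pairwise_distances(train_data_matrix, metric='cosine')
item_similarity = 1 - pairwise_distances(train_data_matrix.T, metric='cosine')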
Example 36
Project: kd-switch   Author: alherit   File: SeqKNN.py    MIT License
def predictKNNlambda(self, point, label, k):
        if k > 0:
            dists = pairwise.pairwise_distances(X=[point], Y=self.trainingPoints)[0]

            # 1 neighbor = taking the smallest distance = 0th element
            k -= 1
            kthElem = np.partition(dists, kth=k)[k]

            cond = dists <= kthElem

            dist = np.histogram(np.array(self.trainingLabels)[cond], bins=self.alpha_label, range=[0, self.alpha_label])[0]
            dist = dist / np.sum(dist, dtype=float)
            #print(dist)
            prob = dist[label]

            prob = lp.LogWeightProb(prob)

        else:
            prob = self.predictTheta0(label)

        # lambda mix as in the paper: mix with uniform prob (could be theta0 as well)
        prob = self.lmbda * prob + (lp.LogWeightProb(1.) - self.lmbda) * lp.LogWeightProb(1. / self.alpha_label)

        return prob
Example 37
Project: linear_neuron   Author: uglyboxer   File: test_pairwise.py    MIT License
def test_pairwise_parallel():
    wminkowski_kwds = {'w': np.arange(1, 5).astype('double'), 'p': 1}
    metrics = [(pairwise_distances, 'euclidean', {}),
               (pairwise_distances, wminkowski, wminkowski_kwds),
               (pairwise_distances, 'wminkowski', wminkowski_kwds),
               (pairwise_kernels, 'polynomial', {'degree': 1}),
               (pairwise_kernels, callable_rbf_kernel, {'gamma': .1}),
               ]
    for func, metric, kwds in metrics:
        yield check_pairwise_parallel, func, metric, kwds 
Example 38
Project: linear_neuron   Author: uglyboxer   File: test_pairwise.py    MIT License
def test_pairwise_callable_nonstrict_metric():
    # paired_distances should allow callable metric where metric(x, x) != 0
    # Knowing that the callable is a strict metric would allow the diagonal to
    # be left uncalculated and set to 0.
    assert_equal(pairwise_distances([[1]], metric=lambda x, y: 5)[0, 0], 5) 
Example 39
Project: linear_neuron   Author: uglyboxer   File: test_pairwise.py    MIT License
def test_paired_distances():
    # Test the pairwise_distance helper function.
    rng = np.random.RandomState(0)
    # Euclidean distance should be equivalent to calling the function.
    X = rng.random_sample((5, 4))
    # Euclidean distance, with Y != X.
    Y = rng.random_sample((5, 4))
    for metric, func in iteritems(PAIRED_DISTANCES):
        S = paired_distances(X, Y, metric=metric)
        S2 = func(X, Y)
        assert_array_almost_equal(S, S2)
        S3 = func(csr_matrix(X), csr_matrix(Y))
        assert_array_almost_equal(S, S3)
        if metric in PAIRWISE_DISTANCE_FUNCTIONS:
            # Check the pairwise_distances implementation
            # gives the same value
            distances = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
            distances = np.diag(distances)
            assert_array_almost_equal(distances, S)

    # Check the callable implementation
    S = paired_distances(X, Y, metric='manhattan')
    S2 = paired_distances(X, Y, metric=lambda x, y: np.abs(x - y).sum(axis=0))
    assert_array_almost_equal(S, S2)

    # Test that a ValueError is raised when the lengths of X and Y differ.
    Y = rng.random_sample((3, 4))
    assert_raises(ValueError, paired_distances, X, Y) 
Example 40
Project: linear_neuron   Author: uglyboxer   File: test_dbscan.py    MIT License
def test_dbscan_balltree():
    # Tests the DBSCAN algorithm with balltree for neighbor calculation.
    eps = 0.8
    min_samples = 10

    D = pairwise_distances(X)
    core_samples, labels = dbscan(D, metric="precomputed", eps=eps,
                                  min_samples=min_samples)

    # number of clusters, ignoring noise if present
    n_clusters_1 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_1, n_clusters)

    db = DBSCAN(p=2.0, eps=eps, min_samples=min_samples, algorithm='ball_tree')
    labels = db.fit(X).labels_

    n_clusters_2 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_2, n_clusters)

    db = DBSCAN(p=2.0, eps=eps, min_samples=min_samples, algorithm='kd_tree')
    labels = db.fit(X).labels_

    n_clusters_3 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_3, n_clusters)

    db = DBSCAN(p=1.0, eps=eps, min_samples=min_samples, algorithm='ball_tree')
    labels = db.fit(X).labels_

    n_clusters_4 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_4, n_clusters)

    db = DBSCAN(leaf_size=20, eps=eps, min_samples=min_samples,
                algorithm='ball_tree')
    labels = db.fit(X).labels_

    n_clusters_5 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_5, n_clusters) 
Example 41
Project: Weiss   Author: WangWenjun559   File: test_pairwise.py    Apache License 2.0
def test_pairwise_parallel():
    wminkowski_kwds = {'w': np.arange(1, 5).astype('double'), 'p': 1}
    metrics = [(pairwise_distances, 'euclidean', {}),
               (pairwise_distances, wminkowski, wminkowski_kwds),
               (pairwise_distances, 'wminkowski', wminkowski_kwds),
               (pairwise_kernels, 'polynomial', {'degree': 1}),
               (pairwise_kernels, callable_rbf_kernel, {'gamma': .1}),
               ]
    for func, metric, kwds in metrics:
        yield check_pairwise_parallel, func, metric, kwds 
Example 42
Project: Weiss   Author: WangWenjun559   File: test_pairwise.py    Apache License 2.0
def test_pairwise_callable_nonstrict_metric():
    # paired_distances should allow callable metric where metric(x, x) != 0
    # Knowing that the callable is a strict metric would allow the diagonal to
    # be left uncalculated and set to 0.
    assert_equal(pairwise_distances([[1]], metric=lambda x, y: 5)[0, 0], 5) 
Example 43
Project: Weiss   Author: WangWenjun559   File: test_pairwise.py    Apache License 2.0
def test_paired_distances():
    # Test the pairwise_distance helper function.
    rng = np.random.RandomState(0)
    # Euclidean distance should be equivalent to calling the function.
    X = rng.random_sample((5, 4))
    # Euclidean distance, with Y != X.
    Y = rng.random_sample((5, 4))
    for metric, func in iteritems(PAIRED_DISTANCES):
        S = paired_distances(X, Y, metric=metric)
        S2 = func(X, Y)
        assert_array_almost_equal(S, S2)
        S3 = func(csr_matrix(X), csr_matrix(Y))
        assert_array_almost_equal(S, S3)
        if metric in PAIRWISE_DISTANCE_FUNCTIONS:
            # Check the pairwise_distances implementation
            # gives the same value
            distances = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
            distances = np.diag(distances)
            assert_array_almost_equal(distances, S)

    # Check the callable implementation
    S = paired_distances(X, Y, metric='manhattan')
    S2 = paired_distances(X, Y, metric=lambda x, y: np.abs(x - y).sum(axis=0))
    assert_array_almost_equal(S, S2)

    # Test that a ValueError is raised when the lengths of X and Y differ.
    Y = rng.random_sample((3, 4))
    assert_raises(ValueError, paired_distances, X, Y) 
Example 44
Project: Weiss   Author: WangWenjun559   File: test_dbscan.py    Apache License 2.0
def test_dbscan_balltree():
    # Tests the DBSCAN algorithm with balltree for neighbor calculation.
    eps = 0.8
    min_samples = 10

    D = pairwise_distances(X)
    core_samples, labels = dbscan(D, metric="precomputed", eps=eps,
                                  min_samples=min_samples)

    # number of clusters, ignoring noise if present
    n_clusters_1 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_1, n_clusters)

    db = DBSCAN(p=2.0, eps=eps, min_samples=min_samples, algorithm='ball_tree')
    labels = db.fit(X).labels_

    n_clusters_2 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_2, n_clusters)

    db = DBSCAN(p=2.0, eps=eps, min_samples=min_samples, algorithm='kd_tree')
    labels = db.fit(X).labels_

    n_clusters_3 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_3, n_clusters)

    db = DBSCAN(p=1.0, eps=eps, min_samples=min_samples, algorithm='ball_tree')
    labels = db.fit(X).labels_

    n_clusters_4 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_4, n_clusters)

    db = DBSCAN(leaf_size=20, eps=eps, min_samples=min_samples,
                algorithm='ball_tree')
    labels = db.fit(X).labels_

    n_clusters_5 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_5, n_clusters) 
Example 45
Project: Weiss   Author: WangWenjun559   File: test_approximate.py    Apache License 2.0
def test_neighbors_accuracy_with_n_estimators():
    # Checks whether accuracy increases as `n_estimators` increases.
    n_estimators = np.array([1, 10, 100])
    n_samples = 100
    n_features = 10
    n_iter = 10
    n_points = 5
    rng = np.random.RandomState(42)
    accuracies = np.zeros(n_estimators.shape[0], dtype=float)
    X = rng.rand(n_samples, n_features)

    for i, t in enumerate(n_estimators):
        lshf = LSHForest(n_candidates=500, n_estimators=t)
        lshf.fit(X)
        for j in range(n_iter):
            query = X[rng.randint(0, n_samples)]
            neighbors = lshf.kneighbors(query, n_neighbors=n_points,
                                        return_distance=False)
            distances = pairwise_distances(query, X, metric='cosine')
            ranks = np.argsort(distances)[0, :n_points]

            intersection = np.intersect1d(ranks, neighbors).shape[0]
            ratio = intersection / float(n_points)
            accuracies[i] = accuracies[i] + ratio

        accuracies[i] = accuracies[i] / float(n_iter)
    # Sorted accuracies should be equal to original accuracies
    assert_true(np.all(np.diff(accuracies) >= 0),
                msg="Accuracies are not non-decreasing.")
    # Highest accuracy should be strictly greater than the lowest
    assert_true(np.ptp(accuracies) > 0,
                msg="Highest accuracy is not strictly greater than lowest.") 
Example 46
Project: Weiss   Author: WangWenjun559   File: test_nearest_centroid.py    Apache License 2.0
def test_precomputed():
    clf = NearestCentroid(metric="precomputed")
    clf.fit(X, y)
    S = pairwise_distances(T, clf.centroids_)
    assert_array_equal(clf.predict(S), true_result) 
Example 47
Project: Weiss   Author: WangWenjun559   File: test_neighbors.py    Apache License 2.0
def test_non_euclidean_kneighbors():
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)

    # Find a reasonable radius.
    dist_array = pairwise_distances(X).flatten()
    np.sort(dist_array)
    radius = dist_array[15]

    # Test kneighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.kneighbors_graph(
            X, 3, metric=metric).toarray()
        nbrs1 = neighbors.NearestNeighbors(3, metric=metric).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.kneighbors_graph(X).toarray())

    # Test radiusneighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.radius_neighbors_graph(
            X, radius, metric=metric).toarray()
        nbrs1 = neighbors.NearestNeighbors(metric=metric, radius=radius).fit(X)
        assert_array_equal(nbrs_graph,
                           nbrs1.radius_neighbors_graph(X).toarray())

    # Raise error when wrong parameters are supplied.
    X_nbrs = neighbors.NearestNeighbors(3, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.kneighbors_graph, X_nbrs, 3,
                  metric='euclidean')
    X_nbrs = neighbors.NearestNeighbors(radius=radius, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.radius_neighbors_graph, X_nbrs,
                  radius, metric='euclidean') 
Example 48
Project: D-GEX   Author: uci-cbcl   File: nodup_idx.py    GNU General Public License v2.0
def main():
    data = np.load(BGEDV2_DATA)
    X = data.transpose()
    
    inlabel = open(LABEL)
    label = []
    for line in inlabel:
        label.append(int(line.strip('\n')))
    
    label = np.array(label)
    inlabel.close()
    
    idx_keep = []
    for k in range(0, K):
        print(k)
        sys.stdout.flush()
        idx_k = np.where(label == k)[0]
        X_k = X[idx_k, :]
        pd_k = pairwise_distances(X_k, metric='euclidean', n_jobs=10)
        idx_k_keep = keep(pd_k, idx_k)
        idx_keep.extend(idx_k_keep.tolist())
        
    idx_keep = np.sort(np.array(idx_keep)).astype('int')
    
    outfile = open('bgedv2_idx_nodup_K100_D1.0.txt', 'w')
    for idx in idx_keep:
        outfile.write(str(idx) + '\n')
    
    outfile.close() 
Example 49
Project: modAL   Author: modAL-python   File: batch.py    MIT License
def select_cold_start_instance(X: modALinput,
                               metric: Union[str, Callable],
                               n_jobs: Union[int, None]) -> Tuple[int, modALinput]:
    """
    Define what to do if our batch-mode sampling doesn't have any labeled data -- a cold start.

    If our ranked batch sampling algorithm doesn't have any labeled data to determine similarity among the uncertainty
    set, this function finds the element with highest average similarity to cold-start the batch selection.

    TODO:
        - Figure out how to test this! E.g. how to create modAL model without training data.
        - Think of optimizing pairwise_distance call for large matrix.

    Refer to Cardoso et al.'s "Ranked batch-mode active learning":
        https://www.sciencedirect.com/science/article/pii/S0020025516313949

    Args:
        X: The set of unlabeled records.
        metric: This parameter is passed to :func:`~sklearn.metrics.pairwise.pairwise_distances`.
        n_jobs: This parameter is passed to :func:`~sklearn.metrics.pairwise.pairwise_distances`.

    Returns:
        Index of the best cold-start instance from `X` chosen to be labelled; record of the best cold-start instance
        from `X` chosen to be labelled.
    """
    # Compute all pairwise distances in our unlabeled data and obtain the row-wise average for each of our records in X.
    n_jobs = n_jobs if n_jobs else 1
    average_distances = np.mean(pairwise_distances(X, metric=metric, n_jobs=n_jobs), axis=0)

    # Isolate and return our best instance for labeling as the record with the least average distance.
    best_coldstart_instance_index = np.argmin(average_distances)
    return best_coldstart_instance_index, X[best_coldstart_instance_index].reshape(1, -1) 
Example 50
Project: modAL   Author: modAL-python   File: batch.py    MIT License
def uncertainty_batch_sampling(classifier: Union[BaseLearner, BaseCommittee],
                               X: Union[np.ndarray, sp.csr_matrix],
                               n_instances: int = 20,
                               metric: Union[str, Callable] = 'euclidean',
                               n_jobs: Optional[int] = None,
                               **uncertainty_measure_kwargs
                               ) -> Tuple[np.ndarray, Union[np.ndarray, sp.csr_matrix]]:
    """
    Batch sampling query strategy. Selects the least sure instances for labelling.

    This strategy differs from :func:`~modAL.uncertainty.uncertainty_sampling` because, although it is supported,
    traditional active learning query strategies suffer from sub-optimal record selection when passing
    `n_instances` > 1. This sampling strategy extends the interactive uncertainty query sampling by allowing for
    batch-mode uncertainty query sampling. Furthermore, it also enforces a ranking -- that is, which records among the
    batch are most important for labeling?

    Refer to Cardoso et al.'s "Ranked batch-mode active learning":
        https://www.sciencedirect.com/science/article/pii/S0020025516313949

    Args:
        classifier: One of modAL's supported active learning models.
        X: Set of records to be considered for our active learning model.
        n_instances: Number of records to return for labeling from `X`.
        metric: This parameter is passed to :func:`~sklearn.metrics.pairwise.pairwise_distances`
        n_jobs: If not set, :func:`~sklearn.metrics.pairwise.pairwise_distances_argmin_min` is used for calculation of
            distances between samples. Otherwise it is passed to :func:`~sklearn.metrics.pairwise.pairwise_distances`.
        **uncertainty_measure_kwargs: Keyword arguments to be passed for the :meth:`predict_proba` of the classifier.

    Returns:
        Indices of the instances from `X` chosen to be labelled; records from `X` chosen to be labelled.
    """
    uncertainty = classifier_uncertainty(classifier, X, **uncertainty_measure_kwargs)
    query_indices = ranked_batch(classifier, unlabeled=X, uncertainty_scores=uncertainty,
                                 n_instances=n_instances, metric=metric, n_jobs=n_jobs)
    return query_indices, X[query_indices] 
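A hypothetical end-to-end use of this strategy with modAL (estimator and data are illustrative; uncertainty_batch_sampling is assumed importable from modAL.batch):

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from modAL.models import ActiveLearner
from modAL.batch import uncertainty_batch_sampling

learner = ActiveLearner(estimator=RandomForestClassifier(),
                        X_training=np.random.rand(10, 4),
                        y_training=np.random.randint(0, 2, 10))
X_pool = np.random.rand(100, 4)
query_idx, query_records = uncertainty_batch_sampling(learner, X_pool, n_instances=5)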
Example 51
Project: DynWalks   Author: houchengbin   File: utils.py    MIT License
def pairwise_similarity(mat, type='cosine'):
    ''' pairwise similarity; can be used as score function;
        vectorized computation 
    '''
    if type == 'cosine':  # supports sparse and dense mat
        from sklearn.metrics.pairwise import cosine_similarity
        result = cosine_similarity(mat, dense_output=True)
    elif type == 'jaccard':
        from sklearn.metrics import jaccard_similarity_score
        from sklearn.metrics.pairwise import pairwise_distances
        # n_jobs=-1 means using all CPUs for parallel computing
        result = pairwise_distances(mat.todense(), metric=jaccard_similarity_score, n_jobs=-1)
    elif type == 'euclidean':
        from sklearn.metrics.pairwise import euclidean_distances
        # note: similarity = - distance
        result = euclidean_distances(mat)
        result = -result
    elif type == 'manhattan':
        from sklearn.metrics.pairwise import manhattan_distances
        # note: similarity = - distance
        result = manhattan_distances(mat)
        result = -result
    else:
        print('Please choose from: cosine, jaccard, euclidean or manhattan')
        return 'Not found!'
    return result 
Example 52
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_pairwise.py    Apache License 2.0
def test_pairwise_boolean_distance():
    # test that we convert to boolean arrays for boolean distances
    rng = np.random.RandomState(0)
    X = rng.randn(5, 4)
    Y = X.copy()
    Y[0, 0] = 1 - Y[0, 0]

    for metric in PAIRWISE_BOOLEAN_FUNCTIONS:
        for Z in [Y, None]:
            res = pairwise_distances(X, Z, metric=metric)
            res[np.isnan(res)] = 0
            assert_true(np.sum(res != 0) == 0) 
Example 53
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_pairwise.py    Apache License 2.0
def test_pairwise_precomputed():
    for func in [pairwise_distances, pairwise_kernels]:
        # Test correct shape
        assert_raises_regexp(ValueError, '.* shape .*',
                             func, np.zeros((5, 3)), metric='precomputed')
        # with two args
        assert_raises_regexp(ValueError, '.* shape .*',
                             func, np.zeros((5, 3)), np.zeros((4, 4)),
                             metric='precomputed')
        # even if shape[1] agrees (although thus second arg is spurious)
        assert_raises_regexp(ValueError, '.* shape .*',
                             func, np.zeros((5, 3)), np.zeros((4, 3)),
                             metric='precomputed')

        # Test not copied (if appropriate dtype)
        S = np.zeros((5, 5))
        S2 = func(S, metric="precomputed")
        assert_true(S is S2)
        # with two args
        S = np.zeros((5, 3))
        S2 = func(S, np.zeros((3, 3)), metric="precomputed")
        assert_true(S is S2)

        # Test always returns float dtype
        S = func(np.array([[1]], dtype='int'), metric='precomputed')
        assert_equal('f', S.dtype.kind)

        # Test converts list to array-like
        S = func([[1.]], metric='precomputed')
        assert_true(isinstance(S, np.ndarray)) 
Example 54
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_pairwise.py    Apache License 2.0
def test_pairwise_parallel():
    wminkowski_kwds = {'w': np.arange(1, 5).astype('double'), 'p': 1}
    metrics = [(pairwise_distances, 'euclidean', {}),
               (pairwise_distances, wminkowski, wminkowski_kwds),
               (pairwise_distances, 'wminkowski', wminkowski_kwds),
               (pairwise_kernels, 'polynomial', {'degree': 1}),
               (pairwise_kernels, callable_rbf_kernel, {'gamma': .1}),
               ]
    for func, metric, kwds in metrics:
        yield check_pairwise_parallel, func, metric, kwds 
Example 55
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_pairwise.py    Apache License 2.0
def test_pairwise_callable_nonstrict_metric():
    # paired_distances should allow callable metric where metric(x, x) != 0
    # Knowing that the callable is a strict metric would allow the diagonal to
    # be left uncalculated and set to 0.
    assert_equal(pairwise_distances([[1.]], metric=lambda x, y: 5)[0, 0], 5) 
Example 56
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_pairwise.py    Apache License 2.0
def test_paired_distances():
    # Test the pairwise_distance helper function.
    rng = np.random.RandomState(0)
    # Euclidean distance should be equivalent to calling the function.
    X = rng.random_sample((5, 4))
    # Euclidean distance, with Y != X.
    Y = rng.random_sample((5, 4))
    for metric, func in iteritems(PAIRED_DISTANCES):
        S = paired_distances(X, Y, metric=metric)
        S2 = func(X, Y)
        assert_array_almost_equal(S, S2)
        S3 = func(csr_matrix(X), csr_matrix(Y))
        assert_array_almost_equal(S, S3)
        if metric in PAIRWISE_DISTANCE_FUNCTIONS:
            # Check the pairwise_distances implementation
            # gives the same value
            distances = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
            distances = np.diag(distances)
            assert_array_almost_equal(distances, S)

    # Check the callable implementation
    S = paired_distances(X, Y, metric='manhattan')
    S2 = paired_distances(X, Y, metric=lambda x, y: np.abs(x - y).sum(axis=0))
    assert_array_almost_equal(S, S2)

    # Test that a ValueError is raised when the lengths of X and Y differ.
    Y = rng.random_sample((3, 4))
    assert_raises(ValueError, paired_distances, X, Y) 
Example 57
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_dbscan.py    Apache License 2.0
def test_dbscan_sparse_precomputed():
    D = pairwise_distances(X)
    nn = NearestNeighbors(radius=.9).fit(X)
    D_sparse = nn.radius_neighbors_graph(mode='distance')
    # Ensure it is sparse not merely on diagonals:
    assert D_sparse.nnz < D.shape[0] * (D.shape[0] - 1)
    core_sparse, labels_sparse = dbscan(D_sparse,
                                        eps=.8,
                                        min_samples=10,
                                        metric='precomputed')
    core_dense, labels_dense = dbscan(D, eps=.8, min_samples=10,
                                      metric='precomputed')
    assert_array_equal(core_dense, core_sparse)
    assert_array_equal(labels_dense, labels_sparse) 
Example 58
Project: scanorama   Author: brianhie   File: t_sne_approx.py    MIT License
def trustworthiness(X, X_embedded, n_neighbors=5, precomputed=False):
    """Expresses to what extent the local structure is retained.

    The trustworthiness is within [0, 1]. It is defined as

    .. math::

        T(k) = 1 - \frac{2}{nk (2n - 3k - 1)} \sum^n_{i=1}
            \sum_{j \in U^{(k)}_i} (r(i, j) - k)

    where :math:`r(i, j)` is the rank of the embedded datapoint j
    according to the pairwise distances between the embedded datapoints,
    :math:`U^{(k)}_i` is the set of points that are in the k nearest
    neighbors in the embedded space but not in the original space.

    * "Neighborhood Preservation in Nonlinear Projection Methods: An
      Experimental Study"
      J. Venna, S. Kaski
    * "Learning a Parametric Embedding by Preserving Local Structure"
      L.J.P. van der Maaten

    Parameters
    ----------
    X : array, shape (n_samples, n_features) or (n_samples, n_samples)
        If the metric is 'precomputed' X must be a square distance
        matrix. Otherwise it contains a sample per row.

    X_embedded : array, shape (n_samples, n_components)
        Embedding of the training data in low-dimensional space.

    n_neighbors : int, optional (default: 5)
        Number of neighbors k that will be considered.

    precomputed : bool, optional (default: False)
        Set this flag if X is a precomputed square distance matrix.

    Returns
    -------
    trustworthiness : float
        Trustworthiness of the low-dimensional embedding.
    """
    if precomputed:
        dist_X = X
    else:
        dist_X = pairwise_distances(X, squared=True)
    dist_X_embedded = pairwise_distances(X_embedded, squared=True)
    ind_X = np.argsort(dist_X, axis=1)
    ind_X_embedded = np.argsort(dist_X_embedded, axis=1)[:, 1:n_neighbors + 1]

    n_samples = X.shape[0]
    t = 0.0
    ranks = np.zeros(n_neighbors)
    for i in range(n_samples):
        for j in range(n_neighbors):
            ranks[j] = np.where(ind_X[i] == ind_X_embedded[i, j])[0][0]
        ranks -= n_neighbors
        t += np.sum(ranks[ranks > 0])
    t = 1.0 - t * (2.0 / (n_samples * n_neighbors *
                          (2.0 * n_samples - 3.0 * n_neighbors - 1.0)))
    return t 
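
Note: a minimal usage sketch for the trustworthiness() helper above, assuming numpy and pairwise_distances are imported as in the surrounding module (PCA is used here purely as an illustrative embedding, it is not part of the source):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.random_sample((50, 10))
X_embedded = PCA(n_components=2).fit_transform(X)
print('trustworthiness: %.3f' % trustworthiness(X, X_embedded, n_neighbors=5))
# values close to 1.0 mean the k-nearest-neighbor structure is well preserved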
Example 68
Project: Deep-SAD-PyTorch   Author: lukasruff   File: kde.py    MIT License 4 votes vote down vote up
def train(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0,
              bandwidth_GridSearchCV: bool = True):
        """Trains the Kernel Density Estimation model on the training data."""
        logger = logging.getLogger()

        # do not drop last batch for non-SGD optimization shallow_ssad
        train_loader = DataLoader(dataset=dataset.train_set, batch_size=128, shuffle=True,
                                  num_workers=n_jobs_dataloader, drop_last=False)

        # Get data from loader
        X = ()
        for data in train_loader:
            inputs, _, _, _ = data
            inputs = inputs.to(device)
            if self.hybrid:
                inputs = self.ae_net.encoder(inputs)  # in hybrid approach, take code representation of AE as features
            X_batch = inputs.view(inputs.size(0), -1)  # X_batch.shape = (batch_size, n_channels * height * width)
            X += (X_batch.cpu().data.numpy(),)
        X = np.concatenate(X)

        # Training
        logger.info('Starting training...')
        start_time = time.time()

        if bandwidth_GridSearchCV:
            # use grid search cross-validation to select bandwidth
            logger.info('Using GridSearchCV for bandwidth selection...')
            params = {'bandwidth': np.logspace(0.5, 5, num=10, base=2)}
            hyper_kde = GridSearchCV(KernelDensity(kernel=self.kernel), params, n_jobs=self.n_jobs, cv=5, verbose=0)
            hyper_kde.fit(X)
            self.bandwidth = hyper_kde.best_estimator_.bandwidth
            logger.info('Best bandwidth: {:.8f}'.format(self.bandwidth))
            self.model = hyper_kde.best_estimator_
        else:
            # if exponential kernel, re-initialize kde with bandwidth minimizing the numerical error
            if self.kernel == 'exponential':
                self.bandwidth = np.max(pairwise_distances(X)) ** 2
                self.model = KernelDensity(kernel=self.kernel, bandwidth=self.bandwidth)

            self.model.fit(X)

        train_time = time.time() - start_time
        self.results['train_time'] = train_time

        logger.info('Training Time: {:.3f}s'.format(self.results['train_time']))
        logger.info('Finished training.') 
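
Note: the bandwidth grid search used above works because KernelDensity exposes a score() method (total log-likelihood) that GridSearchCV can maximize. A standalone sketch with synthetic data (all values illustrative):

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

X = np.random.RandomState(0).randn(200, 5)
params = {'bandwidth': np.logspace(0.5, 5, num=10, base=2)}
search = GridSearchCV(KernelDensity(kernel='gaussian'), params, cv=5)
search.fit(X)
print('best bandwidth: %.4f' % search.best_estimator_.bandwidth)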
Example 69
Project: jr-tools   Author: kingjr   File: artefact.py    BSD 2-Clause "Simplified" License 4 votes vote down vote up
def find_reference(raw, n_cluster, pick_types=None, copy=True,
                   flat_threshold=1e-15, n_split=100, plot=True):
    """ Computes covariance on splits of the raw data, and apply KMeans
    clustering to find the number of disjoint references.
    n_cluster is found with PCA if float
    """
    import matplotlib.pyplot as plt
    from pyriemann.estimation import Covariances
    from sklearn.cluster import KMeans
    from sklearn.metrics.pairwise import pairwise_distances

    if copy:
        raw = raw.copy()
    # Remove flat lines
    flat = np.where(np.std(raw._data, axis=1) < flat_threshold)[0]
    for ch in flat:
        raw.info['bads'] += [raw.ch_names[ch]]

    # Pick data channels only
    if pick_types is None:
        pick_types = dict(seeg=True, exclude='bads')
    raw.pick_types(**pick_types)

    # Compute covariance on data splits
    n_time = len(raw.times)
    t_max = raw.times[n_time - n_time % n_split - 1]
    raw.crop(0, t_max, copy=False)  # ensure regularly sized splits
    X = np.array(np.array_split(raw._data, n_split, axis=1))
    covs = Covariances().fit_transform(X)

    # Compute cluster for each data split
    cluster = KMeans(n_cluster)
    all_kmeans = list()
    for cov in covs:
        dist = pairwise_distances(cov)
        all_kmeans.append(cluster.fit_predict(dist))

    # Combine clusters
    dist = pairwise_distances(np.array(all_kmeans).T)
    idx = cluster.fit_predict(dist)

    if plot:
        idx_ = np.argsort(idx)
        cov = np.median(covs, axis=0)
        plt.matshow(np.log10(cov)[idx_, :][:, idx_])

    clusters = [np.array(raw.ch_names)[idx == ii] for ii in np.unique(idx)]
    return clusters 
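
Note: the clustering-on-distances idiom used twice above can be reproduced without MNE or pyriemann; KMeans is fit on each channel's pairwise-distance profile rather than on the raw covariance (synthetic data, all parameters illustrative):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import pairwise_distances

cov = np.cov(np.random.RandomState(0).randn(8, 200))  # 8 "channels"
dist = pairwise_distances(cov)  # distance between channel covariance profiles
labels = KMeans(n_clusters=2, n_init=10).fit_predict(dist)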
Example 70
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0 4 votes vote down vote up
def silhouette_samples_slow(X, labels, metric='euclidean', **kwds):
    """Compute the Silhouette Coefficient for each sample.

    The Silhouette Coefficient is a measure of how well samples are clustered
    with samples that are similar to themselves. Clustering models with a high
    Silhouette Coefficient are said to be dense, where samples in the same
    cluster are similar to each other, and well separated, where samples in
    different clusters are not very similar to each other.

    The Silhouette Coefficient is calculated using the mean intra-cluster
    distance (a) and the mean nearest-cluster distance (b) for each sample.
    The Silhouette Coefficient for a sample is ``(b - a) / max(a, b)``.

    This function returns the Silhouette Coefficient for each sample.

    The best value is 1 and the worst value is -1. Values near 0 indicate
    overlapping clusters.

    Parameters
    ----------
    X : array [n_samples_a, n_features]
        Feature array.

    labels : array, shape = [n_samples]
             label values for each sample

    metric : string, or callable
        The metric to use when calculating distance between instances in a
        feature array. If metric is a string, it must be one of the options
        allowed by metrics.pairwise.pairwise_distances. If X is the distance
        array itself, use "precomputed" as the metric.

    `**kwds` : optional keyword parameters
        Any further parameters are passed directly to the distance function.
        If using a scipy.spatial.distance metric, the parameters are still
        metric dependent. See the scipy docs for usage examples.

    Returns
    -------
    silhouette : array, shape = [n_samples]
        Silhouette Coefficient for each sample.

    References
    ----------

    Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
        Interpretation and Validation of Cluster Analysis". Computational
        and Applied Mathematics 20: 53-65. doi:10.1016/0377-0427(87)90125-7.

    http://en.wikipedia.org/wiki/Silhouette_(clustering)

    """
    metric = distance_metrics()[metric]
    n = labels.shape[0]
    A = np.array([_intra_cluster_distance_slow(X, labels, metric, i)
                  for i in range(n)])
    B = np.array([_nearest_cluster_distance_slow(X, labels, metric, i)
                  for i in range(n)])
    sil_samples = (B - A) / np.maximum(A, B)
    # nan values are for clusters of size 1, and should be 0
    return np.nan_to_num(sil_samples) 
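
Note: the helpers _intra_cluster_distance_slow and _nearest_cluster_distance_slow are referenced above but not included in this snippet. A minimal sketch consistent with the docstring (mean intra-cluster distance a, mean nearest-cluster distance b):

def _intra_cluster_distance_slow(X, labels, metric, i):
    # Mean distance from sample i to the other members of its own cluster.
    # np.mean of an empty list yields nan for singleton clusters, which the
    # caller maps to 0 via np.nan_to_num.
    indices = np.where(labels == labels[i])[0]
    return np.mean([metric(X[i], X[j]) for j in indices if j != i])

def _nearest_cluster_distance_slow(X, labels, metric, i):
    # Smallest mean distance from sample i to the members of any other cluster.
    label = labels[i]
    return np.min([np.mean([metric(X[i], X[j])
                            for j in np.where(labels == cur_label)[0]])
                   for cur_label in set(labels) if cur_label != label])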
Example 71
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0 4 votes vote down vote up
def silhouette_samples_block(X, labels, metric='euclidean', n_jobs=1, **kwds):
    """Compute the Silhouette Coefficient for each sample.

    The Silhouette Coefficient is a measure of how well samples are clustered
    with samples that are similar to themselves. Clustering models with a high
    Silhouette Coefficient are said to be dense, where samples in the same
    cluster are similar to each other, and well separated, where samples in
    different clusters are not very similar to each other.

    The Silhouette Coefficient is calculated using the mean intra-cluster
    distance (a) and the mean nearest-cluster distance (b) for each sample.
    The Silhouette Coefficient for a sample is ``(b - a) / max(a, b)``.

    This function returns the Silhouette Coefficient for each sample.

    The best value is 1 and the worst value is -1. Values near 0 indicate
    overlapping clusters.

    Parameters
    ----------
    X : array [n_samples_a, n_features]
        Feature array.

    labels : array, shape = [n_samples]
             label values for each sample

    metric : string, or callable
        The metric to use when calculating distance between instances in a
        feature array. If metric is a string, it must be one of the options
        allowed by metrics.pairwise.pairwise_distances. If X is the distance
        array itself, use "precomputed" as the metric.

    `**kwds` : optional keyword parameters
        Any further parameters are passed directly to the distance function.
        If using a scipy.spatial.distance metric, the parameters are still
        metric dependent. See the scipy docs for usage examples.

    Returns
    -------
    silhouette : array, shape = [n_samples]
        Silhouette Coefficient for each sample.

    References
    ----------

    Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
        Interpretation and Validation of Cluster Analysis". Computational
        and Applied Mathematics 20: 53-65. doi:10.1016/0377-0427(87)90125-7.

    http://en.wikipedia.org/wiki/Silhouette_(clustering)

    """
    A = _intra_cluster_distances_block(X, labels, metric, n_jobs=n_jobs,
                                       **kwds)
    B = _nearest_cluster_distance_block(X, labels, metric, n_jobs=n_jobs,
                                        **kwds)
    sil_samples = (B - A) / np.maximum(A, B)
    # nan values are for clusters of size 1, and should be 0
    return np.nan_to_num(sil_samples) 
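
Note: a quick sanity check for both variants above, assuming the distance helpers are defined; scikit-learn's built-in silhouette_samples should produce the same values on toy data:

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_samples

X, labels = make_blobs(n_samples=60, centers=3, random_state=0)
reference = silhouette_samples(X, labels, metric='euclidean')
# e.g. np.testing.assert_array_almost_equal(reference,
#          silhouette_samples_slow(X, labels))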
Example 72
Project: political-affiliation-prediction   Author: kirel   File: newsreader.py    MIT License 4 votes vote down vote up
def pairwise_dists(data, nneighbors=10, folder='model', dist='l2'):
    '''

    Computes pairwise distances between bag-of-words vectors of articles

    INPUT
    folder      model folder
    nneighbors  number of closest neighbors to include in distance list

    '''
    stopwords = codecs.open("stopwords.txt", "r", encoding="utf-8", errors='ignore').readlines()[5:]
    stops = map(lambda x:x.lower().strip(),stopwords)

    # using now stopwords and filtering out digits
    bow = TfidfVectorizer(min_df=2,stop_words=stops)
    X = bow.fit_transform(data)
    print 'Computing %s pairwise distances'%dist
    # KPCA transform bow vectors
    if dist == 'l2_kpca_zscore':
        K = pairwise_distances(X,metric='l2',n_jobs=1)
        perc = 50.0
        width = percentile(K.flatten(),perc)
        Xc = zscore(KernelPCA(n_components=50,kernel='rbf',gamma=width).fit_transform(X))
        K = pairwise_distances(Xc,metric='l2',n_jobs=1)
    elif dist == 'l2_kpca':
        K = pairwise_distances(X,metric='l2',n_jobs=1)
        perc = 100./len(data)
        width = percentile(K.flatten(),perc)
        Xc = KernelPCA(n_components=50,kernel='rbf',gamma=width).fit_transform(X)
        K = pairwise_distances(Xc,metric='l2',n_jobs=1)
    elif dist == 'l2':
        K = pairwise_distances(X,metric='l2',n_jobs=1)
    elif dist == 'l1':
        K = pairwise_distances(X,metric='l1',n_jobs=1)

    # collect closest neighbors
    distances = []
    for urlidx in range(len(data)):
        idx =  (K[urlidx,:]).argsort()[1:nneighbors+1]
        for sidx in idx:
            distances.append([urlidx,sidx,(idx==sidx).nonzero()[0][0]])

    return distances 
Example 73
Project: political-affiliation-prediction   Author: kirel   File: newsreader.py    MIT License 4 votes vote down vote up
def kpca_cluster(data,nclusters=100,ncomponents=40,topwhat=10,zscored=False):
    '''

    Computes clustering of bag-of-words vectors of articles

    INPUT
    folder      model folder
    nclusters   number of clusters

    '''
    from sklearn.cluster import KMeans
    # filtering out some noise words
    stops = map(lambda x:x.lower().strip(),open('stopwords.txt').readlines()[6:])

    # vectorize non-stopwords 
    bow = TfidfVectorizer(min_df=2,stop_words=stops)
    X = bow.fit_transform(data)

    # creating bow-index-to-word map
    idx2word = dict(zip(bow.vocabulary_.values(),bow.vocabulary_.keys()))

    # using now stopwords and filtering out digits
    print 'Computing pairwise distances' 
    K = pairwise_distances(X,metric='l2',n_jobs=1)
    perc = 50.0
    width = percentile(K.flatten(),perc)

    # KPCA transform bow vectors
    Xc = KernelPCA(n_components=ncomponents,kernel='rbf',gamma=width).fit_transform(X)
    
    if zscored:
        Xc = zscore(Xc)
    
    # compute clusters
    km = KMeans(n_clusters=nclusters).fit(Xc)
    Xc = km.predict(Xc)

    clusters = []
    for icluster in range(nclusters):
        nmembers = (Xc==icluster).sum()
        if True:#nmembers < len(data) / 5.0 and nmembers > 1: # only group clusters big enough but not too big
            members = (Xc==icluster).nonzero()[0]
            topwordidx = array(X[members,:].sum(axis=0))[0].argsort()[-topwhat:][::-1]
            topwords = ' '.join([idx2word[wi] for wi in topwordidx])
            meanDist = triu(pairwise_distances(X[members,:],metric='l2',n_jobs=1)).sum()
            meanDist = meanDist / (len(members) + (len(members)**2 - len(members))/2.0)
            # print u'Cluster %d'%icluster + u' %d members'%nmembers + u' mean Distance %f'%meanDist + u'\n\t'+topwords
            clusters.append({
                'name':'Cluster-%d'%icluster,
                'description': topwords,
                'members': list(members),
                'meanL2Distances': meanDist
                })

    return clusters 
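
Note: the two functions above share a kernel-width heuristic: gamma for the RBF KernelPCA is set to a percentile of the pairwise L2 distances. In isolation (np.percentile stands in for the percentile name imported elsewhere in the source; all values illustrative):

import numpy as np
from sklearn.decomposition import KernelPCA
from sklearn.metrics.pairwise import pairwise_distances

X = np.random.RandomState(0).rand(100, 20)
K = pairwise_distances(X, metric='l2')
width = np.percentile(K.flatten(), 50.0)
# the source passes the raw distance percentile as gamma; a common
# alternative parameterization is gamma = 1.0 / (2 * width ** 2)
Xc = KernelPCA(n_components=10, kernel='rbf', gamma=width).fit_transform(X)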
Example 74
Project: progressivis   Author: jdfekete   File: pairwise.py    BSD 2-Clause "Simplified" License 4 votes vote down vote up
def run_step(self,run_number,step_size,howlong):
        dfslot = self.get_input_slot('table')
        df = dfslot.data()
        dfslot.update(run_number)
        if dfslot.updated.any() or dfslot.deleted.any():        
            dfslot.reset()
            logger.info('Resetting history because of changes in the input table')
            dfslot.update(run_number)
            #TODO: be smarter with changed values

        m = step_size
        
        indices = dfslot.created.next(m)
        m = indices_len(indices)

        i = None
        j = None
        Si = self._table['document']

        arrayslot = self.get_input_slot('array')
        if arrayslot is not None and arrayslot.data() is not None:
            array = arrayslot.data()
            logger.debug('Using array instead of DataFrame columns')
            if Si is not None:
                i = array[self._last_index]
            j = array[indices]
        if j is None:
            if self.columns is None:
                self.columns = df.columns.delete(np.where(df.columns==UPDATE_COLUMN))
            elif not isinstance(self.columns, pd.Index):
                self.columns = pd.Index(self.columns)
            rows = df[self.columns]
            if Si is not None:
                i = rows.loc[self._last_index]
                assert len(i)==len(self._last_index)
            j = rows.loc[fix_loc(indices)]
            assert len(j)==indices_len(indices)

        Sj = pairwise_distances(j, metric=self._metric, n_jobs=self._n_jobs)
        if Si is None:
            mat = self._buf.resize(Sj.shape[0])
            mat[:,:] = Sj
            self._last_index = dfslot.last_index[indices]
        else:
            Sij = pairwise_distances(i,j, metric=self._metric, n_jobs=self._n_jobs)
            n0 = i.shape[0]
            n1 = n0+j.shape[0]
            mat = self._buf.resize(n1)
            mat[0:n0,n0:n1] = Sij
            mat[n0:n1,0:n0] = Sij.T
            mat[n0:n1,n0:n1] = Sj
            self._last_index = self._last_index.append(df.index[indices])
            #truth = pairwise_distances(array[0:n1], metric=self._metric)
            #import pdb
            #pdb.set_trace()
            #assert np.allclose(mat,truth)
        return self._return_run_step(self.next_state(dfslot), steps_run=m) 
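
Note: the block update above grows a symmetric distance matrix incrementally: Sj is the new-vs-new block and Sij the old-vs-new block. A standalone numpy sketch of the same bookkeeping (the progressivis buffer machinery is omitted):

import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

old = np.random.RandomState(0).randn(5, 3)  # rows already in the matrix
new = np.random.RandomState(1).randn(2, 3)  # newly arrived rows

Sj = pairwise_distances(new)                # new-vs-new block
Sij = pairwise_distances(old, new)          # old-vs-new block
n0, n1 = old.shape[0], old.shape[0] + new.shape[0]

mat = np.zeros((n1, n1))
mat[:n0, :n0] = pairwise_distances(old)     # block computed in earlier steps
mat[0:n0, n0:n1] = Sij
mat[n0:n1, 0:n0] = Sij.T
mat[n0:n1, n0:n1] = Sj
assert np.allclose(mat, pairwise_distances(np.vstack([old, new])))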
Example 75
Project: image-classifier   Author: gustavkkk   File: eval-all.py    MIT License 4 votes vote down vote up
def evaluate2(self,metric='cosine'):
	feature1=np.fromfile('./features/' + model_name +'-features.dat',dtype=np.float64)
	feature1=np.reshape(feature1,(class_size,feature_size))
	#np.savetxt('feature1.txt', feature1, delimiter=',')
	
	class_index = 0
	image_index = 0
	total_count = 0.0
	accept_sum = 0
	actual = []
	predict = []
	for filename in filenames:
	    #query-feature
	    X=self.read_imagelist(filelist_path + filename + extension)
	    test_num=np.shape(X)[0]
	    out = self.forward_all(data=X)
	    feature2=np.float64(out['deepid'])
	    feature2=np.reshape(feature2,(test_num,feature_size))
	    #np.savetxt('feature2.txt', feature2, delimiter=',')
	    #mt=pw.pairwise_distances(feature2, feature1, metric=metric)
	    mt=pw.cosine_similarity(feature2, feature1)
	    false=0
	    for i in range(test_num):
		actual.append(class_index)
		for j in range(class_size):
		   if np.max(mt[i]) == mt[i][j]:
			confusion_array[j] += 1	
			predict.append(j)
		image_index += 1

	    total_count += test_num
	    accept_sum += confusion_array[class_index]
	    class_index += 1
	
	print 'total:%d' % (round(total_count))
	print 'accept:%d' % (accept_sum)
	print 'reject:%d' % (round(total_count) - accept_sum)
	print 'accuracy:%.4f' % (accept_sum / total_count)

	#conf_mat = confusion_matrix(actual,predict)
	#print(conf_mat)
	#actual = np.array(actual)
	#predict = np.array(predict)
	#y_actual = pd.Series(actual, name='Actual')
	#y_predict = pd.Series(predict, name='Predicted')
	#df_confusion = pd.crosstab(y_actual,y_predict, rownames=['Actual'], colnames=['Predicted'], margins=True)
	#print(df_confusion)
	#plot_confusion_matrix(df_confusion)
	return (accept_sum / total_count)
	
    #process a text file 
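
Note: the commented-out pairwise_distances call and the cosine_similarity call above are interchangeable up to an affine transform: scikit-learn's cosine distance is one minus cosine similarity, so an argmax over similarity equals an argmin over distance. A quick check:

import numpy as np
from sklearn.metrics import pairwise as pw

a = np.random.RandomState(0).rand(3, 4)
b = np.random.RandomState(1).rand(2, 4)
np.testing.assert_array_almost_equal(
    pw.pairwise_distances(a, b, metric='cosine'),
    1.0 - pw.cosine_similarity(a, b))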
Example 76
Project: linear_neuron   Author: uglyboxer   File: test_pairwise.py    MIT License 4 votes vote down vote up
def test_pairwise_distances_argmin_min():
    # Check pairwise minimum distances computation for any metric
    X = [[0], [1]]
    Y = [[-1], [2]]

    Xsp = dok_matrix(X)
    Ysp = csr_matrix(Y, dtype=np.float32)

    # euclidean metric
    D, E = pairwise_distances_argmin_min(X, Y, metric="euclidean")
    D2 = pairwise_distances_argmin(X, Y, metric="euclidean")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(D2, [0, 1])
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # sparse matrix case
    Dsp, Esp = pairwise_distances_argmin_min(Xsp, Ysp, metric="euclidean")
    assert_array_equal(Dsp, D)
    assert_array_equal(Esp, E)
    # We don't want np.matrix here
    assert_equal(type(Dsp), np.ndarray)
    assert_equal(type(Esp), np.ndarray)

    # Non-euclidean sklearn metric
    D, E = pairwise_distances_argmin_min(X, Y, metric="manhattan")
    D2 = pairwise_distances_argmin(X, Y, metric="manhattan")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(D2, [0, 1])
    assert_array_almost_equal(E, [1., 1.])
    D, E = pairwise_distances_argmin_min(Xsp, Ysp, metric="manhattan")
    D2 = pairwise_distances_argmin(Xsp, Ysp, metric="manhattan")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Non-euclidean Scipy distance (callable)
    D, E = pairwise_distances_argmin_min(X, Y, metric=minkowski,
                                         metric_kwargs={"p": 2})
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Non-euclidean Scipy distance (string)
    D, E = pairwise_distances_argmin_min(X, Y, metric="minkowski",
                                         metric_kwargs={"p": 2})
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Compare with naive implementation
    rng = np.random.RandomState(0)
    X = rng.randn(97, 149)
    Y = rng.randn(111, 149)

    dist = pairwise_distances(X, Y, metric="manhattan")
    dist_orig_ind = dist.argmin(axis=0)
    dist_orig_val = dist[dist_orig_ind, range(len(dist_orig_ind))]

    dist_chunked_ind, dist_chunked_val = pairwise_distances_argmin_min(
        X, Y, axis=0, metric="manhattan", batch_size=50)
    np.testing.assert_almost_equal(dist_orig_ind, dist_chunked_ind, decimal=7)
    np.testing.assert_almost_equal(dist_orig_val, dist_chunked_val, decimal=7) 
Example 77
Project: Weiss   Author: WangWenjun559   File: test_pairwise.py    Apache License 2.0 4 votes vote down vote up
def test_pairwise_distances_argmin_min():
    # Check pairwise minimum distances computation for any metric
    X = [[0], [1]]
    Y = [[-1], [2]]

    Xsp = dok_matrix(X)
    Ysp = csr_matrix(Y, dtype=np.float32)

    # euclidean metric
    D, E = pairwise_distances_argmin_min(X, Y, metric="euclidean")
    D2 = pairwise_distances_argmin(X, Y, metric="euclidean")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(D2, [0, 1])
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # sparse matrix case
    Dsp, Esp = pairwise_distances_argmin_min(Xsp, Ysp, metric="euclidean")
    assert_array_equal(Dsp, D)
    assert_array_equal(Esp, E)
    # We don't want np.matrix here
    assert_equal(type(Dsp), np.ndarray)
    assert_equal(type(Esp), np.ndarray)

    # Non-euclidean sklearn metric
    D, E = pairwise_distances_argmin_min(X, Y, metric="manhattan")
    D2 = pairwise_distances_argmin(X, Y, metric="manhattan")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(D2, [0, 1])
    assert_array_almost_equal(E, [1., 1.])
    D, E = pairwise_distances_argmin_min(Xsp, Ysp, metric="manhattan")
    D2 = pairwise_distances_argmin(Xsp, Ysp, metric="manhattan")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Non-euclidean Scipy distance (callable)
    D, E = pairwise_distances_argmin_min(X, Y, metric=minkowski,
                                         metric_kwargs={"p": 2})
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Non-euclidean Scipy distance (string)
    D, E = pairwise_distances_argmin_min(X, Y, metric="minkowski",
                                         metric_kwargs={"p": 2})
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Compare with naive implementation
    rng = np.random.RandomState(0)
    X = rng.randn(97, 149)
    Y = rng.randn(111, 149)

    dist = pairwise_distances(X, Y, metric="manhattan")
    dist_orig_ind = dist.argmin(axis=0)
    dist_orig_val = dist[dist_orig_ind, range(len(dist_orig_ind))]

    dist_chunked_ind, dist_chunked_val = pairwise_distances_argmin_min(
        X, Y, axis=0, metric="manhattan", batch_size=50)
    np.testing.assert_almost_equal(dist_orig_ind, dist_chunked_ind, decimal=7)
    np.testing.assert_almost_equal(dist_orig_val, dist_chunked_val, decimal=7) 
Example 78
Project: Weiss   Author: WangWenjun559   File: test_approximate.py    Apache License 2.0 4 votes vote down vote up
def test_radius_neighbors_boundary_handling():
    X = [[0.999, 0.001], [0.5, 0.5], [0, 1.], [-1., 0.001]]
    n_points = len(X)

    # Build an exact nearest neighbors model as reference model to ensure
    # consistency between exact and approximate methods
    nnbrs = NearestNeighbors(algorithm='brute', metric='cosine').fit(X)

    # Build a LSHForest model with hyperparameter values that always guarantee
    # exact results on this toy dataset.
    lsfh = LSHForest(min_hash_match=0, n_candidates=n_points).fit(X)

    # define a query aligned with the first axis
    query = [[1., 0.]]

    # Compute the exact cosine distances of the query to the four points of
    # the dataset
    dists = pairwise_distances(query, X, metric='cosine').ravel()

    # The first point is almost aligned with the query (very small angle),
    # the cosine distance should therefore be almost null:
    assert_almost_equal(dists[0], 0, decimal=5)

    # The second point forms an angle of 45 degrees to the query vector
    assert_almost_equal(dists[1], 1 - np.cos(np.pi / 4))

    # The third point is orthogonal from the query vector hence at a distance
    # exactly one:
    assert_almost_equal(dists[2], 1)

    # The last point is almost collinear but with opposite sign to the query
    # therefore it has a cosine 'distance' very close to the maximum possible
    # value of 2.
    assert_almost_equal(dists[3], 2, decimal=5)

    # If we query with a radius of one, all the samples except the last sample
    # should be included in the results. This means that the third sample
    # is lying on the boundary of the radius query:
    exact_dists, exact_idx = nnbrs.radius_neighbors(query, radius=1)
    approx_dists, approx_idx = lsfh.radius_neighbors(query, radius=1)

    assert_array_equal(np.sort(exact_idx[0]), [0, 1, 2])
    assert_array_equal(np.sort(approx_idx[0]), [0, 1, 2])
    assert_array_almost_equal(np.sort(exact_dists[0]), dists[:-1])
    assert_array_almost_equal(np.sort(approx_dists[0]), dists[:-1])

    # If we perform the same query with a slightly lower radius, the third
    # point of the dataset that lay on the boundary of the previous query
    # is now rejected:
    eps = np.finfo(np.float64).eps
    exact_dists, exact_idx = nnbrs.radius_neighbors(query, radius=1 - eps)
    approx_dists, approx_idx = lsfh.radius_neighbors(query, radius=1 - eps)

    assert_array_equal(np.sort(exact_idx[0]), [0, 1])
    assert_array_equal(np.sort(approx_idx[0]), [0, 1])
    assert_array_almost_equal(np.sort(exact_dists[0]), dists[:-2])
    assert_array_almost_equal(np.sort(approx_dists[0]), dists[:-2]) 
Example 79
Project: modAL   Author: modAL-python   File: batch.py    MIT License 4 votes vote down vote up
def ranked_batch(classifier: Union[BaseLearner, BaseCommittee],
                 unlabeled: modALinput,
                 uncertainty_scores: np.ndarray,
                 n_instances: int,
                 metric: Union[str, Callable],
                 n_jobs: Union[int, None]) -> np.ndarray:
    """
    Query our top :n_instances: to request for labeling.

    Refer to Cardoso et al.'s "Ranked batch-mode active learning":
        https://www.sciencedirect.com/science/article/pii/S0020025516313949

    Args:
        classifier: One of modAL's supported active learning models.
        unlabeled: Set of records to be considered for our active learning model.
        uncertainty_scores: Our classifier's predictions over the response variable.
        n_instances: Limit on the number of records to query from our unlabeled set.
        metric: This parameter is passed to :func:`~sklearn.metrics.pairwise.pairwise_distances`.
        n_jobs: This parameter is passed to :func:`~sklearn.metrics.pairwise.pairwise_distances`.

    Returns:
        The indices of the top n_instances ranked unlabeled samples.
    """
    # Make a local copy of our classifier's training data.
    # Define our record container and record the best cold start instance in the case of cold start.
    if classifier.X_training is None:
        best_coldstart_instance_index, labeled = select_cold_start_instance(X=unlabeled, metric=metric, n_jobs=n_jobs)
        instance_index_ranking = [best_coldstart_instance_index]
    elif classifier.X_training.shape[0] > 0:
        labeled = classifier.X_training[:]
        instance_index_ranking = []
    
    # The maximum number of records to sample.
    ceiling = np.minimum(unlabeled.shape[0], n_instances) - len(instance_index_ranking)

    # mask for unlabeled initialized as transparent
    mask = np.ones(unlabeled.shape[0], dtype=bool)

    for _ in range(ceiling):

        # Receive the instance and corresponding index from our unlabeled copy that scores highest.
        instance_index, instance, mask = select_instance(X_training=labeled, X_pool=unlabeled,
                                                         X_uncertainty=uncertainty_scores, mask=mask,
                                                         metric=metric, n_jobs=n_jobs)

        # Add our instance we've considered for labeling to our labeled set. Although we don't
        # know it's label, we want further iterations to consider the newly-added instance so
        # that we don't query the same instance redundantly.
        labeled = data_vstack((labeled, instance))

        # Finally, append our instance's index to the bottom of our ranking.
        instance_index_ranking.append(instance_index)

    # Return numpy array, not a list.
    return np.array(instance_index_ranking) 
Example 80
Project: keras_cbof   Author: passalis   File: cbof.py    MIT License 4 votes vote down vote up
def initialize_bof_layers(model, data, n_samples=100, n_feature_samples=5000, batch_size=32, k_means_max_iters=300,
                          k_means_n_init=4):
    """
    Initializes the BoF layers of a keras model

    :param model: the keras model
    :param data: data to be used for initializing the model
    :param n_samples: number of data samples used for the initializes
    :param n_feature_samples: number of feature vectors to be used for the clustering process
    :param batch_size:
    :param k_means_max_iters: the maximum number of iterations for the clustering algorithm (k-means)
    :param k_means_n_init: defines how many times to run the k-means algorithm
    :return:
    """

    for i in range(len(model.layers)):
        if isinstance(model.layers[i], BoF_Pooling):
            print("Found BoF layer (layer %d), initializing..." % i)
            cur_layer = model.layers[i]

            # Compile a function for getting the feature vectors
            get_features = K.function([model.input] + [K.learning_phase()], [model.layers[i - 1].output])

            features = []
            for j in range(int(n_samples / batch_size)):
                cur_feats = get_features([data[j * batch_size:(j + 1) * batch_size], 0])[0]
                features.append(cur_feats.reshape((-1, cur_feats.shape[3])))
            features = np.concatenate(features)
            np.random.shuffle(features)
            features = features[:n_feature_samples]

            # Cluster the features
            kmeans = KMeans(n_clusters=cur_layer.N_k, n_init=k_means_n_init, max_iter=k_means_max_iters)
            kmeans.fit(features)
            V = kmeans.cluster_centers_.T
            V = V.reshape((1, 1, V.shape[0], V.shape[1]))

            # Set the value for the codebook
            K.set_value(cur_layer.V, np.float32(V))

            # Get the mean distance for initializing the sigmas
            mean_dist = np.mean(pairwise_distances(features[:100]))

            # Set the value for sigmas
            sigmas = np.ones((1, 1, 1, cur_layer.N_k)) * (mean_dist ** 2)
            K.set_value(cur_layer.sigmas, np.float32(sigmas))
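
Note: the sigma initialization above, in isolation: the RBF widths are seeded with the squared mean pairwise distance of a feature subsample (shapes illustrative; 32 stands in for the layer's codeword count N_k):

import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

features = np.random.RandomState(0).rand(500, 64)
mean_dist = np.mean(pairwise_distances(features[:100]))
sigmas = np.ones((1, 1, 1, 32)) * (mean_dist ** 2)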