Python sklearn.neighbors.NearestNeighbors() Examples

The following are 30 code examples of sklearn.neighbors.NearestNeighbors(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.neighbors, or try the search function.
Example #1
Source File: icp.py    From 3d-lmnet with MIT License 7 votes vote down vote up
def nearest_neighbor(src, dst):
    '''
    For every point of src, look up its closest (Euclidean) point in dst.
    Input:
        src: Nxm array of query points
        dst: Nxm array of reference points (must have the same shape as src)
    Output:
        distances: flat array with the distance from each src point to its match
        indices: flat array with the row index in dst of each match
    '''

    assert src.shape == dst.shape

    # A single-neighbour index over dst; fit() returns the estimator itself.
    knn = NearestNeighbors(n_neighbors=1).fit(dst)
    nn_dist, nn_idx = knn.kneighbors(src, return_distance=True)

    # kneighbors yields (N, 1) arrays; flatten them to length N.
    return nn_dist.ravel(), nn_idx.ravel()
Example #2
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_radius_neighbors_boundary_handling():
    """Check that points lying exactly on the search radius are handled
    the same way by every algorithm.

    Also verifies that a single query point produces a 1-d object array
    instead of a 2-d array.
    """

    radius = 3.0
    X = np.array([[1.5], [3.0], [3.01]])
    query = [[0.0]]

    for algorithm in ALGORITHMS:
        model = neighbors.NearestNeighbors(radius=radius, algorithm=algorithm)
        model.fit(X)
        found = model.radius_neighbors(query, return_distance=False)

        assert_equal(found.shape, (1,))
        assert_equal(found.dtype, object)
        # Rows 0 and 1 (1.5 and the boundary value 3.0) are expected; 3.01 is not.
        assert_array_equal(found[0], [0, 1])
Example #3
Source File: create_npy.py    From vkitti3D-dataset with MIT License 6 votes vote down vote up
def knn_interpolation(pointcloud: np.ndarray, k: int = 1, eps: float = 0.2, njobs: int = 4,
                      unlabeled_class: int = 13) -> np.ndarray:
    """
    points without label will get an interpolated label given the k closest neighbors with a maximal distance of eps

    The label is read from the last column and the first three columns are used as coordinates.
    The input array is modified in place and also returned.

    :param pointcloud: points (labeled and unlabeled)
    :param k: maximal k neighbors are considered
    :param eps: considered neighbors have to be in range of eps
    :param njobs: number of parallel jobs handed to the neighbor search
    :param unlabeled_class: label value that marks a point as unlabeled (default 13);
        labels are counted with np.bincount, so they must be non-negative integers
    :return: interpolated point cloud
    """
    unlabeled_idx = (pointcloud[:, -1] == unlabeled_class)

    # Nothing to interpolate (this also covers an empty cloud): skip building the tree.
    if not unlabeled_idx.any():
        return pointcloud

    labeled = pointcloud[~unlabeled_idx]
    to_be_predicted = pointcloud[unlabeled_idx]

    neigh = NearestNeighbors(n_neighbors=k, radius=eps, algorithm='ball_tree', metric='euclidean', n_jobs=njobs)
    neigh.fit(labeled[:, :3])

    dist, ind = neigh.kneighbors(to_be_predicted[:, :3])

    # Labels of the k nearest labeled points, one row per unlabeled point.
    knn_classes = labeled[ind, -1].astype(int)
    # Neighbors farther away than eps vote for "still unlabeled".
    knn_classes[dist > eps] = unlabeled_class

    # Majority vote per row; argmax resolves ties towards the smallest label.
    pointcloud[unlabeled_idx, -1] = np.apply_along_axis(lambda x: np.bincount(x).argmax(), 1, knn_classes)

    return pointcloud
Example #4
Source File: icp_zou.py    From Silhouette-Guided-3D with MIT License 6 votes vote down vote up
def nearest_neighbor(src, dst):
    '''
    Match each src point to its nearest (Euclidean) point in dst.
    Input:
        src: Nxm array of points to look up
        dst: Nxm array of candidate points, same shape as src
    Output:
        distances: distance to the nearest dst point, one entry per src point
        indices: index into dst of that nearest point
    '''

    assert src.shape == dst.shape

    searcher = NearestNeighbors(n_neighbors=1)
    searcher.fit(dst)
    result = searcher.kneighbors(src, return_distance=True)
    closest_dist, closest_idx = result

    # Collapse the (N, 1) outputs into flat length-N arrays.
    return closest_dist.ravel(), closest_idx.ravel()
Example #5
Source File: evaluate.py    From BERMUDA with MIT License 6 votes vote down vote up
def cal_entropy(code, idx, dataset_labels, k=100):
    """ Compute, for selected cells, the entropy of the labels of their nearest neighbors
    Args:
        code: num_cells * num_features, embedding in which neighbors are searched
        idx: binary, marks the observations whose entropy is computed
        dataset_labels: labels indexed by the neighbor indices and passed to entropy()
        k: number of nearest neighbors
    Returns:
        entropy_list: list with one entropy value per selected cell
    """
    selected = np.where(idx == True)[0]
    knn = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(code)
    # Only the neighbor indices are needed; the distances are discarded.
    _, neighbor_ids = knn.kneighbors(code[selected, :])

    return [entropy(dataset_labels[neighbor_ids[pos, :]])
            for pos in range(len(selected))]
Example #6
Source File: NormalsEstimation.py    From pcloudpy with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def update(self):
        """Compute a normal for every input point from its nearest neighbors.

        Reads ``self.input_``, uses the first three array columns as
        coordinates, and stores the result of ``copy_polydata_add_normals``
        in ``self.output_``.
        """
        array_with_color = numpy_from_polydata(self.input_)
        coord = array_with_color[:, 0:3]
        normals = np.empty_like(coord)

        neigh = NearestNeighbors(n_neighbors=self.number_neighbors)
        neigh.fit(coord)

        # Query all points in one 2-D batch: kneighbors expects a 2-D array,
        # and a single call avoids one tree query per point.
        _, neighbor_indices = neigh.kneighbors(coord)

        for i, neighbor_ids in enumerate(neighbor_indices):
            # Skip the first hit (the query point itself) and keep the
            # coordinates of the remaining neighbors.
            neighborhood = coord[neighbor_ids[1:self.number_neighbors], 0:3]
            normals[i, 0:3] = self.get_normals(neighborhood)

        self.output_ = copy_polydata_add_normals(self.input_, normals)
Example #7
Source File: pc_util.py    From H3DNet with MIT License 6 votes vote down vote up
def point_add_sem_label(pt, sem, k=10):
    """Give each point in pt the most common label among its k nearest points in sem.

    Args:
        pt: array of query points; its rows are matched against the first
            three columns of sem.
        sem: array whose columns 0-2 are coordinates and column 3 is the label.
        k: number of nearest neighbors that vote on the label.

    Returns:
        1-d float array with one label per row of pt (empty when pt is empty).
    """
    sem_pt = sem[:, 0:3]
    sem_label = sem[:, 3]
    pt_label = np.zeros(pt.shape[0])
    if pt.shape[0] == 0:
        return pt_label

    nbrs = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(sem_pt)
    # Only the indices are used, so skip returning the distances.
    indices = nbrs.kneighbors(pt, return_distance=False)
    for i, neighbor_ids in enumerate(indices):
        mode_value = stats.mode(sem_label[neighbor_ids], axis=None)[0]
        # Depending on the SciPy version the mode is a scalar or a 1-element
        # array; np.ravel(...)[0] yields a plain scalar in both cases.
        pt_label[i] = np.ravel(mode_value)[0]
    return pt_label


    
# ----------------------------------------
# Testing
# ---------------------------------------- 
Example #8
Source File: icp.py    From PyGraphSLAM with MIT License 6 votes vote down vote up
def nearest_neighbor(src, dst):
    '''
    Nearest-neighbour lookup (Euclidean) from src into dst.
    Input:
        src: Nxm array of points
        dst: Nxm array of points, same shape as src
    Output:
        distances: for each src point, the distance to its nearest dst point
        indices: for each src point, the index of that nearest dst point
    '''

    assert src.shape == dst.shape

    # Build the index on dst and query it with src in one chained expression.
    dist_col, idx_col = NearestNeighbors(n_neighbors=1).fit(dst).kneighbors(
        src, return_distance=True)

    flat_dist = dist_col.ravel()
    flat_idx = idx_col.ravel()
    return flat_dist, flat_idx
Example #9
Source File: icp.py    From occupancy_flow with MIT License 6 votes vote down vote up
def nearest_neighbor(src, dst):
    '''
    Pair every src point with the closest (Euclidean) dst point.
    Input:
        src: Nxm array of points
        dst: Nxm array of points; its shape must equal src's
    Output:
        distances: Euclidean distance of each src point to its closest dst point
        indices: position in dst of each closest point
    '''

    assert src.shape == dst.shape

    model = NearestNeighbors(n_neighbors=1)
    model.fit(dst)

    # One neighbour per query row, so both outputs have shape (N, 1).
    d, i = model.kneighbors(src, return_distance=True)
    return d.ravel(), i.ravel()
Example #10
Source File: icp.py    From Silhouette-Guided-3D with MIT License 6 votes vote down vote up
def nearest_neighbor(src, dst):
    '''
    Return, per src point, the closest dst point under Euclidean distance.
    Input:
        src: Nxm array of points
        dst: Nxm array of points (same shape as src)
    Output:
        distances: flattened nearest-neighbour distances
        indices: flattened dst indices of the nearest neighbours
    '''

    assert src.shape == dst.shape

    index = NearestNeighbors(n_neighbors=1).fit(dst)
    neighbour_dist, neighbour_idx = index.kneighbors(src, return_distance=True)
    return neighbour_dist.ravel(), neighbour_idx.ravel()
Example #11
Source File: nearest_neighbors_model.py    From deep-regex with MIT License 6 votes vote down vote up
def do_classify(train_x, train_y, test_x, test_y):
    """Predict each test item by copying the label of its nearest training item.

    Both inputs are turned into bag-of-words features with get_all_bow().
    For every test row the prediction and gold value are printed, followed
    by the fraction of string-equal and DFA-equal predictions.
    """
    train_x_bow, test_x_bow = get_all_bow(train_x, test_x)

    model = NearestNeighbors(n_neighbors=1, algorithm='ball_tree')
    model.fit(train_x_bow)
    _, neighbor_rows = model.kneighbors(test_x_bow)
    # Each row holds exactly one neighbour; keep just that index.
    nearest = [row[0] for row in neighbor_rows]

    exact = 0.0
    dfa_equal = 0.0
    for row_index in range(len(test_x_bow)):
        gold = test_y[row_index]
        pred = train_y[nearest[row_index]]
        print("PRED: {}".format(pred))
        print("GOLD: {}".format(gold))
        if pred == gold:
            exact += 1.0
            print("string equal")
        if regex_equiv_from_raw(pred, gold):
            dfa_equal += 1.0
            print("dfa equal")
        print("")

    total = len(test_x_bow)
    print("{} String-Equal Correct".format(exact/total))
    print("{} DFA-Equal Correct".format(dfa_equal/total))
Example #12
Source File: test_nearest_neighbors.py    From mars with Apache License 2.0 6 votes vote down vote up
def testGPUFaissNearestNeighborsExecution(self):
        """Compare the 'faiss' k-neighbors result on GPU tensors with the
        result of the reference ``SkNearestNeighbors`` on the raw arrays."""
        rs = np.random.RandomState(0)
        raw_X = rs.rand(10, 5)
        raw_Y = rs.rand(8, 5)

        # test faiss execution
        X = mt.tensor(raw_X, chunk_size=7).to_gpu()
        Y = mt.tensor(raw_Y, chunk_size=8).to_gpu()

        nn = NearestNeighbors(n_neighbors=3, algorithm='faiss', metric='l2')
        nn.fit(X)
        ret = nn.kneighbors(Y)

        # Reference result computed on the raw arrays.
        snn = SkNearestNeighbors(n_neighbors=3, algorithm='auto', metric='l2')
        snn.fit(raw_X)
        expected_dist, expected_ind = snn.kneighbors(raw_Y)

        fetched = [r.fetch() for r in ret]
        # .get() is called on each fetched result before comparing it.
        np.testing.assert_almost_equal(fetched[0].get(), expected_dist, decimal=6)
        np.testing.assert_almost_equal(fetched[1].get(), expected_ind)
Example #13
Source File: angle_based.py    From kenchi with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _fit(self, X):
        """Fit the neighbor index on X and record a reference anomaly score.

        Sets ``n_neighbors_`` (capped at ``n_samples - 1``), ``estimator_``
        and ``_anomaly_score_min``; returns ``self``.
        """
        n_samples, _ = X.shape
        self.n_neighbors_ = np.minimum(self.n_neighbors, n_samples - 1)

        knn = NearestNeighbors(
            n_neighbors=self.n_neighbors_,
            algorithm=self.algorithm,
            leaf_size=self.leaf_size,
            metric=self.metric,
            p=self.p,
            metric_params=self.metric_params,
            n_jobs=self.n_jobs,
        )
        self.estimator_ = knn.fit(X)

        # Largest un-regularized score observed on the training data.
        train_scores = self._anomaly_score(X, regularize=False)
        self._anomaly_score_min = np.max(train_scores)

        return self
Example #14
Source File: interactive_plot.py    From srl-zoo with MIT License 6 votes vote down vote up
def __init__(self, states, rewards, image_plot, ax, images_path, view=0):
        """Store the plot inputs, index the states for nearest-neighbor lookup
        and draw a marker on the state returned by ``getCoords(0)``."""

        self.image_plot = image_plot
        self.images_path = images_path
        knn = NearestNeighbors(n_neighbors=1, algorithm='ball_tree')
        self.knn = knn.fit(states)
        self.state_dim = states.shape[1]
        self.ax = ax
        self.states = states
        self.rewards = rewards
        self.view = view

        # Highlight the selected state
        self.kwargs = dict(s=130, color='green', alpha=0.7)
        coords = self.getCoords(0)
        marker_axes = [[coords[0]], [coords[1]]]
        # States with more than two dimensions are drawn with a third coordinate.
        if states.shape[1] > 2:
            marker_axes.append([coords[2]])
        self.dot = ax.scatter(*marker_axes, **self.kwargs)
Example #15
Source File: distance_based.py    From kenchi with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _fit(self, X):
        """Fit the neighbor index on X.

        ``n_neighbors_`` is the requested ``n_neighbors`` capped at
        ``n_samples - 1`` and then raised to at least 1. Returns ``self``.
        """
        n_samples, _ = X.shape
        capped = np.minimum(self.n_neighbors, n_samples - 1)
        self.n_neighbors_ = np.maximum(1, capped)

        knn = NearestNeighbors(
            n_neighbors=self.n_neighbors_,
            algorithm=self.algorithm,
            leaf_size=self.leaf_size,
            metric=self.metric,
            p=self.p,
            metric_params=self.metric_params,
            n_jobs=self.n_jobs,
        )
        self.estimator_ = knn.fit(X)

        return self
Example #16
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_callable_metric():
    """Check that a callable metric gives the same distances with the
    'auto' and 'brute' algorithms."""

    def custom_metric(x1, x2):
        return np.sqrt(np.sum(x1 ** 2 + x2 ** 2))

    X = np.random.RandomState(42).rand(20, 2)
    # n_neighbors is passed by keyword: scikit-learn releases that make the
    # constructor arguments keyword-only reject the positional form.
    nbrs1 = neighbors.NearestNeighbors(n_neighbors=3, algorithm='auto',
                                       metric=custom_metric)
    nbrs2 = neighbors.NearestNeighbors(n_neighbors=3, algorithm='brute',
                                       metric=custom_metric)

    nbrs1.fit(X)
    nbrs2.fit(X)

    dist1, _ = nbrs1.kneighbors(X)
    dist2, _ = nbrs2.kneighbors(X)

    assert_array_almost_equal(dist1, dist2)
Example #17
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_unsupervised_inputs():
    """Check that fitting on an already-fitted NearestNeighbors, a BallTree
    or a KDTree gives the same neighbors as fitting on the raw data."""
    X = rng.random_sample((10, 3))

    # Reference: estimator fitted directly on the data.
    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)
    ref_dist, ref_ind = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)
    fit_sources = (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X))

    for fit_source in fit_sources:
        nbrs.fit(fit_source)
        got_dist, got_ind = nbrs.kneighbors(X)

        assert_array_almost_equal(ref_dist, got_dist)
        assert_array_almost_equal(ref_ind, got_ind)
Example #18
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_unsupervised_kneighbors(n_samples=20, n_features=5,
                                 n_query_pts=2, n_neighbors=5):
    """Check that kneighbors agrees across algorithms, for every p in P,
    with and without returned distances."""
    X = rng.rand(n_samples, n_features)
    test = rng.rand(n_query_pts, n_features)

    for p in P:
        indices_only = []
        with_distance = []

        for algorithm in ALGORITHMS:
            neigh = neighbors.NearestNeighbors(
                n_neighbors=n_neighbors, algorithm=algorithm, p=p)
            neigh.fit(X)

            indices_only.append(
                neigh.kneighbors(test, return_distance=False))
            with_distance.append(
                neigh.kneighbors(test, return_distance=True))

        # Compare each algorithm's output with the next one's; the last
        # entry is only ever used as the "next" side.
        consecutive = zip(indices_only, with_distance, with_distance[1:])
        for nodist, current, following in consecutive:
            assert_array_almost_equal(nodist, current[1])
            assert_array_almost_equal(current[0], following[0])
            assert_array_almost_equal(current[1], following[1])
Example #19
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_k_and_radius_neighbors_X_None():
    """Test kneighbors et al. when the query is None, i.e. the training
    points are queried against themselves."""
    for algorithm in ALGORITHMS:

        nn = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm)

        X = [[0], [1]]
        nn.fit(X)

        dist, ind = nn.kneighbors()
        assert_array_equal(dist, [[1], [1]])
        assert_array_equal(ind, [[1], [0]])
        dist, ind = nn.radius_neighbors(None, radius=1.5)
        check_object_arrays(dist, [[1], [1]])
        check_object_arrays(ind, [[1], [0]])

        # Test the graph variants.
        radius_graph = nn.radius_neighbors_graph(None, radius=1.5)
        knn_graph = nn.kneighbors_graph(None)
        for graph in [radius_graph, knn_graph]:
            # Assert on the loop variable so that BOTH graphs are checked.
            assert_array_equal(graph.toarray(), [[0, 1], [1, 0]])
            assert_array_equal(graph.data, [1, 1])
            assert_array_equal(graph.indices, [1, 0])

        X = [[0, 1], [0, 1], [1, 1]]
        nn = neighbors.NearestNeighbors(n_neighbors=2, algorithm=algorithm)
        nn.fit(X)
        assert_array_equal(
            nn.kneighbors_graph().toarray(),
            np.array([[0., 1., 1.], [1., 0., 1.], [1., 1., 0]]))
Example #20
Source File: clustering.py    From retentioneering-tools with Mozilla Public License 2.0 6 votes vote down vote up
def find_best_eps(data, q=0.05):
    """
    Pick a maximal distance (eps) between dots for DBSCAN clustering.

    Parameters
    -------
    data: pd.DataFrame
        Dataframe with features for clustering indexed as in ``retention_config.index_col``
    q: float, optional
        Quantile taken over the positive nearest-neighbor distances between dots; that quantile is returned as eps. Default: ``0.05``

    Returns
    -------
    Optimal eps

    Return type
    -------
    Float
    """
    searcher = NearestNeighbors()
    searcher.fit(data)
    # No query argument: the fitted points are queried against themselves.
    distances, _ = searcher.kneighbors()
    flat = distances.flatten()
    # Zero distances are left out before taking the quantile.
    positive = flat[flat > 0]
    return np.quantile(positive, q)
Example #21
Source File: icp.py    From AugmentedAutoencoder with MIT License 6 votes vote down vote up
def nearest_neighbor(self,src, dst):
        '''
        For each point in src, locate the nearest (Euclidean) point in dst.
        Input:
            src: Nxm array of points
            dst: Nxm array of points, same shape as src
        Output:
            distances: flat array of distances to the nearest dst points
            indices: flat array of dst indices of those points
        '''

        assert src.shape == dst.shape

        knn = NearestNeighbors(n_neighbors=1).fit(dst)
        nearest_dist, nearest_idx = knn.kneighbors(src, return_distance=True)

        # Flatten the (N, 1) results to length-N arrays.
        return nearest_dist.ravel(), nearest_idx.ravel()
Example #22
Source File: icp_utils.py    From AugmentedAutoencoder with MIT License 6 votes vote down vote up
def nearest_neighbor(src, dst):
    '''
    Query dst for the nearest (Euclidean) neighbour of every src point.
    Input:
        src: Nxm array of points
        dst: Nxm array of points (shape must match src)
    Output:
        distances: nearest-neighbour distance for each src point
        indices: dst index of the nearest neighbour for each src point
    '''

    assert src.shape == dst.shape

    tree = NearestNeighbors(n_neighbors=1)
    tree.fit(dst)
    hits = tree.kneighbors(src, return_distance=True)

    # hits is the (distances, indices) pair; each part has shape (N, 1).
    return hits[0].ravel(), hits[1].ravel()
Example #23
Source File: localized_ensemble.py    From xam with MIT License 6 votes vote down vote up
def fit(self, X, y):
        """Split the data, index the validation part for neighbor search and
        fit the ensemble on the remaining part. Returns ``self``."""

        # Split the training set in two
        split = model_selection.train_test_split(
            X, y, test_size=self.test_ratio, random_state=self.random_state)
        X_fit, self.X_val_, y_fit, self.y_val_ = split

        # Fit the nearest neighbours; their number is a fraction of the
        # validation set size, truncated to an int.
        n_neighbors = int(self.neighbors_ratio * len(self.X_val_))
        self.nn_ = neighbors.NearestNeighbors(
            n_neighbors=n_neighbors,
            algorithm=self.algorithm,
        )
        self.nn_.fit(self.X_val_)

        # Fit the ensemble
        self.ensemble.fit(X_fit, y_fit)

        return self
Example #24
Source File: make_knn_feats.py    From wsdm19cup with MIT License 6 votes vote down vote up
def fit(self, X, y):
        '''
            Prepare the training set and the self.NN neighbor index
        '''
        # The cosine metric is paired with brute-force search; any other
        # metric lets the estimator choose the algorithm.
        search_algorithm = 'brute' if self.metric == 'cosine' else 'auto'

        # The index is sized for the largest k in k_list.
        self.NN = NearestNeighbors(
            n_neighbors=max(self.k_list),
            metric=self.metric,
            n_jobs=1,
            algorithm=search_algorithm,
        )
        self.NN.fit(X)

        # Store labels
        self.y_train = y

        # Save how many classes we have: count them from y unless a value
        # was already provided in n_classes_
        if self.n_classes_ is None:
            self.n_classes = np.unique(y).shape[0]
        else:
            self.n_classes = self.n_classes_
Example #25
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_unsupervised_radius_neighbors(n_samples=20, n_features=5,
                                       n_query_pts=2, radius=0.5,
                                       random_state=0):
    """Check that radius queries agree across algorithms, for every p in P,
    with and without returned distances."""
    rng = np.random.RandomState(random_state)
    X = rng.rand(n_samples, n_features)
    test = rng.rand(n_query_pts, n_features)

    def flat(arrays):
        # Join the per-query arrays into one flat array for comparison.
        return np.concatenate(list(arrays))

    for p in P:
        results = []

        for algorithm in ALGORITHMS:
            neigh = neighbors.NearestNeighbors(
                radius=radius, algorithm=algorithm, p=p)
            neigh.fit(X)

            ind_only = neigh.radius_neighbors(test, return_distance=False)

            # sort the results: this is not done automatically for
            # radius searches
            dist, ind = neigh.radius_neighbors(test, return_distance=True)
            for d, i, i1 in zip(dist, ind, ind_only):
                order = d.argsort()
                # In-place writes, so the arrays held by dist/ind/ind_only
                # are the ones that end up sorted.
                d[:] = d[order]
                i[:] = i[order]
                i1[:] = i1[order]
            results.append((dist, ind))

            assert_array_almost_equal(flat(ind), flat(ind_only))

        # Compare each algorithm's sorted output with the next one's.
        for (dist_a, ind_a), (dist_b, ind_b) in zip(results, results[1:]):
            assert_array_almost_equal(flat(dist_a), flat(dist_b))
            assert_array_almost_equal(flat(ind_a), flat(ind_b))
Example #26
Source File: test_mapper.py    From kepler-mapper with MIT License 5 votes vote down vote up
def test_knn_distance(self):
        """Check the "knn_distance_4" projection against the summed distances
        to the 4 nearest neighbors computed directly."""
        mapper = KeplerMapper()
        data = np.random.rand(100, 5)
        lens = mapper.project(data, projection="knn_distance_4", scaler=None)

        nn = neighbors.NearestNeighbors(n_neighbors=4)
        nn.fit(data)
        distances, _ = nn.kneighbors(data, n_neighbors=4, return_distance=True)
        # One summed distance per sample, shaped as a single column.
        lens_confirm = np.sum(distances, axis=1).reshape((-1, 1))

        assert lens.shape == (100, 1)
        np.testing.assert_array_equal(lens, lens_confirm)
Example #27
Source File: comparison_plot.py    From m-phate with GNU General Public License v3.0 5 votes vote down vote up
def evaluate_within_slice(Y, k=40):
    """Average overlap between the k-neighbor sets found in Y and in trace,
    computed separately for each distinct value of epoch.

    NOTE(review): ``epoch`` and ``trace`` are not parameters; they are read
    from the enclosing scope. Presumably ``epoch`` labels the rows of Y and
    ``trace[e]`` holds the matching rows for epoch e; confirm against caller.

    Args:
        Y: array whose rows are selected with ``epoch == e``.
        k: number of neighbors per point.

    Returns:
        Mean over all slices and points of the fraction of a point's
        neighbors in Y that are also among its neighbors in trace.
    """
    # Keyword form: scikit-learn releases with keyword-only constructor
    # arguments reject a positional n_neighbors.
    neighbors_op = NearestNeighbors(n_neighbors=k)
    result = []
    for e in np.unique(epoch):
        # No query argument: each slice is queried against itself.
        neighbors_op.fit(Y[epoch == e])
        _, Y_indices = neighbors_op.kneighbors()
        neighbors_op.fit(trace[e])
        _, trace_indices = neighbors_op.kneighbors()
        result.append([np.mean(np.isin(x, y))
                       for x, y in zip(Y_indices, trace_indices)])
    return np.mean(result)
Example #28
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_k_and_radius_neighbors_train_is_not_query():
    """Test kneighbors et al. when the query differs from the training data."""

    for algorithm in ALGORITHMS:

        nn = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm)

        X = [[0], [1]]
        nn.fit(X)
        test_data = [[2], [1]]

        # Test neighbors.
        dist, ind = nn.kneighbors(test_data)
        assert_array_equal(dist, [[1], [0]])
        assert_array_equal(ind, [[1], [1]])
        dist, ind = nn.radius_neighbors(test_data, radius=1.5)
        check_object_arrays(dist, [[1], [1, 0]])
        check_object_arrays(ind, [[1], [0, 1]])

        # Test the graph variants. toarray() replaces the deprecated `.A`
        # shorthand of sparse matrices.
        assert_array_equal(
            nn.kneighbors_graph(test_data).toarray(), [[0., 1.], [0., 1.]])
        assert_array_equal(
            nn.kneighbors_graph(test_data, mode='distance').toarray(),
            np.array([[0., 1.], [0., 0.]]))
        radius_graph = nn.radius_neighbors_graph(test_data, radius=1.5)
        assert_array_equal(radius_graph.toarray(), [[0, 1], [1, 1]])
Example #29
Source File: estimators.py    From Pyspatialml with GNU General Public License v3.0 5 votes vote down vote up
def __init__(
        self,
        base_estimator,
        n_neighbors=7,
        weights="distance",
        radius=1.0,
        algorithm="auto",
        leaf_size=30,
        metric="minkowski",
        p=2,
        metric_params=None,
        feature_indices=None,
        n_jobs=1,
    ):
        """Store every constructor argument on the instance, build the
        (unfitted) neighbor search object and reset ``y_`` to None."""

        # Wrapped estimator and options not forwarded to the neighbor search.
        self.base_estimator = base_estimator
        self.weights = weights
        self.feature_indices = feature_indices

        # Options that are also forwarded to the neighbor search below.
        self.n_neighbors = n_neighbors
        self.radius = radius
        self.algorithm = algorithm
        self.leaf_size = leaf_size
        self.metric = metric
        self.p = p
        self.metric_params = metric_params
        self.n_jobs = n_jobs

        self.knn = NearestNeighbors(
            n_neighbors=n_neighbors,
            radius=radius,
            algorithm=algorithm,
            leaf_size=leaf_size,
            metric=metric,
            p=p,
            metric_params=metric_params,
            n_jobs=n_jobs,
        )

        self.y_ = None
Example #30
Source File: preprocess.py    From cn-text-classifier with GNU General Public License v3.0 5 votes vote down vote up
def snn_sim_matrix(X, k=5):
    """
    Compute the shared-nearest-neighbor (SNN) matrix of the samples, using
    scikit-learn's kd-tree based neighbor search.
    :param X: array-like, shape = [samples_size, features_size]
    :param k: positive integer(default = 5), number of nearest neighbors per sample
    :return: symmetric snn distance matrix
    """
    try:
        X = np.array(X)
    except Exception as e:
        print(e)
        raise ValueError("输入的数据集必须为矩阵")
    # Number of samples; unpacking also requires X to be 2-dimensional.
    samples_size, _ = X.shape

    knn = NearestNeighbors(n_neighbors=k, algorithm='kd_tree')
    knn.fit(X)
    # Row r holds the indices of the k nearest neighbors of sample r.
    knn_matrix = knn.kneighbors(X, return_distance=False)

    # SNN similarity matrix, every entry starting at 0.5.
    sim_matrix = np.full((samples_size, samples_size), 0.5)
    for i in range(samples_size):
        # Samples that list i among their neighbors.
        sharing = np.where(knn_matrix == i)[0]
        for first, second in combinations(sharing, 2):
            # Count the shared neighbor only when `first` is itself one of
            # the neighbors of `second`.
            if first in knn_matrix[second]:
                sim_matrix[first][second] += 1

    # Turn similarities into distances.
    sim_matrix = 1 / sim_matrix
    # Keep the upper triangle and mirror it to make the matrix symmetric.
    sim_matrix = np.triu(sim_matrix)
    sim_matrix += sim_matrix.T - np.diag(sim_matrix.diagonal())
    return sim_matrix