Python sklearn.neighbors.KDTree() Examples

The following code examples show how to use sklearn.neighbors.KDTree(). All of them are drawn from open source Python projects.
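For orientation before the project examples, here is a minimal, self-contained sketch of the basic API: build a tree, run a k-nearest-neighbor query, and run a radius query. The array shapes and parameter values are illustrative only, not taken from any project below.

import numpy as np
from sklearn.neighbors import KDTree

rng = np.random.RandomState(0)
X = rng.random_sample((100, 3))  # 100 points in 3 dimensions

tree = KDTree(X, leaf_size=30, metric='euclidean')

# k-nearest-neighbor query: dist and ind each have shape (n_queries, k)
dist, ind = tree.query(X[:5], k=3)

# radius query: one variable-length array of neighbor indices per query point
ind_r = tree.query_radius(X[:5], r=0.3)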

Example 1
Project: Anamoly-Detection   Author: msmsk05   File: abod.py    BSD 2-Clause "Simplified" License
def _fit_fast(self):
        """Fast ABOD method. Only use n_neighbors for angle calculation.
        Internal use only
        """

        # make sure the n_neighbors is in the range
        check_parameter(self.n_neighbors, 1, self.n_train_)

        self.tree_ = KDTree(self.X_train_)

        neigh = NearestNeighbors(n_neighbors=self.n_neighbors)
        neigh.fit(self.X_train_)
        ind_arr = neigh.kneighbors(n_neighbors=self.n_neighbors,
                                   return_distance=False)

        for i in range(self.n_train_):
            curr_pt = self.X_train_[i, :]
            X_ind = ind_arr[i, :]
            self.decision_scores_[i, 0] = _calculate_wocs(curr_pt,
                                                          self.X_train_,
                                                          X_ind)
        return self

    # noinspection PyPep8Naming 
Example 2
Project: RelativePose   Author: zhenpeiyang   File: util.py    BSD 3-Clause "New" or "Revised" License
def point_cloud_overlap(pc_src,pc_tgt,R_gt_44):
    pc_src_trans = np.matmul(R_gt_44[:3,:3],pc_src.T) +R_gt_44[:3,3:4]
    tree = KDTree(pc_tgt)
    nearest_dist, nearest_ind = tree.query(pc_src_trans.T, k=1)
    nns2t = np.min(nearest_dist)
    hasCorres=(nearest_dist < 0.08)
    overlap_val_s2t = hasCorres.sum()/pc_src.shape[0]

    pc_tgt_trans = np.matmul(np.linalg.inv(R_gt_44),np.concatenate((pc_tgt.T,np.ones([1,pc_tgt.shape[0]]))))[:3,:]
    tree = KDTree(pc_src)
    nearest_dist, nearest_ind = tree.query(pc_tgt_trans.T, k=1)
    nnt2s = np.min(nearest_dist)
    hasCorres=(nearest_dist < 0.08)
    overlap_val_t2s = hasCorres.sum()/pc_tgt.shape[0]

    overlap_val = max(overlap_val_s2t,overlap_val_t2s)
    cam_dist_this = np.linalg.norm(R_gt_44[:3,3])
    pc_dist_this = np.linalg.norm(pc_src_trans.mean(1) - pc_tgt.T.mean(1))
    pc_nn = (nns2t+nnt2s)/2
    return overlap_val,cam_dist_this,pc_dist_this,pc_nn 
Example 3
Project: scikit-multiflow   Author: scikit-multiflow   File: knn_classifier.py    BSD 3-Clause "New" or "Revised" License
def predict(self, X):
        """ predict
        
        Predicts the label of the X sample, by searching the KDTree for 
        the n_neighbors-Nearest Neighbors.
        
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            All the samples we want to predict the label for.
            
        Returns
        -------
        numpy.ndarray
            An array containing the predicted labels for all instances in X.
        
        """
        r, c = get_dimensions(X)
        proba = self.predict_proba(X)
        predictions = []
        for i in range(r):
            predictions.append(np.argmax(proba[i]))
        return np.array(predictions) 
Example 4
Project: scikit-multiflow   Author: scikit-multiflow   File: knn_classifier.py    BSD 3-Clause "New" or "Revised" License
def __predict_proba(self, X):
        """ __predict_proba
        
        Private implementation of the predict_proba method.
        
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
        
        Returns
        -------
        tuple
            One array with the k-nearest neighbors' distances and another
            one with their indexes.
        
        """
        # To use our own KDTree implementation please replace it as follows
        # tree = KDTree(self.window.get_attributes_matrix(), metric='euclidean',
        #              nominal_attributes=self._nominal_attributes, return_distance=True)

        tree = sk.KDTree(self.window.get_attributes_matrix(), self.leaf_size, metric='euclidean')
        dist, ind = tree.query(np.asarray(X), k=self.n_neighbors)
        return dist, ind 
Example 5
Project: pcml   Author: projectclarify   File: similarity_search.py    Apache License 2.0
def restore_embedding_data(path):

  with open("/tmp/embedding.json", "r") as f:
    data, predictions = json.loads(f.read())

  tf.logging.info("Computing kdtree...")
  predictions = np.asarray([np.asarray(thing) for thing in predictions])
  kdt = KDTree(predictions, leaf_size=30, metric='euclidean')

  for key, value in data.items():
    data[key] = {
        "emb": np.asarray(data[key]["emb"]),
        "img": np.asarray(data[key]["img"])
    }

  return data, predictions, kdt 
Example 6
Project: pyod   Author: yzhao062   File: abod.py    BSD 2-Clause "Simplified" License
def _fit_fast(self):
        """Fast ABOD method. Only use n_neighbors for angle calculation.
        Internal use only
        """

        # make sure the n_neighbors is in the range
        check_parameter(self.n_neighbors, 1, self.n_train_)

        self.tree_ = KDTree(self.X_train_)

        neigh = NearestNeighbors(n_neighbors=self.n_neighbors)
        neigh.fit(self.X_train_)
        ind_arr = neigh.kneighbors(n_neighbors=self.n_neighbors,
                                   return_distance=False)

        for i in range(self.n_train_):
            curr_pt = self.X_train_[i, :]
            X_ind = ind_arr[i, :]
            self.decision_scores_[i, 0] = _calculate_wocs(curr_pt,
                                                          self.X_train_,
                                                          X_ind)
        return self

    # noinspection PyPep8Naming 
Example 7
Project: PPGNet   Author: svip-lab   File: line_graph.py    MIT License
def freeze_junction(self, status=True):
        self._freeze_junction = status
        if status:
            clusters = fclusterdata(self._junctions, self._eps_junc, criterion="distance")
            junc_groups = {}
            for ind_junc, ind_group in enumerate(clusters):
                if ind_group not in junc_groups.keys():
                    junc_groups[ind_group] = []
                junc_groups[ind_group].append(self._junctions[ind_junc])
            if self.verbose:
                print(f"{len(self._junctions) - len(junc_groups)} junctions merged.")
            self._junctions = [np.mean(junc_group, axis=0) for junc_group in junc_groups.values()]

            self._kdtree = KDTree(self._junctions, leaf_size=30)
            dists, inds = self._kdtree.query(self._junctions, k=2)
            repl_inds = np.nonzero(dists.sum(axis=1) < self._eps_junc)[0].tolist()
            # assert len(repl_inds) == 0
        else:
            self._kdtree = None 
Example 8
Project: scanpy   Author: theislab   File: test_ingest.py    BSD 3-Clause "New" or "Revised" License
def test_neighbors(adatas):
    adata_ref = adatas[0].copy()
    adata_new = adatas[1].copy()

    ing = sc.tl.Ingest(adata_ref)
    ing.fit(adata_new)
    ing.neighbors(k=10)
    indices = ing._indices

    tree = KDTree(adata_ref.obsm['X_pca'])
    true_indices = tree.query(ing._obsm['rep'], 10, return_distance=False)

    num_correct = 0.0
    for i in range(adata_new.n_obs):
        num_correct += np.sum(np.in1d(true_indices[i], indices[i]))
    percent_correct = num_correct / (adata_new.n_obs * 10)

    assert percent_correct > 0.99 
Example 9
Project: hmd   Author: zhuhao-nju   File: eval_functions.py    MIT License
def knnsearch(target, source, metrics='euclidean', k_size=1, leaf_sizes=30):
    """Build a KDTree from target, then query it with source."""
    # make sure both inputs have the same dimensionality
    if not (target.shape[1] == source.shape[1]):
        raise ValueError('Inputs must both be [N(size), D(dimension)] arrays '
                         'with the same dimension D')

    kdt_build = KDTree(target, leaf_size = leaf_sizes, metric=metrics)
    distances, indices = kdt_build.query(source, k=k_size)

    averagedist = np.sum(distances) / (source.shape[0])  # assume they have [N,D] 

    return (averagedist, distances, indices)

# get high frequency vert list 
Example 10
Project: pynndescent   Author: lmcinnes   File: test_pynndescent_.py    BSD 2-Clause "Simplified" License
def test_nn_descent_neighbor_accuracy():
    knn_indices, _ = NNDescent(
        nn_data, "euclidean", {}, 10, random_state=np.random
    )._neighbor_graph

    tree = KDTree(nn_data)
    true_indices = tree.query(nn_data, 10, return_distance=False)

    num_correct = 0.0
    for i in range(nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.98,
        "NN-descent did not get 99% " "accuracy on nearest neighbors",
    ) 
Example 11
Project: pynndescent   Author: lmcinnes   File: test_pynndescent_.py    BSD 2-Clause "Simplified" License
def test_angular_nn_descent_neighbor_accuracy():
    knn_indices, _ = NNDescent(
        nn_data, "cosine", {}, 10, random_state=np.random
    )._neighbor_graph

    angular_data = normalize(nn_data, norm="l2")
    tree = KDTree(angular_data)
    true_indices = tree.query(angular_data, 10, return_distance=False)

    num_correct = 0.0
    for i in range(nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.98,
        "NN-descent did not get 99% " "accuracy on nearest neighbors",
    ) 
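Examples 11 and 13 lean on a detail worth spelling out: KDTree does not support the cosine metric, so the data are L2-normalized first. On unit vectors, ||u - v||^2 = 2 - 2*cos(u, v), so euclidean rankings on the normalized data coincide with cosine rankings on the original. A small sketch of that equivalence, on made-up data:

import numpy as np
from sklearn.neighbors import KDTree
from sklearn.preprocessing import normalize

rng = np.random.RandomState(42)
data = rng.random_sample((200, 8))

# Euclidean neighbors of the unit-normalized vectors are the
# cosine neighbors of the original vectors.
unit = normalize(data, norm="l2")
cosine_like_ind = KDTree(unit).query(unit, k=5, return_distance=False)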
Example 12
Project: pynndescent   Author: lmcinnes   File: test_pynndescent_.py    BSD 2-Clause "Simplified" License
def test_sparse_nn_descent_neighbor_accuracy():
    knn_indices, _ = NNDescent(
        sparse_nn_data, "euclidean", n_neighbors=20, random_state=None
    )._neighbor_graph

    tree = KDTree(sparse_nn_data.toarray())
    true_indices = tree.query(sparse_nn_data.toarray(), 10, return_distance=False)

    num_correct = 0.0
    for i in range(sparse_nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (sparse_nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.85,
        "Sparse NN-descent did not get 95% " "accuracy on nearest neighbors",
    ) 
Example 13
Project: pynndescent   Author: lmcinnes   File: test_pynndescent_.py    BSD 2-Clause "Simplified" License
def test_sparse_angular_nn_descent_neighbor_accuracy():
    knn_indices, _ = NNDescent(
        sparse_nn_data, "cosine", {}, 20, random_state=None
    )._neighbor_graph

    angular_data = normalize(sparse_nn_data, norm="l2").toarray()
    tree = KDTree(angular_data)
    true_indices = tree.query(angular_data, 10, return_distance=False)

    num_correct = 0.0
    for i in range(sparse_nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (sparse_nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.85,
        "Sparse angular NN-descent did not get 98% " "accuracy on nearest neighbors",
    ) 
Example 14
Project: pynndescent   Author: lmcinnes   File: test_pynndescent_.py    BSD 2-Clause "Simplified" License
def test_nn_descent_query_accuracy():
    nnd = NNDescent(nn_data[200:], "euclidean", n_neighbors=10, random_state=None)
    knn_indices, _ = nnd.query(nn_data[:200], k=10, epsilon=0.2)

    tree = KDTree(nn_data[200:])
    true_indices = tree.query(nn_data[:200], 10, return_distance=False)

    num_correct = 0.0
    for i in range(true_indices.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (true_indices.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.95,
        "NN-descent query did not get 95% " "accuracy on nearest neighbors",
    ) 
Example 15
Project: pynndescent   Author: lmcinnes   File: test_pynndescent_.py    BSD 2-Clause "Simplified" License
def test_random_state_none():
    knn_indices, _ = NNDescent(
        nn_data, "euclidean", {}, 10, random_state=None
    )._neighbor_graph

    tree = KDTree(nn_data)
    true_indices = tree.query(nn_data, 10, return_distance=False)

    num_correct = 0.0
    for i in range(nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.99,
        "NN-descent did not get 99% accuracy on nearest neighbors",
    )
Example 16
Project: Synonyms   Author: huyingxi   File: word2vec.py    MIT License
def neighbours(self, word, size = 10):
        """
        Get nearest words with KDTree, ranking by cosine distance
        """
        word = word.strip()
        v = self.word_vec(word)
        [distances], [points] = self.kdt.query(array([v]), k = size, return_distance = True)
        assert len(distances) == len(points), "distances and points should be in same shape."
        words, scores = [], {}
        for (x,y) in zip(points, distances):
            w = self.index2word[x]
            if w == word: s = 1.0
            else: s = cosine(v, self.syn0[x])
            if s < 0: s = abs(s)
            words.append(w)
            scores[w] = min(s, 1.0)
        for x in sorted(words, key=scores.get, reverse=True):
            yield x, scores[x] 
Example 17
Project: pySpatialTools   Author: tgquintela   File: implicit_retrievers.py    MIT License
def _define_retriever(self, locs, pars_ret=None):
        """Define a kdtree for retrieving neighbours.

        Parameters
        ----------
        locs: list, np.ndarray, or others
            spatial information of the whole pool of retrievable spatial
            elements.
        pars_ret: int or None (default)
            the parameters to set the core-retriever. In sklearn-KDTree
            core-retriever, we only need leafsize parameter.

        """
        if pars_ret is not None:
            leafsize = int(pars_ret)
        else:
            leafsize = locs.shape[0]
            leafsize = locs.shape[0] // 100 if leafsize > 1000 else leafsize  # leaf_size must be an int
        retriever = KDTree(locs, leaf_size=leafsize)
        self.retriever.append(retriever)
        self._heterogeneity_definition()

    ########################### Auxiliar functions ############################ 
Example 18
Project: pointnetvlad   Author: mikacuy   File: generate_training_tuples_baseline.py    MIT License
def construct_query_dict(df_centroids, filename):
	tree = KDTree(df_centroids[['northing','easting']])
	ind_nn = tree.query_radius(df_centroids[['northing','easting']],r=10)
	ind_r = tree.query_radius(df_centroids[['northing','easting']], r=50)
	queries={}
	for i in range(len(ind_nn)):
		query=df_centroids.iloc[i]["file"]
		positives=np.setdiff1d(ind_nn[i],[i]).tolist()
		negatives=np.setdiff1d(df_centroids.index.values.tolist(),ind_r[i]).tolist()
		random.shuffle(negatives)
		queries[i]={"query":query,"positives":positives,"negatives":negatives}

	with open(filename, 'wb') as handle:
	    pickle.dump(queries, handle, protocol=pickle.HIGHEST_PROTOCOL)

	print("Done ", filename)


####Initialize pandas DataFrame 
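Examples 18 and 19 use query_radius rather than query: instead of a fixed k, it returns one variable-length array of indices per query row, holding every point within radius r. A hedged sketch of that return shape, with made-up coordinates standing in for the northing/easting columns:

import numpy as np
from sklearn.neighbors import KDTree

coords = np.array([[0.0, 0.0], [3.0, 4.0], [3.5, 4.0], [50.0, 50.0]])
tree = KDTree(coords)

ind_nn = tree.query_radius(coords, r=10)  # array of index arrays
for i, neighbors in enumerate(ind_nn):
    positives = np.setdiff1d(neighbors, [i])  # drop the query point itself
    print(i, positives)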
Example 19
Project: pointnetvlad   Author: mikacuy   File: generate_training_tuples_refine.py    MIT License
def construct_query_dict(df_centroids, filename):
	tree = KDTree(df_centroids[['northing','easting']])
	ind_nn = tree.query_radius(df_centroids[['northing','easting']],r=12.5)
	ind_r = tree.query_radius(df_centroids[['northing','easting']], r=50)
	queries={}
	print(len(ind_nn))
	for i in range(len(ind_nn)):
		query=df_centroids.iloc[i]["file"]
		positives=np.setdiff1d(ind_nn[i],[i]).tolist()
		negatives=np.setdiff1d(df_centroids.index.values.tolist(),ind_r[i]).tolist()
		random.shuffle(negatives)
		queries[i]={"query":query,"positives":positives,"negatives":negatives}

	with open(filename, 'wb') as handle:
	    pickle.dump(queries, handle, protocol=pickle.HIGHEST_PROTOCOL)

	print("Done ", filename) 
Example 20
Project: autocluster   Author: wywongbd   File: warmstarter.py    BSD 3-Clause "New" or "Revised" License
def fit(self, metafeatures_table_path='metaknowledge/metafeatures_table.csv'):
        """
        Fit a KDTree model
        """
        # read metafeatures table as dataframe
        self.table = pd.read_csv(metafeatures_table_path, sep=',', header='infer')
        
        # only consider columns that we're interested
        self.table = self.table[self.metafeatures + ['dataset']]
        
        # remove rows with NaN 
        self.table = self.table.dropna()
        
        # this is the dataframe we want to fit our model on
        table_without_dataset = self.table.drop(columns=['dataset'])
        table_without_dataset_np = table_without_dataset.to_numpy()
        
        # train the scaler
        self.scaler.fit(table_without_dataset_np)
        
        # fit KDTree model
        self.model = KDTree(self.scaler.transform(table_without_dataset_np), 
                            leaf_size=5) 
Example 21
Project: Weiss   Author: WangWenjun559   File: test_neighbors.py    Apache License 2.0
def test_unsupervised_inputs():
    # test the types of valid input into NearestNeighbors
    X = rng.random_sample((10, 3))

    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)

    dist1, ind1 = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)

    for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(input)
        dist2, ind2 = nbrs.kneighbors(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2) 
Example 22
Project: IrrMapper   Author: dgketchum   File: shapefile_utils.py    Apache License 2.0
def _construct_kdtree(wrs2):
    centroids = []
    path_rows = [] # a mapping
    features = []
    for feature in wrs2:
        tile = shape(feature['geometry'])
        centroid = tile.centroid.coords[0]
        centroids.append([centroid[0], centroid[1]])
        z = feature['properties']
        p = z['PATH']
        r = z['ROW']
        path_rows.append(str(p) + "_" + str(r))
        features.append(feature)

    tree = KDTree(asarray(centroids))
    return tree, asarray(path_rows), asarray(features) 
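A usage sketch for the values returned above (hypothetical lon/lat coordinates; it assumes _construct_kdtree has already been called on a WRS2 feature collection): querying the tree with a point yields the nearest tile's path_row string.

import numpy as np

# tree, path_rows, features = _construct_kdtree(wrs2)
point = np.array([[-111.5, 42.3]])  # lon, lat (illustrative values)
dist, ind = tree.query(point, k=1)
nearest_path_row = path_rows[ind[0][0]]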
Example 23
Project: QAbot_by_base_KG   Author: Goooaaal   File: word2vec.py    MIT License
def neighbours(self, word, size = 10):
        """
        Get nearest words with KDTree, ranking by cosine distance
        """
        word = word.strip()
        v = self.word_vec(word)
        [distances], [points] = self.kdt.query(array([v]), k = size, return_distance = True)
        assert len(distances) == len(points), "distances and points should be in same shape."
        words, scores = [], {}
        for (x,y) in zip(points, distances):
            w = self.index2word[x]
            if w == word: s = 1.0
            else: s = utils.cosine(v, self.syn0[x])
            if s < 0: s = abs(s)
            words.append(w)
            scores[w] = min(s, 1.0)
        for x in sorted(words, key=scores.get, reverse=True):
            yield x, scores[x] 
Example 24
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_neighbors.py    Apache License 2.0
def test_unsupervised_inputs():
    # test the types of valid input into NearestNeighbors
    X = rng.random_sample((10, 3))

    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)

    dist1, ind1 = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)

    for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(input)
        dist2, ind2 = nbrs.kneighbors(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2) 
Example 25
Project: Analysis-of-Stock-High-Frequent-Data-with-LSTM   Author: Gofinge   File: utils.py    MIT License
def _over_sampling_smote(sample, power):
    kdtree = KDTree(sample)
    indices = [i for i in range(len(sample))]
    np.random.shuffle(indices)
    new_sample_list = []
    count = int(power * len(sample)) - len(sample)
    each = int(power)
    feature_num = len(sample[0])

    for ori_ind in indices:
        _, near_ind = kdtree.query([sample[ori_ind]], each)
        for i in near_ind[0]:
            coef = np.random.rand()
            new_sample = [coef * sample[i][j] + (1 - coef) * sample[ori_ind][j] for j in range(feature_num)]
            new_sample_list.append(new_sample)
            count -= 1
        if count < 0:
            break

    sample = list(sample)
    sample.extend(new_sample_list)
    return sample 
Example 26
Project: DPLink   Author: vonfeng   File: preprocessing.py    MIT License
def load_vids(data_path, data_name="baseLoc"):
    vid_list = {}
    vid_lookup = {}
    vid_array = []
    poi_info = json.load(open(data_path + "poi_info.json"))
    with open(data_path + data_name) as fid:
        for line in fid:
            bid, lat, lon = line.strip("\r\n").split("_")
            lat, lon = float(lat), float(lon)
            if bid not in vid_list:
                cid = len(vid_list) + 1
                vid_list[bid] = [cid, (lat, lon), poi_info[bid][3:]]
                vid_lookup[cid] = [bid, (lat, lon)]
                vid_array.append((lat, lon))
    vid_array = np.array(vid_array)
    kdtree = KDTree(vid_array)
    return vid_list, vid_lookup, kdtree 
Example 27
Project: scanobjectnn   Author: hkust-vgd   File: provider.py    MIT License
def occlude_point_cloud(batch_data, occlusion_ratio):
    """ Randomly k remove points (number of points defined by the ratio.
        Input:
          BxNx3 array, original batch of point clouds
        Return:
          Bx(N-k)x3 array, occluded batch of point clouds
    """
    B, N, C = batch_data.shape
    k = int(np.round(N*occlusion_ratio))
    occluded_batch_point_cloud = []
    for i in range(B):
        point_cloud = batch_data[i, :, :]
        kdt = KDTree(point_cloud, leaf_size=30, metric='euclidean')
        center_of_occlusion = random.choice(point_cloud)
        #occluded_points_idx = kdt.query_radius(center_of_occlusion.reshape(1, -1), r=occlusion_radius)
        _, occluded_points_idx = kdt.query(center_of_occlusion.reshape(1, -1), k=k)
        point_cloud = np.delete(point_cloud, occluded_points_idx, axis=0)
        occluded_batch_point_cloud.append(point_cloud)
    return np.array(occluded_batch_point_cloud) 
Example 28
Project: alibi   Author: SeldonIO   File: trustscore.py    Apache License 2.0
def filter_by_distance_knn(self, X: np.ndarray) -> np.ndarray:
        """
        Filter out instances with low kNN density. Calculate distance to k-nearest point in the data for each
        instance and remove instances above a cutoff distance.

        Parameters
        ----------
        X
            Data

        Returns
        -------
        Filtered data.
        """
        kdtree = KDTree(X, leaf_size=self.leaf_size, metric=self.metric)
        knn_r = kdtree.query(X, k=self.k_filter + 1)[0]  # distances from 0 to k-nearest points
        if self.dist_filter_type == 'point':
            knn_r = knn_r[:, -1]
        elif self.dist_filter_type == 'mean':
            knn_r = np.mean(knn_r[:, 1:], axis=1)  # exclude distance of instance to itself
        cutoff_r = np.percentile(knn_r, (1 - self.alpha) * 100)  # cutoff distance
        X_keep = X[np.where(knn_r <= cutoff_r)[0], :]  # define instances to keep
        return X_keep 
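A concrete reading of the cutoff above: query returns k_filter + 1 columns because the first column is each point's zero distance to itself; with alpha = 0.1, np.percentile keeps the 90th percentile as cutoff_r, so roughly the 10% of instances with the largest k-nearest-neighbor distances (the sparsest regions) are filtered out.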
Example 29
Project: adversarial-squad   Author: robinjia   File: find_squad_nearby_words.py    MIT License
def get_nearby_words(main_words):
  main_inds = {}
  all_words = []
  all_vecs = []
  with open(OPTS.wordvec_file, encoding='ISO-8859-1') as f:
    for i, line in tqdm(enumerate(f)):
      toks = line.rstrip().split(' ')
      word = toks[0]
      vec = np.array([float(x) for x in toks[1:]])
      all_words.append(word)
      all_vecs.append(vec)
      if word in main_words:
        main_inds[word] = i
  print('Found vectors for %d/%d words = %.2f%%' % (
      len(main_inds), len(main_words), 100.0 * len(main_inds) / len(main_words)),
      file=sys.stderr)
  tree = KDTree(all_vecs)
  nearby_words = {}
  for word in tqdm(main_inds):
    dists, inds = tree.query([all_vecs[main_inds[word]]],
                             k=OPTS.num_neighbors + 1)
    nearby_words[word] = [
        {'word': all_words[i], 'dist': d} for d, i in zip(dists[0], inds[0])]
  return nearby_words 
Example 30
Project: Anamoly-Detection   Author: msmsk05   File: knn.py    BSD 2-Clause "Simplified" License
def fit(self, X, y=None):
        """Fit detector. y is optional for unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).
        """

        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.tree_ = KDTree(X, leaf_size=self.leaf_size, metric=self.metric)
        self.neigh_.fit(X)

        dist_arr, _ = self.neigh_.kneighbors(n_neighbors=self.n_neighbors,
                                             return_distance=True)
        dist = self._get_dist_by_method(dist_arr)

        self.decision_scores_ = dist.ravel()
        self._process_decision_scores()

        return self 
Example 31
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0
def __init__(self, model):
        for counter, key in enumerate(model.vocab.keys()):
            self.data.append(model[key])
            self.word2idx[key] = counter
            self.idx2word[counter] = key

        # leaf_size is a hyperparameter
        self.data = np.array(self.data)
        self.tree = KDTree(self.data, leaf_size=100) 
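As the comment above hints, leaf_size is a speed/memory knob only: it sets how many points sit in each terminal node (and so the balance between tree traversal and brute-force scanning at the leaves), but it does not change which neighbors a query returns.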
Example 32
Project: craftassist   Author: facebookresearch   File: render_schematic_with_pixel2block-color.py    MIT License
def pixel2block(random_images, schematic_hue):
    """
    This function returns a numpy array (M,N,3) that indicates which pixel corresponds to
    which block.

    If a pixel has [-1, -1, -1], then it means this pixel does not map to any block
    """
    for i in range(len(random_images)):
        random_images[i] = cv2.cvtColor(random_images[i], cv2.COLOR_BGR2HSV)

    ## init the ret to all -1s
    ret = np.ones(random_images[0].shape[:2] + (3,), dtype=np.int32) * -1

    ymax, zmax, xmax, _ = schematic_hue.shape

    schematic_hue = np.reshape(schematic_hue, (-1, schematic_hue.shape[-1]))
    kdt = KDTree(schematic_hue, leaf_size=2)

    hue_vecs = []
    for m in range(ret.shape[0]):
        for n in range(ret.shape[1]):
            ## the original range is [0,179]
            hue_vecs.append([img[m, n][0] * 2 for img in random_images])

    hue_vecs = np.reshape(np.array(hue_vecs), (-1, len(random_images)))

    query = kdt.query(hue_vecs, k=1, return_distance=False)

    assert len(query) == ret.shape[0] * ret.shape[1]

    for i in range(len(query)):
        m = i // ret.shape[1]
        n = i % ret.shape[1]
        y = query[i][0] // (zmax * xmax)
        z = (query[i][0] % (zmax * xmax)) // xmax
        x = (query[i][0] % (zmax * xmax)) % xmax
        ret[m][n] = [x, y, z]

    return ret 
Example 33
Project: pcml   Author: projectclarify   File: main.py    Apache License 2.0
def build_kdtree(self):

    X = np.asarray([np.asarray(thing["embedding"]) for thing in self.data])

    self.kdt = KDTree(X, leaf_size=30, metric='euclidean') 
Example 34
Project: FAE   Author: salan668   File: ReliefF.py    GNU General Public License v3.0
def fit(self, X, y):
        """Computes the feature importance scores from the training data.

        Parameters
        ----------
        X: array-like {n_samples, n_features}
            Training instances to compute the feature importance scores from
        y: array-like {n_samples}
            Training labels

        Returns
        -------
        None

        """
        self.feature_scores = np.zeros(X.shape[1])
        self.tree = KDTree(X)

        for source_index in range(X.shape[0]):
            distances, indices = self.tree.query(
                X[source_index].reshape(1, -1), k=self.n_neighbors+1)

            # Nearest neighbor is self, so ignore first match
            indices = indices[0][1:]

            # Create a binary array that is 1 when the source and neighbor
            #  match and -1 everywhere else, for labels and features.
            labels_match = np.equal(y[source_index], y[indices]) * 2. - 1.
            features_match = np.equal(X[source_index], X[indices]) * 2. - 1.

            # The change in feature_scores is the dot product of these  arrays
            self.feature_scores += np.dot(features_match.T, labels_match)

        self.top_features = np.argsort(self.feature_scores)[::-1] 
Example 35
Project: LidarPC-KDTree   Author: UP-RS-ESP   File: compare_KDTree_implementations_python.py    GNU General Public License v3.0
def pc_generate_sklearnKDTree(pc_xyz):
    #conda install scikit-learn
    try:
        from sklearn.neighbors import KDTree as sklearnKDTree
    except ImportError:
        raise ImportError("sklearn not installed; try: conda install scikit-learn")
    pc_xyz_sklearnKDTree_tree = sklearnKDTree(pc_xyz)
    return pc_xyz_sklearnKDTree_tree 
Example 36
Project: pynndescent   Author: lmcinnes   File: test_pynndescent_.py    BSD 2-Clause "Simplified" License
def test_deduplicated_data_behaves_normally():
    this_dir = os.path.dirname(os.path.abspath(__file__))
    data_path = os.path.join(this_dir, "test_data/cosine_hang.npy")
    data = np.unique(np.load(data_path), axis=0)
    data = data[~np.all(data == 0, axis=1)]
    data = data[:1000]

    n_neighbors = 10
    knn_indices, _ = NNDescent(
        data, "cosine", {}, n_neighbors, random_state=np.random, n_trees=20
    )._neighbor_graph

    for i in range(data.shape[0]):
        assert_equal(
            len(knn_indices[i]),
            len(np.unique(knn_indices[i])),
            "Duplicate graph_indices in knn graph",
        )

    angular_data = normalize(data, norm="l2")
    tree = KDTree(angular_data)
    true_indices = tree.query(angular_data, n_neighbors, return_distance=False)

    num_correct = 0
    for i in range(data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    proportion_correct = num_correct / (data.shape[0] * n_neighbors)
    assert_greater_equal(
        proportion_correct,
        0.95,
        "NN-descent did not get 95%" " accuracy on nearest neighbors",
    ) 
Example 37
Project: pySpatialTools   Author: tgquintela   File: sampling_from_points.py    MIT License
def uniform_points_points_sampling(limits, points, n):
    """Select the spatial uniform points in the sample by sampling uniform
    spatial points and getting the nearest ones in the available ones.

    Parameters
    ----------
    limits: numpy.ndarray, shape (2, 2)
        the limits of the space: the four limits of the square which define
        the whole retrievable region.
    points: numpy.ndarray
        the points in the space selected.
    n: int
        the number of samples we want.

    Returns
    -------
    indices: numpy.ndarray, shape(n)
        the indices of the samples.

    """

    ## 0. Initialize retriever
    retriever = KDTree(points)
    ## 1. Compute spatial uniform points
    points_s = uniform_points_sampling(limits, n)
    ## 2. Get the nearest points in the sample
    result = retriever.query(points_s, k=1)
    indices = result[1]
    indices = indices.astype(int)
    return indices 
Example 38
Project: pySpatialTools   Author: tgquintela   File: utils.py    MIT License
def match_regions(polygons, regionlocs, n_dim=2):
    """

    Parameters
    ----------
    polygons: list or array_like
        the polygons information.
    regionlocs: array_like
        the location information of the regions.
    n_dim: integer
        the number of dimensions.

    Returns
    -------
    assign_r: array_like
        the assigned regions.
    """
    n = len(polygons)
    centroids = np.zeros((n, n_dim))
    for i in range(n):
        centroids[i, :] = np.array(polygons[i])
    ret = KDTree(regionlocs)
    assign_r = np.zeros(n).astype(int)
    for i in range(n):
        assign_r[i] = ret.query(centroids[[i]])[1][0]
    return assign_r 
Example 39
Project: pySpatialTools   Author: tgquintela   File: bisectordiscretization.py    MIT License
def __init__(self, r_points, regions_id):
        """The bisector discretizor needs the regionlocs points and the
        region ids of these points.
        """
        self._initialization()
        assert len(r_points) == len(regions_id)
        self.regionlocs = r_points
        self.regions_id = regions_id
        self._compute_limits()
        self.regionretriever = KDTree(r_points) 
Example 40
Project: EvoMSA   Author: INGEOTEC   File: model.py    Apache License 2.0
def init(self, corpus):
        """Initial model"""

        words = self.tokens(corpus)
        self._weight = np.ones(len(words))
        # key = self.semantic_space._text
        X = self.semantic_space.transform([str(x) for x in words])
        self._kdtree = KDTree(X, metric='manhattan')
        w = self.entropy(self.transform(corpus), corpus, ntokens=X.shape[0])
        w = np.where(w > self.threshold)[0]
        self._kdtree = KDTree(X[w], metric='manhattan')
        self._weight = self._weight[w]
        self._id2token = [words[x] for x in w]
        self.compute_idf(self.transform(corpus)) 
Example 41
Project: EvoMSA   Author: INGEOTEC   File: align.py    Apache License 2.0
def projection(model_from, model_to, text_from, text_to):
    """
    Compute the coefficients to project the output of a Emoji Space in the origin language to the objetive language

    :param lang_from: Origin model
    :type lang_from: str
    :param lang_to: Objective model
    :type lang_to: str [ar|en|es]
    :param text_from: Text in the origin language
    :type text_from: list
    :param text_from: Text in the objective language
    :type text_from: list
    """

    from microtc.utils import load_model
    import numpy as np
    from sklearn.neighbors import KDTree
    model_from = load_model(model_from)
    model_to = load_model(model_to)
    vec_from = model_from.transform(text_from)
    vec_to = model_to.transform(text_to)
    done = set()
    output = []
    X = []
    kdtree = KDTree(vec_to, metric='manhattan')
    ss = kdtree.query(vec_from)[1].flatten()
    for k, j in tqdm(enumerate(ss)):
        if j in done:
            continue
        X.append(vec_from[k])
        output.append(vec_to[j])
        done.add(j)
    output = np.stack(output)
    X = np.stack(X)
    return np.linalg.lstsq(X, output, rcond=None)[0] 
Example 42
Project: pointnetvlad   Author: mikacuy   File: train_pointnetvlad.py    MIT License
def get_random_hard_negatives(query_vec, random_negs, num_to_take):
    global TRAINING_LATENT_VECTORS

    latent_vecs=[]
    for j in range(len(random_negs)):
        latent_vecs.append(TRAINING_LATENT_VECTORS[random_negs[j]])
    
    latent_vecs=np.array(latent_vecs)
    nbrs = KDTree(latent_vecs)
    distances, indices = nbrs.query(np.array([query_vec]),k=num_to_take)
    hard_negs=np.squeeze(np.array(random_negs)[indices[0]])
    hard_negs= hard_negs.tolist()
    return hard_negs 
Example 43
Project: pointnetvlad   Author: mikacuy   File: evaluate.py    MIT License
def get_recall(sess, ops, m, n):
    global DATABASE_VECTORS
    global QUERY_VECTORS

    database_output= DATABASE_VECTORS[m]
    queries_output= QUERY_VECTORS[n]

    print(len(queries_output))
    database_nbrs = KDTree(database_output)

    num_neighbors=25
    recall=[0]*num_neighbors

    top1_similarity_score=[]
    one_percent_retrieved=0
    threshold=max(int(round(len(database_output)/100.0)),1)

    num_evaluated=0
    for i in range(len(queries_output)):
        true_neighbors= QUERY_SETS[n][i][m]
        if(len(true_neighbors)==0):
            continue
        num_evaluated+=1
        distances, indices = database_nbrs.query(np.array([queries_output[i]]),k=num_neighbors)
        for j in range(len(indices[0])):
            if indices[0][j] in true_neighbors:
                if(j==0):
                    similarity= np.dot(queries_output[i],database_output[indices[0][j]])
                    top1_similarity_score.append(similarity)
                recall[j]+=1
                break
                
        if len(list(set(indices[0][0:threshold]).intersection(set(true_neighbors))))>0:
            one_percent_retrieved+=1

    one_percent_recall=(one_percent_retrieved/float(num_evaluated))*100
    recall=(np.cumsum(recall)/float(num_evaluated))*100
    print(recall)
    print(np.mean(top1_similarity_score))
    print(one_percent_recall)
    return recall, top1_similarity_score, one_percent_recall 
Example 44
Project: EmbedderSDR   Author: dizcza   File: npeet.py    MIT License
def build_tree(points):
    if points.shape[1] >= 20:
        # for large dimensions, use BallTree
        return BallTree(points, metric='chebyshev')
    return KDTree(points, metric='chebyshev') 
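A short usage note, assuming the imports (numpy, BallTree, KDTree) and the build_tree definition above: chebyshev is a valid metric for both tree types, and the dimensionality threshold of 20 is this project's heuristic for where a KD-tree's axis-aligned pruning stops paying off relative to BallTree.

import numpy as np

rng = np.random.RandomState(0)
tree_low = build_tree(rng.random_sample((500, 3)))   # 3-D  -> KDTree
tree_high = build_tree(rng.random_sample((500, 64))) # 64-D -> BallTree
dist, ind = tree_high.query(rng.random_sample((5, 64)), k=3)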
Example 45
Project: combo   Author: yzhao062   File: classifier_des.py    BSD 2-Clause "Simplified" License
def fit(self, X, y):
        """Fit classifier.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).
        """

        # Validate inputs X and y
        X, y = check_X_y(X, y)
        X = check_array(X)
        check_classification_targets(y)
        self._classes = len(np.unique(y))
        n_samples = X.shape[0]

        # save the train ground truth for evaluation purpose
        self.y_train_ = y

        # build KDTree out of training subspace
        self.tree_ = KDTree(X)

        self.y_train_predicted_ = np.zeros(
            [n_samples, self.n_base_estimators_])

        # train all base classifiers on X, and get their local predicted scores
        # iterate over all base classifiers
        for i, clf in enumerate(self.base_estimators):
            clf.fit(X, y)
            self.y_train_predicted_[:, i] = clf.predict(X)
            clf.fitted_ = True

        self.fitted_ = True

        return 
Example 46
Project: combo   Author: yzhao062   File: classifier_dcs.py    BSD 2-Clause "Simplified" License
def fit(self, X, y):
        """Fit classifier.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).
        """

        # Validate inputs X and y
        X, y = check_X_y(X, y)
        X = check_array(X)
        check_classification_targets(y)
        self._classes = len(np.unique(y))
        n_samples = X.shape[0]

        # save the train ground truth for evaluation purpose
        self.y_train_ = y

        # build KDTree out of training subspace
        self.tree_ = KDTree(X)

        self.y_train_predicted_ = np.zeros(
            [n_samples, self.n_base_estimators_])

        # train all base classifiers on X, and get their local predicted scores
        # iterate over all base classifiers
        for i, clf in enumerate(self.base_estimators):
            clf.fit(X, y)
            self.y_train_predicted_[:, i] = clf.predict(X)
            clf.fitted_ = True

        self.fitted_ = True

        return 
Example 47
Project: pcay   Author: zpace   File: cmlr.py    MIT License
def find_knn(pts0, eval_pts, k=15):
    '''
    find the points within `pts0` closest to `eval_pts`
    '''
    pts0range = (pts0.max(axis=0) - pts0.min(axis=0))
    neigh = KDTree(pts0 / pts0range)

    nni = neigh.query(eval_pts / pts0range, k=k, return_distance=False)
    return nni 
Example 48
Project: pyFTS   Author: PYFTS   File: knn.py    GNU General Public License v3.0
def train(self, data, **kwargs):
        X,Y = self._prepare_xy(data)

        self.kdtree = KDTree(np.array(X))
        self.values = Y

        self.shortname = "kNN({})-{}".format(self.order, self.alpha) 
Example 49
Project: predictatops   Author: JustinGOSSES   File: wellsKNN.py    MIT License
def kdtree(df_reduced, lat_col, long_col, leaf_size, k):
    """
    Takes in:
    Returns: 
    """
    position = df_reduced[[lat_col, long_col]]
    tree = neighbors.KDTree(position, leaf_size=leaf_size)
    dist, ind = tree.query([position][0], k=k)
    return tree, dist, ind 
Example 50
Project: fast-near-duplicate-image-search   Author: umbertogriffo   File: KDTreeFinder.py    Apache License 2.0
def build_tree(self):
        print('Building the KDTree...')
        assert self.distance_metric in self.valid_metrics, "{} isn't a valid metric for KDTree.".format(
            self.distance_metric)

        hash_str_len = len(self.df_dataset.at[0, 'hash_list'])
        self.tree = KDTree(self.df_dataset[[str(i) for i in range(0, hash_str_len)]], leaf_size=self.leaf_size,
                           metric=self.distance_metric) 
Example 51
Project: Measure-Concentration   Author: xiaozhanguva   File: preliminary.py    MIT License
def knn_graph(X, k, method='brute_force', leaf_size=30, metric='euclidean'):
    n, p = X.shape
    if method == 'kd_tree':
        if _HAS_SKLEARN:
            kdtree = _sknbr.KDTree(X, leaf_size=leaf_size, metric=metric)
            distances, neighbors = kdtree.query(X, k=k, return_distance=True,
                                                sort_results=True)
            radii = distances[:, -1]
        else:
            raise ImportError("The scikit-learn library could not be loaded." +
                              " It is required for the 'kd-tree' method.")

    # elif (not a second if): otherwise the trailing else would re-run the
    # brute-force branch and overwrite the kd-tree result
    elif method == 'ball_tree':
        if _HAS_SKLEARN:
            btree = _sknbr.BallTree(X, leaf_size=leaf_size, metric=metric)
            distances, neighbors = btree.query(X, k=k, return_distance=True,
                                               sort_results=True)
            radii = distances[:, -1]
        else:
            raise ImportError("The scikit-learn library could not be loaded." +
                              " It is required for the 'ball-tree' method.")

    else:  # assume brute-force
        if not _HAS_SCIPY:
            raise ImportError("The 'scipy' module could not be loaded. " +
                              "It is required for the 'brute_force' method " +
                              "for building a knn similarity graph.")

        d = _spd.pdist(X, metric=metric)
        D = _spd.squareform(d)
        rank = np.argsort(D, axis=1)
        neighbors = rank[:, 0:k]
        k_nbr = neighbors[:, -1]
        radii = D[np.arange(n), k_nbr]

    return neighbors, radii 
Example 52
Project: nnsearch   Author: pkariz   File: KDTreeScikit.py    GNU General Public License v3.0
def build(self, data, leaf_size=30, distance="euclidean", **kwargs):
        """Builds kdtree with specified parameters.
        :param data: Dataset instance representing data
        :param leaf_size: maximum size of a leaf
        :param distance: defines metric to be used, can be "euclidean" and other values of 'metric' parameter in
        scikit's kd-tree.
        """
        if not isinstance(data, Dataset):
            raise ValueError("Data parameter must be an instance of Dataset!")
        if data.data.dtype not in self.valid_types:
            raise ValueError("Invalid dtype of numpy array, check valid_types parameter of index!")
        self.index = KDTree(data.data, leaf_size=leaf_size, metric=distance, **kwargs)
        self.size = len(data.data)
        return self.index 
Example 53
Project: ENN   Author: timo-stoettner   File: enn.py    MIT License
def buildDistanceMap (self, X, Y):
        classes = np.unique(Y)
        nClasses = len(classes)
        tree = KDTree(X)
        nRows = X.shape[0]

        TSOri = np.array([]).reshape(0,self.k)

        distanceMap = np.array([]).reshape(0,self.k)
        labels = np.array([]).reshape(0,self.k)

        for row in range(nRows):
            distances, indicesOfNeighbors = tree.query(X[row].reshape(1,-1), k = self.k+1)

            distances = distances[0][1:]
            indicesOfNeighbors = indicesOfNeighbors[0][1:]

            distanceMap = np.append(distanceMap, np.array(distances).reshape(1,self.k), axis=0)
            labels = np.append(labels, np.array(Y[indicesOfNeighbors]).reshape(1,self.k),axis=0)

        for c in classes:
            nTraining = np.sum(Y == c)
            labelTmp = labels[Y.ravel() == c,:]

            tmpKNNClass = labelTmp.ravel()
            TSOri = np.append(TSOri, len(tmpKNNClass[tmpKNNClass == c]) / (nTraining*float(self.k)))

        return distanceMap, labels, TSOri 
Example 54
Project: zero-shot-learning   Author: cetinsamet   File: detect_object.py    MIT License
def main(argv):

    if len(argv) != 1:
        print("Usage: python3 detect_object.py input-image-path")
        exit()

    # READ IMAGE
    IMAGEPATH = argv[0]
    img         = Image.open(IMAGEPATH).resize((224, 224))

    # LOAD PRETRAINED VGG16 MODEL FOR FEATURE EXTRACTION
    vgg_model   = get_model()
    # EXTRACT IMAGE FEATURE
    img_feature = get_features(vgg_model, img)
    # L2 NORMALIZE FEATURE
    img_feature = normalize(img_feature, norm='l2')

    # LOAD ZERO-SHOT MODEL
    model       = load_keras_model(model_path=MODELPATH)
    # MAKE PREDICTION
    pred        = model.predict(img_feature)

    # LOAD CLASS WORD2VECS
    class_vectors       = sorted(np.load(WORD2VECPATH), key=lambda x: x[0])
    classnames, vectors = zip(*class_vectors)
    classnames          = list(classnames)
    vectors             = np.asarray(vectors, dtype=float)  # np.float is deprecated

    # PLACE WORD2VECS IN KDTREE
    tree                = KDTree(vectors)
    # FIND CLOSEST WORD2VEC and GET PREDICTION RESULT
    dist, index         = tree.query(pred, k=5)
    pred_labels         = [classnames[idx] for idx in index[0]]

    # PRINT RESULT
    print()
    print("--- Top-5 Prediction ---")
    for i, classname in enumerate(pred_labels):
        print("%d- %s" %(i+1, classname))
    print()
    return 
Example 55
Project: PVGeo   Author: OpenGeoVis   File: subset.py    BSD 3-Clause "New" or "Revised" License
def _query(topo_points, data_points):
        """Querrys the data points for their closest point on the topography
        surface"""
        try:
            # sklearn's KDTree is faster: use it if available
            from sklearn.neighbors import KDTree as Tree
        except ImportError:
            from scipy.spatial import cKDTree as Tree
        tree = Tree(topo_points)
        i = tree.query(data_points)[1].ravel()
        return topo_points[i] 
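A note on the fallback above: both sklearn's KDTree.query and scipy's cKDTree.query return a (distances, indices) pair, so indexing the result with [1] works for either import, which is what makes the try/except swap safe.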
Example 56
Project: PVGeo   Author: OpenGeoVis   File: slicing.py    BSD 3-Clause "New" or "Revised" License
def _get_planes(self, pts):
        """Internal helper to generate planes for the slices"""
        try:
            # sklearn's KDTree is faster: use it if available
            from sklearn.neighbors import KDTree as Tree
        except ImportError:
            from scipy.spatial import cKDTree as Tree
        if self.get_number_of_slices() == 0:
            return []
        # Get the Points over the NumPy interface
        wpdi = dsa.WrapDataObject(pts) # NumPy wrapped points
        points = np.array(wpdi.Points) # new NumPy array of points so we don't destroy the input
        numPoints = pts.GetNumberOfPoints()
        if self.__useNearestNbr:
            tree = Tree(points)
            ptsi = tree.query([points[0]], k=numPoints)[1].ravel()
        else:
            ptsi = [i for i in range(numPoints)]

        # Iterate of points in order (skips last point):
        planes = []
        for i in range(0, numPoints - 1, numPoints//self.get_number_of_slices()):
            # get normal
            pts1 = points[ptsi[i]]
            pts2 = points[ptsi[i+1]]
            x1, y1, z1 = pts1[0], pts1[1], pts1[2]
            x2, y2, z2 = pts2[0], pts2[1], pts2[2]
            normal = [x2-x1,y2-y1,z2-z1]
            # create plane
            plane = self._generate_plane([x1,y1,z1], normal)
            planes.append(plane)

        return planes 
Example 57
Project: ratvec   Author: ratvec   File: classifiers.py    Apache License 2.0
def fit(self, x, y):
        """Fit."""
        self.tree = neighbors.KDTree(x)
        self.y_rep = np.array(y)
        return self 
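The fit above only stores the tree and the labels; this snippet does not include the matching predict. A minimal majority-vote sketch of what such a predict could look like (hypothetical, not part of the ratvec source, and it assumes non-negative integer class labels so np.bincount applies):

import numpy as np

def predict_sketch(tree, y_rep, X, k=5):
    """Hypothetical k-NN majority vote using the fitted tree above."""
    ind = tree.query(np.atleast_2d(X), k=k, return_distance=False)
    votes = y_rep[ind]  # shape (n_queries, k)
    return np.array([np.bincount(row).argmax() for row in votes])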
Example 58
Project: RPM-Net   Author: Salingo   File: cluster.py    MIT License
def GroupMergingSimDist(pcpos, simmat_raw, mov_seg):
    # simmat: (N,N) array of 0/1 digits
    # mov_seg: (N) of 0/1 digits
    num_point = simmat_raw.shape[0]
    refpos = np.where(mov_seg==0)[0]
    movpos = np.where(mov_seg==1)[0]
    refpts = pcpos[refpos]
    movpts = pcpos[movpos]

    grp_threshold = 80
    iou_threshold = .5
    min_points = 64
    group_seg = np.zeros(num_point, dtype=np.int32)
    simmat = (simmat_raw > grp_threshold).astype(np.int32)
    movmat = simmat[np.ix_(movpos,movpos)]

    clustering = DBSCAN(eps=10, min_samples=min_points).fit(movmat)
    raw_group_seg = clustering.labels_
    goodindex = np.where(raw_group_seg!=-1)[0]
    badindex = np.where(raw_group_seg==-1)[0]
    if len(badindex)>0:
        kdtree = KDTree(movpts[goodindex], leaf_size=10)
        dist, nnindices = kdtree.query(movpts[badindex], k=1)
        for i, nnindex in enumerate(nnindices):
            raw_group_seg[badindex[i]] = raw_group_seg[ goodindex[nnindex[0]] ]
        assert((raw_group_seg==-1).sum()==0)
    group_seg[movpos] = raw_group_seg+1
    proposals = []
    for i in range(group_seg.max()+1):
        proposals.append((group_seg==i).astype(np.int32))
    proposals = np.array(proposals)

    return group_seg, proposals 
Example 59
Project: ReliefF   Author: gitter-badger   File: ReliefF.py    MIT License
def fit(self, X, y):
        """Computes the feature importance scores from the training data.

        Parameters
        ----------
        X: array-like {n_samples, n_features}
            Training instances to compute the feature importance scores from
        y: array-like {n_samples}
            Training labels

        Returns
        -------
        None

        """
        self.feature_scores = np.zeros(X.shape[1])
        self.tree = KDTree(X)

        for source_index in range(X.shape[0]):
            distances, indices = self.tree.query(X[source_index].reshape(1, -1), k=self.n_neighbors + 1)

            # First match is self, so ignore it
            for neighbor_index in indices[0][1:]:
                similar_features = X[source_index] == X[neighbor_index]
                label_match = y[source_index] == y[neighbor_index]

                # If the labels match, then increment features that match and decrement features that do not match
                # Do the opposite if the labels do not match
                if label_match:
                    self.feature_scores[similar_features] += 1.
                    self.feature_scores[~similar_features] -= 1.
                else:
                    self.feature_scores[~similar_features] += 1.
                    self.feature_scores[similar_features] -= 1.
        
        self.top_features = np.argsort(self.feature_scores)[::-1] 
Example 60
Project: dyneusr   Author: braindynamicslab   File: utils.py    BSD 3-Clause "New" or "Revised" License
def optimize_eps(X, k=3, p=100.0, **kwargs):
    """ Get optimized value for eps based on data. 

    Parameters
    ----------
    k: int
        * calculate distance to k-th nearest neighbor

    p: float 
        * threshold percentage to keep

    Returns
    -------
    eps: float
        * a parameter for DBSCAN

    """
    from sklearn.neighbors import KDTree

    # Use 'minkowski', p=2 (i.e. euclidean metric)
    tree = KDTree(X, metric='minkowski', p=2, leaf_size=15)

    # Query k nearest-neighbors for X, not including self
    dist, ind = tree.query(X, k=k+1)

    # Find eps s.t. % of points within eps of k nearest-neighbor 
    eps = np.percentile(dist[:, k], p)
    return eps 
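A short usage note (made-up data; assumes the optimize_eps definition above): the returned value is intended to be passed straight to DBSCAN.

import numpy as np
from sklearn.cluster import DBSCAN

rng = np.random.RandomState(0)
X = rng.random_sample((300, 2))

eps = optimize_eps(X, k=3, p=95.0)
labels = DBSCAN(eps=eps, min_samples=3).fit_predict(X)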
Example 61
Project: dyneusr   Author: braindynamicslab   File: utils.py    BSD 3-Clause "New" or "Revised" License
def density_filter(X, k=2, inverse=True, normalize=True, **kwargs):
    """ Return function that filters the data by codensity. 

    Parameters
    ----------
    k: int
        * calculate distance to k-th nearest neighbor

    inverse: bool
        * if True, return the inverse of the k nearest-neighbor distances
          (the codensity)

    normalize: bool
        * if True, normalize each column by its maximum

    Returns
    -------
    dens: np.ndarray
        * (co)density values used to filter the data

    """
    from sklearn.neighbors import KDTree

    # Use 'minkowski', p=2 (i.e. euclidean metric)
    tree = KDTree(X, metric='minkowski', p=2, leaf_size=15)

    # Query k nearest-neighbors for X, not including self
    dist, ind = tree.query(X, k=k+1)

    # Extract k nearest neighbors
    dens = dist[:, 1:]

    # Calculate codensity, inverse of k nearest-neighbor dists
    if inverse is True:
        dens = 1.0 / dens

    # Normalize
    if normalize is True:
        dens /= dens.max(axis=0) 

    return dens 
Example 62
Project: dyneusr   Author: braindynamicslab   File: utils.py    BSD 3-Clause "New" or "Revised" License
def density_filtered_indices(X, k=15, p=90.0, **kwargs):
    """ Get sample indices based on a density filtration. 

    Parameters
    ----------
    k: int
        * calculate distance to k-th nearest neighbor

    p: float 
        * threshold percentage to keep

    Returns
    -------
    indices: tuple of np.ndarrays
        * indices of core points in the data set

    """
    from sklearn.neighbors import KDTree

    # Use 'minkowski', p=2 (i.e. euclidean metric)
    tree = KDTree(X, metric='minkowski', p=2, leaf_size=15)

    # Query k nearest-neighbors for X, not including self
    dist, ind = tree.query(X, k=k+1)

    # Find max_dist s.t. % of points within max_dist of k nearest-neighbor 
    max_dist = np.percentile(dist[:, k], p)

    # Return a mask over the data based on dist 
    indices = np.where(dist[:, k] <= max_dist)
    return indices 
Example 63
Project: Anamoly-Detection   Author: msmsk05   File: lscp.py    BSD 2-Clause "Simplified" License
def _get_local_region(self, X_test_norm):
        """ Get local region for each test instance

        Parameters
        ----------
        X_test_norm : numpy array, shape (n_samples, n_features)
            Normalized test data

        Returns
        -------
        final_local_region_list : List of lists, shape [n_samples, [local_region]]
            Indices of training samples in the local region of each test sample
        """

        # Initialize the local region list
        local_region_list = [[]] * X_test_norm.shape[0]

        if self.local_max_features > 1.0:
            warnings.warn(
                "Local max features greater than 1.0, reducing to 1.0")
            self.local_max_features = 1.0

        # perform multiple iterations
        for _ in range(self.local_region_iterations):

            # randomly generate feature subspaces
            features = generate_bagging_indices(
                self.random_state,
                bootstrap_features=False,
                n_features=self.X_train_norm_.shape[1],
                min_features=int(
                    self.X_train_norm_.shape[1] * self.local_min_features),
                max_features=int(
                    self.X_train_norm_.shape[1] * self.local_max_features))

            # build KDTree out of training subspace
            tree = KDTree(self.X_train_norm_[:, features])

            # Find neighbors of each test instance
            _, ind_arr = tree.query(X_test_norm[:, features],
                                    k=self.local_region_size)

            # add neighbors to local region list
            for j in range(X_test_norm.shape[0]):
                local_region_list[j] = local_region_list[j] + \
                                       ind_arr[j, :].tolist()

        # keep nearby points which occur at least local_region_threshold times
        final_local_region_list = [[]] * X_test_norm.shape[0]
        for j in range(X_test_norm.shape[0]):
            final_local_region_list[j] = [item for item, count in
                                          collections.Counter(
                                              local_region_list[j]).items() if
                                          count > self.local_region_threshold]

        return final_local_region_list 
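
Note that rebinding local_region_list[j] on every iteration sidesteps the aliasing pitfall of initializing with [[]] * n. The final filtering step is plain vote counting with collections.Counter; a standalone sketch with made-up neighbor indices:

import collections

# neighbor indices for one test point, pooled over 3 subspace iterations
votes = [4, 7, 9] + [4, 9, 12] + [4, 7, 9]
threshold = 2  # keep training points seen more than `threshold` times
local_region = [idx for idx, count in collections.Counter(votes).items()
                if count > threshold]
# -> [4, 9]: only points recovered in all three subspaces survive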
Example 64
Project: pySpatialTools   Author: tgquintela   File: utils.py    MIT License 4 votes vote down vote up
def tesselation(regionlocs):
    """Function to create a tesselation from the regionlocs.

    Parameters
    ----------
    regionlocs: array_like
        the spatial locations that define each region considered.

    Returns
    -------
    polygons: shapely.Polygon
        the polygon object which contains the information to define it as
        a polygon.

    """
    vor = Voronoi(regionlocs)
    lines = []
    for line in vor.ridge_vertices:
        if -1 not in line:
            lines.append(shapely.geometry.LineString(vor.vertices[line]))
    pols = ops.polygonize(lines)
    polygons = [poly for poly in pols]
    return polygons
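
A usage sketch, assuming the module-level imports the function relies on (scipy.spatial.Voronoi, shapely.geometry, and shapely.ops as ops) are in scope:

import numpy as np

regionlocs = np.random.rand(30, 2)  # seed locations, one per region
polygons = tesselation(regionlocs)  # finite Voronoi cells as shapely Polygons
areas = [poly.area for poly in polygons]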

Example 65
Project: pointnetvlad   Author: mikacuy   File: generate_test_sets.py    MIT License 4 votes vote down vote up
def construct_query_and_database_sets(base_path, runs_folder, folders, pointcloud_fols, filename, p, output_name):
	database_trees=[]
	test_trees=[]
	for folder in folders:
		print(folder)
		df_database= pd.DataFrame(columns=['file','northing','easting'])
		df_test= pd.DataFrame(columns=['file','northing','easting'])
		
		df_locations= pd.read_csv(os.path.join(base_path,runs_folder,folder,filename),sep=',')
		# df_locations['timestamp']=runs_folder+folder+pointcloud_fols+df_locations['timestamp'].astype(str)+'.bin'
		# df_locations=df_locations.rename(columns={'timestamp':'file'})
		for index, row in df_locations.iterrows():
			#entire business district is in the test set
			if(output_name=="business"):
				df_test=df_test.append(row, ignore_index=True)
			elif(check_in_test_set(row['northing'], row['easting'], p, x_width, y_width)):
				df_test=df_test.append(row, ignore_index=True)
			df_database=df_database.append(row, ignore_index=True)

		database_tree = KDTree(df_database[['northing','easting']])
		test_tree = KDTree(df_test[['northing','easting']])
		database_trees.append(database_tree)
		test_trees.append(test_tree)

	test_sets=[]
	database_sets=[]
	for folder in folders:
		database={}
		test={} 
		df_locations= pd.read_csv(os.path.join(base_path,runs_folder,folder,filename),sep=',')
		df_locations['timestamp']=runs_folder+folder+pointcloud_fols+df_locations['timestamp'].astype(str)+'.bin'
		df_locations=df_locations.rename(columns={'timestamp':'file'})
		for index,row in df_locations.iterrows():				
			#entire business district is in the test set
			if(output_name=="business"):
				test[len(test.keys())]={'query':row['file'],'northing':row['northing'],'easting':row['easting']}
			elif(check_in_test_set(row['northing'], row['easting'], p, x_width, y_width)):
				test[len(test.keys())]={'query':row['file'],'northing':row['northing'],'easting':row['easting']}
			database[len(database.keys())]={'query':row['file'],'northing':row['northing'],'easting':row['easting']}
		database_sets.append(database)
		test_sets.append(test)		

	for i in range(len(database_sets)):
		tree=database_trees[i]
		for j in range(len(test_sets)):
			if(i==j):
				continue
			for key in range(len(test_sets[j].keys())):
				coor=np.array([[test_sets[j][key]["northing"],test_sets[j][key]["easting"]]])
				index = tree.query_radius(coor, r=25)
				#indices of the positive matches in database i of each query (key) in test set j
				test_sets[j][key][i]=index[0].tolist()

	output_to_file(database_sets, output_name+'_evaluation_database.pickle')
	output_to_file(test_sets, output_name+'_evaluation_query.pickle')

###Building database and query files for evaluation 
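
The positive-match lookup above rests on KDTree.query_radius, which returns one index array per query row; a minimal standalone sketch:

import numpy as np
from sklearn.neighbors import KDTree

db = np.random.rand(100, 2) * 100      # (northing, easting) of database scans
tree = KDTree(db)
coor = np.array([[50.0, 50.0]])        # a single query location
index = tree.query_radius(coor, r=25)  # database points within radius 25
positives = index[0].tolist()          # indices of the positive matches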
Example 66
Project: masif   Author: LPDI-EPFL   File: eval_zrank.py    Apache License 2.0 4 votes vote down vote up
def test_alignment(target_pdb_fn, source_pdb_fn, aligned_pdb_fn, interface_dist = 10.0):
    parser = PDBParser()
    target_struct = parser.get_structure(target_pdb_fn, target_pdb_fn)
    target_coord = np.asarray([atom.get_coord() for atom in target_struct.get_atoms() if atom.get_id() == 'CA'])
    target_atom = [atom for atom in target_struct.get_atoms() if atom.get_id() == 'CA']

    source_struct = parser.get_structure(source_pdb_fn, source_pdb_fn)
    source_coord = np.asarray([atom.get_coord() for atom in source_struct.get_atoms() if atom.get_id() == 'CA'])
    source_atom = [atom for atom in source_struct.get_atoms() if atom.get_id() == 'CA']

    aligned_struct = parser.get_structure(aligned_pdb_fn, aligned_pdb_fn)

    # The following code was replaced by the sklearn code below. I leave it here for comparison purposes
#    flann = pyflann.FLANN()
#    r, d = flann.nn(target_coord, source_coord)
#    d = np.sqrt(d) 

    # Find interface atoms in source. 
    kdt = KDTree(target_coord)
    # For each element in source_coord, find the closest CA atom in target_coord. If it is within interface_dist, then it is interface.
    d, r = kdt.query(source_coord)
    # d has one entry per atom in source_coord
    # Atoms whose distance is less than interface_dist are interface atoms.
    int_at_ix = np.where(d < interface_dist)[0]
    dists = []
    for at_ix in int_at_ix: 
        res_id = source_atom[at_ix].get_parent().get_id()
        chain_id = source_atom[at_ix].get_parent().get_parent().get_id()
        d = aligned_struct[0][chain_id][res_id]['CA'] - source_atom[at_ix]
        dists.append(d)

    rmsd_source  = np.sqrt(np.mean(np.square(dists)))

    # ZDock sometimes swaps receptor and ligand. So our target could be the actual moving one. Therefore we compute the rmsd of the target as well 
    kdt = KDTree(source_coord)
    # For each element in target_coord, find the closest CA atom in source_coord. If it is within interface_dist, then it is interface.
    d, r = kdt.query(target_coord)
    # d has one entry per atom in target_coord
    # Atoms whose distance is less than interface_dist are interface atoms.
    int_at_ix = np.where(d < interface_dist)[0]
    dists = []
    for at_ix in int_at_ix: 
        res_id = target_atom[at_ix].get_parent().get_id()
        chain_id = target_atom[at_ix].get_parent().get_parent().get_id()
        d = aligned_struct[0][chain_id][res_id]['CA'] - target_atom[at_ix]
        dists.append(d)

    rmsd_target = np.sqrt(np.mean(np.square(dists)))

    # One of the two should be zero, as this was the static one. 
    assert (min(rmsd_source, rmsd_target) < 1e-8)
    

    return max(rmsd_source, rmsd_target)

# ppi_pair_id: pair of proteins in PDBID_CHAIN1_CHAIN2 format 
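
The interface detection in this example reduces to a single k=1 KDTree query; a standalone sketch with random stand-in CA coordinates:

import numpy as np
from sklearn.neighbors import KDTree

target_coord = np.random.rand(50, 3) * 30.0  # stand-in target CA coordinates
source_coord = np.random.rand(40, 3) * 30.0  # stand-in source CA coordinates
kdt = KDTree(target_coord)
d, r = kdt.query(source_coord)               # default k=1, so d has shape (40, 1)
int_at_ix = np.where(d < 10.0)[0]            # source atoms near any target CA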
Example 67
Project: masif   Author: LPDI-EPFL   File: eval_zdock.py    Apache License 2.0 4 votes vote down vote up
def test_alignment(target_pdb_fn, source_pdb_fn, aligned_pdb_fn, interface_dist = 10.0):
    parser = PDBParser()
    target_struct = parser.get_structure(target_pdb_fn, target_pdb_fn)
    target_coord = np.asarray([atom.get_coord() for atom in target_struct.get_atoms() if atom.get_id() == 'CA'])
    target_atom = [atom for atom in target_struct.get_atoms() if atom.get_id() == 'CA']

    source_struct = parser.get_structure(source_pdb_fn, source_pdb_fn)
    source_coord = np.asarray([atom.get_coord() for atom in source_struct.get_atoms() if atom.get_id() == 'CA'])
    source_atom = [atom for atom in source_struct.get_atoms() if atom.get_id() == 'CA']

    aligned_struct = parser.get_structure(aligned_pdb_fn, aligned_pdb_fn)

    # The following code was replaced by the sklearn code below. I leave it here for comparison purposes
#    flann = pyflann.FLANN()
#    r, d = flann.nn(target_coord, source_coord)
#    d = np.sqrt(d) 

    # Find interface atoms in source. 
    kdt = KDTree(target_coord)
    # For each element in source_coord, find the closest CA atom in target_coord. If it is within interface_dist, then it is interface.
    d, r = kdt.query(source_coord)
    # d has one entry per atom in source_coord
    # Atoms whose distance is less than interface_dist are interface atoms.
    int_at_ix = np.where(d < interface_dist)[0]
    dists = []
    for at_ix in int_at_ix: 
        res_id = source_atom[at_ix].get_parent().get_id()
        chain_id = source_atom[at_ix].get_parent().get_parent().get_id()
        d = aligned_struct[0][chain_id][res_id]['CA'] - source_atom[at_ix]
        dists.append(d)

    rmsd_source  = np.sqrt(np.mean(np.square(dists)))

    # ZDock sometimes swaps receptor and ligand. So our target could be the actual moving one. Therefore we compute the rmsd of the target. 
    kdt = KDTree(source_coord)
    # For each element in target_coord, find the closest CA atom in source_coord. If it is within interface_dist, then it is interface.
    d, r = kdt.query(target_coord)
    # d has one entry per atom in target_coord
    # Atoms whose distance is less than interface_dist are interface atoms.
    int_at_ix = np.where(d < interface_dist)[0]
    dists = []
    for at_ix in int_at_ix: 
        res_id = target_atom[at_ix].get_parent().get_id()
        chain_id = target_atom[at_ix].get_parent().get_parent().get_id()
        d = aligned_struct[0][chain_id][res_id]['CA'] - target_atom[at_ix]
        dists.append(d)

    rmsd_target = np.sqrt(np.mean(np.square(dists)))

    # One of the two should be zero
    assert (min(rmsd_source, rmsd_target) < 1e-8)
    

    return max(rmsd_source, rmsd_target)

# ppi_pair_id: pair of proteins in PDBID_CHAIN1_CHAIN2 format 
Example 68
Project: masif   Author: LPDI-EPFL   File: evalPatchDock.py    Apache License 2.0 4 votes vote down vote up
def test_alignment(target_pdb_fn, source_pdb_fn, aligned_pdb_fn, interface_dist = 10.0):
    parser = PDBParser()
    target_struct = parser.get_structure(target_pdb_fn, target_pdb_fn)
    target_coord = np.asarray([atom.get_coord() for atom in target_struct.get_atoms() if atom.get_id() == 'CA'])

    source_struct = parser.get_structure(source_pdb_fn, source_pdb_fn)
    source_coord = np.asarray([atom.get_coord() for atom in source_struct.get_atoms() if atom.get_id() == 'CA'])
    source_atom = [atom for atom in source_struct.get_atoms() if atom.get_id() == 'CA']

    aligned_struct = parser.get_structure(aligned_pdb_fn, aligned_pdb_fn)

    # The following code was replaced by the sklearn code below. I leave it here for comparison purposes
    #    flann = pyflann.FLANN()
    #    r, d = flann.nn(target_coord, source_coord)
    #    d = np.sqrt(d)
 
    # For each element in source_coord, find the closest CA atom in target_coord. If it is within interface_dist, then it is interface.
    kdt = KDTree(target_coord)
    d, r = kdt.query(source_coord)
    # d has one entry per atom in source_coord
    # Atoms whose distance is less than interface_dist are interface atoms.
    int_at_ix = np.where(d < interface_dist)[0]

    dists = []
    for at_ix in int_at_ix: 
        res_id = source_atom[at_ix].get_parent().get_id()
        chain_id = source_atom[at_ix].get_parent().get_parent().get_id()
        try:
            d = aligned_struct[0][chain_id][res_id]['CA'] - source_atom[at_ix]
        except KeyError:
            # missing chain/residue/atom in the aligned structure
            print("Failed on {} {}".format(source_pdb_fn, aligned_pdb_fn))
            sys.exit(1)
        dists.append(d)

    rmsd = np.sqrt(np.mean(np.square(dists)))

    return rmsd

# Go to directory 
Example 69
Project: Weiss   Author: WangWenjun559   File: test_neighbors.py    Apache License 2.0 4 votes vote down vote up
def test_neighbors_metrics(n_samples=20, n_features=3,
                           n_query_pts=2, n_neighbors=5):
    # Test computing the neighbors for various metrics
    # create a symmetric matrix
    V = rng.rand(n_features, n_features)
    VI = np.dot(V, V.T)

    metrics = [('euclidean', {}),
               ('manhattan', {}),
               ('minkowski', dict(p=1)),
               ('minkowski', dict(p=2)),
               ('minkowski', dict(p=3)),
               ('minkowski', dict(p=np.inf)),
               ('chebyshev', {}),
               ('seuclidean', dict(V=rng.rand(n_features))),
               ('wminkowski', dict(p=3, w=rng.rand(n_features))),
               ('mahalanobis', dict(VI=VI))]
    algorithms = ['brute', 'ball_tree', 'kd_tree']
    X = rng.rand(n_samples, n_features)

    test = rng.rand(n_query_pts, n_features)

    for metric, metric_params in metrics:
        results = []
        p = metric_params.pop('p', 2)
        for algorithm in algorithms:
            # KD tree doesn't support all metrics
            if (algorithm == 'kd_tree' and
                    metric not in neighbors.KDTree.valid_metrics):
                assert_raises(ValueError,
                              neighbors.NearestNeighbors,
                              algorithm=algorithm,
                              metric=metric, metric_params=metric_params)
                continue

            neigh = neighbors.NearestNeighbors(n_neighbors=n_neighbors,
                                               algorithm=algorithm,
                                               metric=metric, p=p,
                                               metric_params=metric_params)
            neigh.fit(X)
            results.append(neigh.kneighbors(test, return_distance=True))

        assert_array_almost_equal(results[0][0], results[1][0])
        assert_array_almost_equal(results[0][1], results[1][1]) 
Example 70
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_neighbors.py    Apache License 2.0 4 votes vote down vote up
def test_neighbors_metrics(n_samples=20, n_features=3,
                           n_query_pts=2, n_neighbors=5):
    # Test computing the neighbors for various metrics
    # create a symmetric matrix
    V = rng.rand(n_features, n_features)
    VI = np.dot(V, V.T)

    metrics = [('euclidean', {}),
               ('manhattan', {}),
               ('minkowski', dict(p=1)),
               ('minkowski', dict(p=2)),
               ('minkowski', dict(p=3)),
               ('minkowski', dict(p=np.inf)),
               ('chebyshev', {}),
               ('seuclidean', dict(V=rng.rand(n_features))),
               ('wminkowski', dict(p=3, w=rng.rand(n_features))),
               ('mahalanobis', dict(VI=VI))]
    algorithms = ['brute', 'ball_tree', 'kd_tree']
    X = rng.rand(n_samples, n_features)

    test = rng.rand(n_query_pts, n_features)

    for metric, metric_params in metrics:
        results = {}
        p = metric_params.pop('p', 2)
        for algorithm in algorithms:
            # KD tree doesn't support all metrics
            if (algorithm == 'kd_tree' and
                    metric not in neighbors.KDTree.valid_metrics):
                assert_raises(ValueError,
                              neighbors.NearestNeighbors,
                              algorithm=algorithm,
                              metric=metric, metric_params=metric_params)
                continue
            neigh = neighbors.NearestNeighbors(n_neighbors=n_neighbors,
                                               algorithm=algorithm,
                                               metric=metric, p=p,
                                               metric_params=metric_params)
            neigh.fit(X)
            results[algorithm] = neigh.kneighbors(test, return_distance=True)
        assert_array_almost_equal(results['brute'][0], results['ball_tree'][0])
        assert_array_almost_equal(results['brute'][1], results['ball_tree'][1])
        if 'kd_tree' in results:
            assert_array_almost_equal(results['brute'][0],
                                      results['kd_tree'][0])
            assert_array_almost_equal(results['brute'][1],
                                      results['kd_tree'][1]) 
Example 73
Project: postlearn   Author: TomAugspurger   File: cluster.py    MIT License 4 votes vote down vote up
def plot_decision_boundry(data, pipe, reducer=PCA):
    fig, ax = plt.subplots(figsize=(16, 12))
    if callable(reducer):
        reducer = reducer(n_components=2)
    # else assume it's already been instantiated...

    if isinstance(pipe, Pipeline) and len(pipe.steps) > 1:
        prepipe = Pipeline(pipe.steps[:-1])
        km = pipe.steps[-1][1]
        data_ = prepipe.transform(data)
    elif isinstance(pipe, Pipeline):
        prepipe = None
        km = pipe.steps[0][1]
        data_ = data
    else:
        prepipe = None
        km = pipe
        data_ = data

    X_reduced = reducer.fit_transform(data_)

    cluster_centers = getattr(km, 'cluster_centers_',
                              compute_centers(km, data_))

    mu_reduced = reducer.transform(cluster_centers)
    n_clusters = len(np.unique(km.labels_))
    tree = KDTree(mu_reduced)

    cmap = rediscretize_cmap(n_clusters, 'Set1')
    ax.scatter(mu_reduced[:, 0], mu_reduced[:, 1],
               c=np.arange(n_clusters), cmap=cmap,
               s=300)
    colorbar_index(ncolors=n_clusters, cmap=cmap)

    ax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=km.labels_,
               cmap=cmap, alpha=.95)

    xmin, xmax = ax.get_xlim()
    ymin, ymax = ax.get_ylim()

    xx, yy = np.meshgrid(np.linspace(xmin, xmax, 100),
                         np.linspace(ymin, ymax, 100))
    T = np.c_[xx.ravel(), yy.ravel()]
    _, group = tree.query(T)

    Z = group.ravel().reshape(xx.shape)
    ax.pcolormesh(xx, yy, Z, alpha=.25, cmap=cmap)
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)

    for label, xy in enumerate(mu_reduced[:, :2]):
        ax.annotate(label, xy, fontsize=28, fontweight="bold")
    return ax 
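
The decision surface here is painted by assigning every grid cell to its nearest reduced cluster center with a KDTree; the core of that trick in isolation:

import numpy as np
from sklearn.neighbors import KDTree

centers = np.array([[0.0, 0.0], [3.0, 3.0], [0.0, 3.0]])  # toy cluster centers
tree = KDTree(centers)
xx, yy = np.meshgrid(np.linspace(-1, 4, 100), np.linspace(-1, 4, 100))
T = np.c_[xx.ravel(), yy.ravel()]
_, group = tree.query(T)             # nearest center for every grid point
Z = group.ravel().reshape(xx.shape)  # label image, ready for pcolormesh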
Example 72
Project: PVGeo   Author: OpenGeoVis   File: xyz.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def _estimate_angle_and_spacing(self, pts, sample=0.5):
        """internal use only
        """
        try:
            # sklearn's KDTree is faster: use it if available
            from sklearn.neighbors import KDTree as Tree
        except ImportError:
            from scipy.spatial import cKDTree as Tree
        # Create the indexing range for searching the points:
        num = len(pts)
        rng = np.linspace(0, num-1, num=num, dtype=int)
        N = int(num*sample) + 1
        rng = np.random.choice(rng, N)
        angles = np.zeros(len(rng))
        tree = Tree(pts)
        distances = [[],[]]

        #######################################################################
        #######################################################################
        # Find nearest point
        distall, ptsiall = tree.query(pts, k=2)
        pt1all, pt2all = pts[ptsiall[:, 0]], pts[ptsiall[:, 1]]
        #######################################################################
        idx = 0
        for i in rng:
            # OPTIMIZE
            ax, angles[idx], dist = self._converge_angle(pt1all[i], pt2all[i])
            distances[ax].append(dist)
            idx += 1
        #######################################################################
        #TODO??? angles, distances = self._converge_angle(pt1all, pt2all)
        #######################################################################
        #######################################################################
        dx, dy = distances[0], distances[1]
        if len(dx) == 0:
            dx = dy
        elif len(dy) == 0:
            dy = dx
        TOLERANCE = np.min(np.append(dx, dy)) / 2.0
        angle = np.average(np.unique(angles))
        dx = np.unique(np.around(dx / TOLERANCE)) * TOLERANCE
        dy = np.unique(np.around(dy / TOLERANCE)) * TOLERANCE

        # Now round to decimals
        dx = np.around(dx, self.DECIMALS)
        dy = np.around(dy, self.DECIMALS)

        # print('Recovered: ', dx, dy)
        return angle, dx[0], dy[0] 
Example 73
Project: alibi   Author: SeldonIO   File: trustscore.py    Apache License 2.0 4 votes vote down vote up
def fit(self, X: np.ndarray, Y: np.ndarray, classes: int = None) -> None:
        """
        Build KDTrees for each prediction class.

        Parameters
        ----------
        X
            Data
        Y
            Target labels, either one-hot encoded or the actual class label.
        classes
            Number of prediction classes, needs to be provided if Y equals the predicted class.
        """
        self.classes = classes if classes is not None else Y.shape[1]
        self.kdtrees = [None] * self.classes  # type: Any
        self.X_kdtree = [None] * self.classes  # type: Any

        # KDTree and kNeighborsClassifier need 2D data
        if len(X.shape) > 2:
            logger.warning('Reshaping data from {0} to {1} so k-d trees can '
                           'be built.'.format(X.shape, X.reshape(X.shape[0], -1).shape))
            X = X.reshape(X.shape[0], -1)

        # make sure Y represents predicted classes, not one-hot encodings
        if len(Y.shape) > 1:
            Y = np.argmax(Y, axis=1)

        if self.filter == 'probability_knn':
            X_filter, Y_filter = self.filter_by_probability_knn(X, Y)

        for c in range(self.classes):

            if self.filter is None:
                X_fit = X[np.where(Y == c)[0]]
            elif self.filter == 'distance_knn':
                X_fit = self.filter_by_distance_knn(X[np.where(Y == c)[0]])
            elif self.filter == 'probability_knn':
                X_fit = X_filter[np.where(Y_filter == c)[0]]

            no_x_fit = len(X_fit) == 0
            if no_x_fit and len(X[np.where(Y == c)[0]]) == 0:
                logger.warning('No instances available for class %s', c)
            elif no_x_fit:
                logger.warning('Filtered all the instances for class %s. Lower alpha or check data.', c)

            self.kdtrees[c] = KDTree(X_fit, leaf_size=self.leaf_size, metric=self.metric)  # build KDTree for class c
            self.X_kdtree[c] = X_fit 
Example 74
Project: alibi   Author: SeldonIO   File: trustscore.py    Apache License 2.0 4 votes vote down vote up
def score(self, X: np.ndarray, Y: np.ndarray, k: int = 2, dist_type: str = 'point') \
            -> Tuple[np.ndarray, np.ndarray]:
        """
        Calculate trust scores = ratio of distance to closest class other than the
        predicted class to distance to predicted class.

        Parameters
        ----------
        X
            Instances to calculate trust score for.
        Y
            Either prediction probabilities for each class or the predicted class.
        k
            Number of nearest neighbors used for distance calculation.
        dist_type
            Use either the distance to the k-nearest point (dist_type = 'point') or
            the average distance from the first to the k-nearest point in the data (dist_type = 'mean').

        Returns
        -------
        Batch with trust scores and the closest not predicted class.
        """
        # make sure Y represents predicted classes, not probabilities
        if len(Y.shape) > 1:
            Y = np.argmax(Y, axis=1)

        # KDTree needs 2D data
        if len(X.shape) > 2:
            logger.warning('Reshaping data from {0} to {1} so k-d trees can '
                           'be queried.'.format(X.shape, X.reshape(X.shape[0], -1).shape))
            X = X.reshape(X.shape[0], -1)

        d = np.tile(None, (X.shape[0], self.classes))  # init distance matrix: [nb instances, nb classes]

        for c in range(self.classes):
            d_tmp = self.kdtrees[c].query(X, k=k)[0]  # get k nearest neighbors for each class
            if dist_type == 'point':
                d[:, c] = d_tmp[:, -1]
            elif dist_type == 'mean':
                d[:, c] = np.mean(d_tmp, axis=1)

        sorted_d = np.sort(d, axis=1)  # sort distance each instance in batch over classes
        # get distance to predicted and closest other class and calculate trust score
        d_to_pred = d[range(d.shape[0]), Y]
        d_to_closest_not_pred = np.where(sorted_d[:, 0] != d_to_pred, sorted_d[:, 0], sorted_d[:, 1])
        trust_score = d_to_closest_not_pred / (d_to_pred + self.eps)
        # closest not predicted class
        class_closest_not_pred = np.where(d == d_to_closest_not_pred.reshape(-1, 1))[1]
        return trust_score, class_closest_not_pred 
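
Stripped of batching and filtering, the trust score is a ratio of two KDTree distances; a toy two-class sketch of just that arithmetic (not the alibi API):

import numpy as np
from sklearn.neighbors import KDTree

X0 = np.random.rand(50, 2)        # training points of class 0
X1 = np.random.rand(50, 2) + 2.0  # training points of class 1
kdtrees = [KDTree(X0), KDTree(X1)]

x = np.array([[0.4, 0.5]])
pred = 0  # pretend the model predicts class 0
d = np.array([t.query(x, k=2)[0][:, -1] for t in kdtrees]).ravel()
d_to_pred = d[pred]
d_to_closest_not_pred = np.min(np.delete(d, pred))
trust_score = d_to_closest_not_pred / (d_to_pred + 1e-12)  # high = trustworthy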
Example 75
Project: velib-exp   Author: Evarin   File: visu.py    MIT License 4 votes vote down vote up
def build_map0(data, resolution=0.0005, oob=0.005,
              min_free=10, min_busy=10,
              max_dist=0.001):
    from sklearn.neighbors import KDTree
    print("Rendering...")
    
    y = np.array([p['position']['lat'] for p in data])
    x = np.array([p['position']['lng'] for p in data])

    st_free = np.array([p['available_bike_stands']
                        for p in data]).astype(np.float32)
    st_busy = np.array([p['available_bikes']
                        for p in data]).astype(np.float32)

    bds = (x.min()-oob, y.min()-oob, x.max()+oob, y.max()+oob)

    x = (x-bds[0])/resolution
    y = (y-bds[1])/resolution
    max_dist = max_dist/resolution

    w = int((bds[2]-bds[0])/resolution)
    h = int((bds[3]-bds[1])/resolution)

    map = np.zeros((h, w, 3))

    print(map.shape)
    pts = np.array([x, y]).transpose()
    tree = KDTree(pts)
    
    for i in range(h):
        for j in range(w):
            dist, ind = tree.query([[j, i]], 1)
            bfree = st_free[ind]
            bbusy = st_busy[ind]
            mdist = 1.  # min(1., max(0., 1.5-dist.min()/max_dist/7.))
            if ((dist <= max_dist) & (bfree >= min_free)).any():
                sf = 1.
            else:
                sf = min(1., np.mean(max_dist/dist * (bfree/min_free)))
            if ((dist <= max_dist) & (bbusy >= min_busy)).any():
                sb = 1.
            else:
                sb = min(1., np.mean(max_dist/dist * (bbusy/min_busy)))
            map[i, j, :] = ((1-sf)*mdist,
                            (1.-abs(sb-sf)/(sb+sf+0.1)*2)*mdist*0,
                            max(0, sf-sb)*mdist)
    return (bds, map)


# Util functions to show the result and save it
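
The per-pixel tree.query call in the double loop above is the bottleneck; KDTree.query accepts the whole grid at once, so the distance field could be computed in one batched call along these lines (a sketch without the free/busy weighting):

import numpy as np
from sklearn.neighbors import KDTree

h, w = 200, 300
pts = np.random.rand(150, 2) * [w, h]  # station positions in pixel coordinates
tree = KDTree(pts)

grid = np.stack(np.meshgrid(np.arange(w), np.arange(h)), axis=-1).reshape(-1, 2)
dist, ind = tree.query(grid, k=1)      # one batched query for every pixel
dist = dist.reshape(h, w)              # per-pixel distance to nearest station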