Python sklearn.neighbors.BallTree() Examples

The following code examples show how to use sklearn.neighbors.BallTree(). They are taken from open source Python projects.
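
As a quick orientation before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic workflow: build a BallTree on an array of samples, then run a k-nearest-neighbour query or a radius query.

import numpy as np
from sklearn.neighbors import BallTree

rng = np.random.RandomState(0)
X = rng.random_sample((100, 3))            # 100 samples in 3 dimensions

tree = BallTree(X, leaf_size=40, metric='euclidean')

# k-nearest-neighbour query: distances and indices, one row per query point
dist, ind = tree.query(X[:5], k=3)

# radius query: an array of index arrays, one per query point
ind_r = tree.query_radius(X[:5], r=0.3)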

Example 1
Project: rogers   Author: cylance   File: test_pdci.py    Apache License 2.0
def test_index():
    xs = rand(1000, 100, random_state=42).toarray()

    try:
        indexer = SQLiteIndexer(index_path=INDEX_PATH)
        index = PrioritizedDynamicContinuousIndex(indexer, composite_indices=2, simple_indices=50)
        index.fit(xs)

        x = xs[0:1]
        k = 10

        nn_baseline = BallTree(xs)

        baseline_dist, baseline_idx = nn_baseline.query(x, k=k)
        dist, idx = index.query(x, k=k)

        # np.testing.assert_equal(baseline_idx[0], idx)
    finally:
        if os.path.exists(INDEX_PATH):
            os.remove(INDEX_PATH) 
Example 2
Project: pynndescent   Author: lmcinnes   File: test_distances.py    BSD 2-Clause "Simplified" License
def test_haversine():
    tree = BallTree(spatial_data[:, :2], metric="haversine")
    dist_matrix, _ = tree.query(spatial_data[:, :2], k=spatial_data.shape[0])
    test_matrix = np.array(
        [
            [
                dist.haversine(spatial_data[i, :2], spatial_data[j, :2])
                for j in range(spatial_data.shape[0])
            ]
            for i in range(spatial_data.shape[0])
        ]
    )
    test_matrix.sort(axis=1)
    assert_array_almost_equal(
        test_matrix,
        dist_matrix,
        err_msg="Distances don't match for metric haversine",
    ) 
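
scikit-learn's haversine metric expects (latitude, longitude) pairs in radians and returns great-circle distances on the unit sphere, so results are usually multiplied by the Earth's radius (Example 11 below converts its coordinates to radians the same way). A minimal sketch with made-up coordinates, not taken from the project:

import numpy as np
from sklearn.neighbors import BallTree

# made-up (latitude, longitude) pairs in degrees
coords_deg = np.array([[52.52, 13.40],   # roughly Berlin
                       [48.86, 2.35]])   # roughly Paris

tree = BallTree(np.radians(coords_deg), metric='haversine')
dist_rad, ind = tree.query(np.radians(coords_deg[:1]), k=2)
dist_km = dist_rad * 6371.0              # scale unit-sphere distances by the Earth's radius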
Example 3
Project: Weiss   Author: WangWenjun559   File: test_neighbors.py    Apache License 2.0
def test_unsupervised_inputs():
    # test the types of valid input into NearestNeighbors
    X = rng.random_sample((10, 3))

    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)

    dist1, ind1 = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)

    for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(input)
        dist2, ind2 = nbrs.kneighbors(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2) 
Example 4
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_neighbors.py    Apache License 2.0
def test_unsupervised_inputs():
    # test the types of valid input into NearestNeighbors
    X = rng.random_sample((10, 3))

    nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
    nbrs_fid.fit(X)

    dist1, ind1 = nbrs_fid.kneighbors(X)

    nbrs = neighbors.NearestNeighbors(n_neighbors=1)

    for input in (nbrs_fid, neighbors.BallTree(X), neighbors.KDTree(X)):
        nbrs.fit(input)
        dist2, ind2 = nbrs.kneighbors(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2) 
Example 5
Project: astroML_figures   Author: astroML   File: fig_PCA_LLE.py    BSD 2-Clause "Simplified" License
def compute_spec_LLE(n_neighbors=10, out_dim=3):
    # Compute the LLE projection
    LLE = manifold.LocallyLinearEmbedding(n_neighbors, out_dim,
                                          method='modified',
                                          eigen_solver='dense')
    Y_LLE = LLE.fit_transform(spec)
    print(" - finished LLE projection")

    # remove outliers for the plot
    BT = neighbors.BallTree(Y_LLE)
    dist, ind = BT.query(Y_LLE, n_neighbors)
    dist_to_n = dist[:, -1]
    dist_to_n -= dist_to_n.mean()
    std = np.std(dist_to_n)
    flag = (dist_to_n > 0.25 * std)
    print(" - removing {0} outliers for plot".format(flag.sum()))

    return Y_LLE[~flag], color[~flag] 
Example 6
Project: correlcalc   Author: rohinkumar   File: tpcf.py    MIT License
def autocorrw(dat, bins, metric, weights):
    # dbt = BallTree(dat, metric='pyfunc', func=metric)
    DD = np.zeros(len(bins)-1)
    binmax = max(bins)
    for i in tqdm(range(len(dat))):
        ind = dbt.query_radius(dat[i].reshape(1, -1), binmax)
        # wts=np.array([])
        for j in ind:
            # print ("i j")
            # print (i, j)
            # print ("ind[ind>i]")
            # print (ind[ind>i])
            dist0 = dist.cdist([dat[i], ], dat[j[j>i]], metric)[0]
            DD += np.histogram(dist0, bins=bins, weights=weights[j[j>i]])[0]
            # print (dist0,weights[j])
    print(DD)
    return DD 
Example 7
Project: correlcalc   Author: rohinkumar   File: tpcf.py    MIT License
def crosscorrwrd(dat, datR, bins, metric, weights):
    # dbt = BallTree(dat, metric='pyfunc', func=metric)
    RD = np.zeros(len(bins)-1)
    # p=multiprocessing.Pool(processes=multiprocessing.cpu_count())
    # RD=p.map(rdcalc, range(len(datR)))
    binmax = max(bins)
    for i in tqdm(range(len(datR))):
    # def rdcalc():
        ind = dbt.query_radius(datR[i].reshape(1, -1), binmax)
        #  wts=np.array([])
        for j in ind:
            dist0 = dist.cdist([datR[i], ], dat[j], metric)[0]
            RD += np.histogram(dist0, bins=bins, weights=weights[j])[0]
                # print (dist0,weights[j])
            # return RD
    print(RD)
    return RD 
Example 8
Project: correlcalc   Author: rohinkumar   File: tpcf.py    MIT License
def autocorrwp(dat, bins, metric, weights, rNd, multi=False, queue=0):
    # dbt = BallTree(dat, metric='pyfunc', func=metric)
    DD = np.zeros(len(bins)-1)
    binmax = max(bins)
    for i in tqdm(rNd):
        ind = dbt.query_radius(dat[i].reshape(1, -1), binmax)
        # wts=np.array([])
        for j in ind:
            # print ("i j")
            # print (i, j)
            # print ("ind[ind>i]")
            # print (ind)
            # print (ind[ind>i])
            dist0 = dist.cdist([dat[i], ], dat[j[j>i]], metric)[0]
            DD += np.histogram(dist0, bins=bins, weights=weights[j[j>i]])[0]
            # print (dist0,weights[j])
    if multi:
        queue.put(DD)
    else:
        return DD
    # print (DD)
    return DD 
Example 9
Project: correlcalc   Author: rohinkumar   File: tpcf.py    MIT License
def crosscorrwdrp(dat, datR, bins, metric, rweights, rNd, multi=False, queue=0):
    # dbt = BallTree(dat, metric='pyfunc', func=metric)
    DR = np.zeros(len(bins)-1)
    binmax = max(bins)
    # p=multiprocessing.Pool(processes=multiprocessing.cpu_count())
    # RD=p.map(rdcalc, range(len(datR)))
    for i in tqdm(rNd):
    # def rdcalc():
        ind = rbt.query_radius(dat[i].reshape(1, -1), binmax)
        #  wts=np.array([])
        for j in ind:
            dist0 = dist.cdist([dat[i], ], datR[j], metric)[0]
            DR += np.histogram(dist0, bins=bins, weights=rweights[j])[0]
    if multi:
        queue.put(DR)
    else:
        return DR
    # print(RD)
    return DR 
Example 10
Project: correlcalc   Author: rohinkumar   File: tpcf.py    MIT License
def autocorrwpr(datR, bins, metric, rweights, rNr, multi=False, queue=0):
    # dbt = BallTree(dat, metric='pyfunc', func=metric)
    RR = np.zeros(len(bins)-1)
    binmax = max(bins)
    for i in tqdm(rNr):
        ind = rbt.query_radius(datR[i].reshape(1, -1), binmax)
        # print (ind)
        # wts=np.array([])
        for j in ind:
            # print("i")
            # print (i)
            # print ("j")
            # print (j)
            # print ("j[j>i]")
            # print (j[j>i])
            dist0 = dist.cdist([datR[i], ], datR[j[j>i]], metric)[0]
            RR += np.histogram(dist0, bins=bins, weights=rweights[j[j>i]])[0]
            # print (dist0,weights[j])
    if multi:
        queue.put(RR)
    else:
        return RR
    # print (RR)
    return RR 
Example 11
Project: mod   Author: wallarelvo   File: find_stations.py    GNU General Public License v2.0
def find_clusters(geos, tol):
    hav_tol = tol / 6371.0
    used = [False] * len(geos)
    ball_tree = nn.BallTree(np.radians(geos), metric="haversine")
    centers = list()
    for i in xrange(len(geos)):
        if not used[i]:
            loc = geos[i]
            st = np.array([i, loc[0], loc[1]])
            centers.append(st)
            nearest = ball_tree.query_radius([np.radians(loc)], hav_tol)[0]
            for i in nearest:
                used[i] = True
    return np.array(centers) 
Example 12
Project: evalutils   Author: comic   File: scorers.py    MIT License
def find_hits_for_targets(
    *,
    targets: List[Tuple[float, ...]],
    predictions: List[Tuple[float, ...]],
    radius: float,
) -> List[Tuple[int, ...]]:
    """
    Generates a list of the predicted points that are within a radius r of the
    targets. The indices are returned in sorted order, from closest to
    farthest point.

    Parameters
    ----------
    targets
        A list of target points
    predictions
        A list of predicted points
    radius
        The maximum distance that two points can be apart for them to be
        considered a hit

    Returns
    -------

    A list which has the same length as the targets list. Each element within
    this list contains another list that contains the indices of the
    predictions that are considered hits.

    """
    predictions_tree = BallTree(array(predictions))
    hits, _ = predictions_tree.query_radius(
        X=targets, r=radius, return_distance=True, sort_results=True
    )
    return hits 
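
A hypothetical call to the helper above with illustrative coordinates (it relies only on the BallTree and numpy array imports already used in scorers.py):

targets = [(0.0, 0.0), (5.0, 5.0)]
predictions = [(0.1, 0.0), (4.0, 4.0), (10.0, 10.0)]

hits = find_hits_for_targets(targets=targets, predictions=predictions, radius=2.0)
# hits[0] -> indices of the predictions within 2.0 of the first target, closest first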
Example 13
Project: numpy-ml   Author: ddbourgin   File: tests.py    GNU General Public License v3.0
def test_ball_tree():
    while True:
        N = np.random.randint(2, 100)
        M = np.random.randint(2, 100)
        k = np.random.randint(1, N)
        ls = np.min([np.random.randint(1, 10), N - 1])

        X = np.random.rand(N, M)
        BT = BallTree(leaf_size=ls, metric=euclidean)
        BT.fit(X)

        x = np.random.rand(M)
        mine = BT.nearest_neighbors(k, x)
        assert len(mine) == k

        mine_neighb = np.array([n.key for n in mine])
        mine_dist = np.array([n.distance for n in mine])

        sort_ix = np.argsort(mine_dist)
        mine_dist = mine_dist[sort_ix]
        mine_neighb = mine_neighb[sort_ix]

        sk = sk_BallTree(X, leaf_size=ls)
        theirs_dist, ind = sk.query(x.reshape(1, -1), k=k)
        sort_ix = np.argsort(theirs_dist.flatten())

        theirs_dist = theirs_dist.flatten()[sort_ix]
        theirs_neighb = X[ind.flatten()[sort_ix]]

        for i in range(len(theirs_dist)):
            np.testing.assert_almost_equal(mine_neighb[i], theirs_neighb[i])
            np.testing.assert_almost_equal(mine_dist[i], theirs_dist[i])

        print("PASSED") 
Example 14
Project: ObjectMatching   Author: Berdic   File: ObjectMatching.py    MIT License
def get_score_for_ideal_points(points, ideal_points, IDEAL_HEIGHT):
	

	model,scene,after_tps = nrr.non_rigid_registration(points, ideal_points)

	print("Model: ", model)
	print("Scene: ", scene)
	print("after_tps: ", after_tps)

	distances_array = []

	ballTree = BallTree(after_tps)
	
	i = 0
	for point in ideal_points:
		ind = ballTree.query_radius(point, IDEAL_HEIGHT)
		if len(ind[0]) == 1:
			distances_array.append(np.linalg.norm(point - after_tps[ind[0][0]]))
		else:
			i += 1
			distances_array.append(1000)


	print("SCORE: ", np.mean(distances_array))

	return np.mean(distances_array) 
Example 15
Project: ObjectMatching   Author: Berdic   File: alignment_quality_check.py    MIT License
def get_score_for_ideal_points(c, ideal_points, IDEAL_RADIUS, IDEAL_HEIGHT):
	#rename cameras
	rename_cameras(c)
	
	#get normalized points of cameras currently aligned
	points = get_normalized_points(c,IDEAL_RADIUS)

	

	#get translation and rotation vector

	#get model, scene and after non rigid points
	model,scene,after_tps = cca.non_rigid_registration(points, ideal_points)

	#save_points_like_obj(model, "D:/model{}.obj".format(counter))
	#save_points_like_obj(scene, "D:/scene{}.obj".format(counter))
	#save_points_like_obj(after_tps, "D:/after_tps{}.obj".format(counter))

	distances_array = []

	ballTree = BallTree(after_tps)
	#for dooblicator v1 46 min distance between cameras is height/2
	if len(c.cameras) >= 41 and len(c.cameras) <=51:
		radius = 2*(IDEAL_HEIGHT/2)/3
	else:
		radius = 2*IDEAL_HEIGHT/3
	not_functional = []
	i = 0
	for point in ideal_points:
		ind = ballTree.query_radius(point, radius)
		if len(ind[0]) == 1:
			distances_array.append(np.linalg.norm(point - after_tps[ind[0][0]]))
		else:
			i += 1
			distances_array.append(1000)


	print("SCORE: ", np.mean(distances_array))

	return np.mean(distances_array) 
Example 16
Project: Model-Free-Episodic-Control   Author: ShibiHe   File: EC_functions.py    MIT License
def update_tree(self, time):
        print 'rebuild tree'
        self.tree = BallTree(self.state[:self.items, :], leaf_size=self.size)
        self.last_tree_built_time = time
        print 'rebuild done' 
Example 17
Project: boundary-detection   Author: marcos-sandim   File: boundary.py    MIT License
def __init__(self, points, rho, dimension):
        """Constructor

        Initializes the grid and helper structures using the provided points
        and rho parameter.

        Args:
            points: A numpy array containing the coordinates of the particles.
            rho: Needed to compute the rho-boundary of the system.
            dimension: The dimension of the particle system.
        """
        self.points = points
        self.rho = rho
        self.dimension = dimension
        self.cell_size = 2.0 * rho

        self.aabb_min = np.amin(points, axis=0)
        self.aabb_max = np.amax(points, axis=0)

        self.grid_dims = (self.aabb_max - self.aabb_min) / self.cell_size
        # Regarding the + 3: 1 for left side, 1 for right side, 1 for rounding
        # up
        self.grid_dims = np.trunc(self.grid_dims) + 3
        self.grid_dims = self.grid_dims.astype(int)

        self.grid_min = self.aabb_min - self.cell_size
        self.grid_max = self.grid_min + self.grid_dims * self.cell_size

        self.grid_count = np.zeros(self.grid_dims, dtype=int)
        self.grid_elems = np.empty(self.grid_dims, dtype=object)

        self.update_grid()
        self.tree = NeighborsTree(
            self.points, leaf_size=10, metric='euclidean')

        self.neighbor_cell_list = self.compute_neighbor_cell_list() 
Example 18
Project: atap   Author: foxbook   File: recommender.py    Apache License 2.0
def transform(self, documents):
        return [
            BallTree(documents)
        ] 
Example 19
Project: atap   Author: foxbook   File: recommender.py    Apache License 2.0
def fit_transform(self, documents):
        # Transformer will be False if pipeline hasn't been fit yet,
        # Trigger fit_transform and save the transformer and lexicon.
        if self.transformer == False:
            self.transformer = Pipeline([
                ('norm', TextNormalizer(minimum=50, maximum=200)),
                ('transform', Pipeline([
                    ('tfidf', TfidfVectorizer()),
                    ('svd', TruncatedSVD(n_components=200))
                ])
                 )
            ])
            self.lexicon = self.transformer.fit_transform(documents)
            self.tree = BallTree(self.lexicon)
            self.save() 
Example 20
Project: EmbedderSDR   Author: dizcza   File: npeet.py    MIT License
def build_tree(points):
    if points.shape[1] >= 20:
        # for large dimensions, use BallTree
        return BallTree(points, metric='chebyshev')
    return KDTree(points, metric='chebyshev') 
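
Both scikit-learn trees accept metric="chebyshev", so the dimensionality-based switch above only affects query speed, not which neighbours are found. A minimal sketch, not taken from the project, checking that the two trees return the same neighbour distances:

import numpy as np
from sklearn.neighbors import BallTree, KDTree

rng = np.random.RandomState(0)
points = rng.random_sample((50, 25))      # 25 dimensions, so build_tree above would pick BallTree

bt = BallTree(points, metric='chebyshev')
kt = KDTree(points, metric='chebyshev')

d_bt, _ = bt.query(points[:3], k=4)
d_kt, _ = kt.query(points[:3], k=4)
np.testing.assert_allclose(d_bt, d_kt)    # same neighbour distances from either tree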
Example 21
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_t_sne.py    Apache License 2.0
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array([distances[i, neighbors_nn[i]]
                                 for i in range(n_samples)])
        assert np.all(distances[0, neighbors_nn[0]] == distances_nn[0]),\
            abs(distances[0, neighbors_nn[0]] - distances_nn[0])
        P_bh = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                       perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3) 
Example 22
Project: flask_semantic_image_search   Author: wayfair   File: fixtures.py    BSD 2-Clause "Simplified" License
def single_ball_tree():
    embedding_index = BallTreeEmbeddingIndex('')
    metadata = [{'item_id': '0'}]
    tree = BallTree([[0]])
    embedding_index.populate({'metadata': metadata, 'index': tree})
    return embedding_index 
Example 23
Project: astroML_figures   Author: astroML   File: fig_model_comparison_mcmc.py    BSD 2-Clause "Simplified" License
def estimate_bayes_factor(trace, r=0.05, return_list=False):
    """Estimate the bayes factor using the local density of points"""

    # Convert traces to a numpy array, ignore the intervals
    trace_arr = np.array([trace[i] for i in trace.varnames if "_interval__" not in i])
    trace_t = trace_arr.T
    N_iter, D = trace_t.shape

    # compute volume of a D-dimensional sphere of radius r
    Vr = np.pi ** (0.5 * D) / gamma(0.5 * D + 1) * (r ** D)

    # use neighbor count within r as a density estimator
    bt = BallTree(trace_t)
    count = bt.query_radius(trace_t, r=r, count_only=True)

    BF = trace.model_logp + np.log(N_iter) + np.log(Vr) - np.log(count)

    if return_list:
        return BF
    else:
        p25, p50, p75 = np.percentile(BF, [25, 50, 75])
        return p50, 0.7413 * (p75 - p25)


# ------------------------------------------------------------
# Generate the data 
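
The Bayes-factor estimate above relies on query_radius with count_only=True, which returns just the number of neighbours inside the radius instead of their indices. A minimal sketch of that call on made-up samples:

import numpy as np
from sklearn.neighbors import BallTree

rng = np.random.RandomState(0)
samples = rng.random_sample((200, 2))

bt = BallTree(samples)
counts = bt.query_radius(samples, r=0.05, count_only=True)
# counts[i] = number of samples within 0.05 of samples[i] (the point itself included),
# which is what estimate_bayes_factor uses as a local density proxy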
Example 24
Project: 3d-semantic-segmentation   Author: VisualComputingInstitute   File: evaluation.py    MIT License
def knn_interpolation(cumulated_pc: np.ndarray, full_sized_data: np.ndarray, k=5):
    """
    Using k-nn interpolation to find labels of points of the full sized pointcloud
    :param cumulated_pc: cumulated pointcloud results after running the network
    :param full_sized_data: full sized point cloud
    :param k: k for k nearest neighbor interpolation
    :return: pointcloud with predicted labels in last column and ground truth labels in last but one column
    """

    labeled = cumulated_pc[cumulated_pc[:, -1] != -1]
    to_be_predicted = full_sized_data.copy()

    ball_tree = BallTree(labeled[:, :3], metric='euclidean')

    knn_classes = labeled[ball_tree.query(to_be_predicted[:, :3], k=k)[1]][:, :, -1].astype(int)

    interpolated = np.zeros(knn_classes.shape[0])

    for i in range(knn_classes.shape[0]):
        interpolated[i] = np.bincount(knn_classes[i]).argmax()

    output = np.zeros((to_be_predicted.shape[0], to_be_predicted.shape[1]+1))
    output[:, :-1] = to_be_predicted

    output[:, -1] = interpolated

    return output 
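
A hypothetical call to knn_interpolation with a tiny, made-up point cloud, just to illustrate the expected array layout (three coordinate columns followed by a label column, with -1 marking unlabeled points):

import numpy as np

# predictions from the network: x, y, z, predicted label
cumulated_pc = np.array([[0.0, 0.0, 0.0, 1.0],
                         [1.0, 0.0, 0.0, 2.0],
                         [0.0, 1.0, 0.0, 1.0]])

# full-sized cloud: x, y, z, ground-truth label
full_sized_data = np.array([[0.1, 0.0, 0.0, 1.0],
                            [0.9, 0.1, 0.0, 2.0]])

out = knn_interpolation(cumulated_pc, full_sized_data, k=1)
# out[:, -1] holds the interpolated labels, out[:, -2] the ground-truth labels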
Example 25
Project: 3d-semantic-segmentation   Author: VisualComputingInstitute   File: center_batch_generator.py    MIT License
def _calc_ball_trees(self, metric='euclidean'):
        ball_trees = []
        for pointcloud_data in tqdm(self.dataset.data, desc='Ball trees have to be calculated from scratch'):
            ball_trees.append(BallTree(pointcloud_data[:, :2], metric=metric))
        return ball_trees 
Example 26
Project: correlcalc   Author: rohinkumar   File: tpcf.py    MIT License
def poserr(xi, DD):
    print ("Calculating Poisson error")
    return (1.0+xi)/np.sqrt(DD)
# alternatively
# rbt=BallTree(dat,metric='pyfunc',func=metric)
# counts_RD=rbt.two_point_correlation(dat,bins) 
Example 27
Project: correlcalc   Author: rohinkumar   File: tpcf.py    MIT License
def crosscorrw(dat, datR, bins, metric, rweights):
    # rbt = BallTree(datR, metric='pyfunc', func=metric)
    DR = np.zeros(len(bins)-1)
    binmax = max(bins)
    for i in tqdm(range(len(dat))):
        ind = rbt.query_radius(dat[i].reshape(1, -1), binmax)
        # wts=np.array([])
        for j in ind:
            dist0 = dist.cdist([dat[i], ], datR[j], metric)[0]
            DR += np.histogram(dist0, bins=bins, weights=rweights[j])[0]
            # print (dist0,weights[j])
    return DR 
Example 28
Project: Measure-Concentration   Author: xiaozhanguva   File: preliminary.py    MIT License
def knn_graph(X, k, method='brute_force', leaf_size=30, metric='euclidean'):
    n, p = X.shape
    if method == 'kd_tree':
        if _HAS_SKLEARN:
            kdtree = _sknbr.KDTree(X, leaf_size=leaf_size, metric=metric)
            distances, neighbors = kdtree.query(X, k=k, return_distance=True,
                                                sort_results=True)
            radii = distances[:, -1]
        else:
            raise ImportError("The scikit-learn library could not be loaded." +
                              " It is required for the 'kd-tree' method.")

    elif method == 'ball_tree':
        if _HAS_SKLEARN:
            btree = _sknbr.BallTree(X, leaf_size=leaf_size, metric=metric)
            distances, neighbors = btree.query(X, k=k, return_distance=True,
                                               sort_results=True)
            radii = distances[:, -1]
        else:
            raise ImportError("The scikit-learn library could not be loaded." +
                              " It is required for the 'ball-tree' method.")

    else:  # assume brute-force
        if not _HAS_SCIPY:
            raise ImportError("The 'scipy' module could not be loaded. " +
                              "It is required for the 'brute_force' method " +
                              "for building a knn similarity graph.")

        d = _spd.pdist(X, metric=metric)
        D = _spd.squareform(d)
        rank = np.argsort(D, axis=1)
        neighbors = rank[:, 0:k]
        k_nbr = neighbors[:, -1]
        radii = D[np.arange(n), k_nbr]

    return neighbors, radii 
Example 29
Project: nnsearch   Author: pkariz   File: BallTreeScikit.py    GNU General Public License v3.0
def __init__(self):
        self.algorithm = "BallTree-scikit"
        self.valid_types = [np.uint8, np.uint16, np.uint32, np.uint64,
                            np.int8, np.int16, np.int32, np.int64,
                            np.float16, np.float32, np.float64] 
Example 30
Project: nnsearch   Author: pkariz   File: BallTreeScikit.py    GNU General Public License v3.0
def build(self, data, leaf_size=20, distance="euclidean", **kwargs):
        """Builds ball tree with specified parameters.
        :param data: Dataset instance representing data
        :param leaf_size: maximum size of a leaf
        :param distance: the metric to use; can be "euclidean" or any other value accepted by the
        'metric' parameter of scikit-learn's BallTree.
        """
        if not isinstance(data, Dataset):
            raise ValueError("Data parameter must be an instance of Dataset!")
        if data.data.dtype not in self.valid_types:
            raise ValueError("Invalid dtype of numpy array, check valid_types parameter of index!")
        self.index = BallTree(data.data, leaf_size=leaf_size, metric=distance, **kwargs)
        return self.index 
Example 31
Project: Autocomplete-System   Author: weihesdlegend   File: Advanced_server.py    MIT License
def __init__(self, num_corrections=10, num_basic_results=10,
                 home_dir=".",
                 embedding_json=None,
                 vocab_int_json=None, *args, **kwargs):
        super().__init__(num_res_return=num_basic_results, *args, **kwargs)

        self.use_embedding = False

        if embedding_json and vocab_int_json:
            self.use_embedding = True
            embedding_json = path.join(home_dir, embedding_json)
            vocab_int_json = path.join(home_dir, vocab_int_json)
            # load json files
            print("Loading JSON files, may take a while.")
            with open(embedding_json, 'r') as read_file:
                self.embeddings = np.array(json.load(read_file))
            with open(vocab_int_json, 'r') as read_file:
                self.vocab_int = json.load(read_file)
            self.int_vocab = {i: word for word, i in self.vocab_int.items()}

            # train k nearest neighbor model
            print("Training BallTree k-nearest neighbor searcher...")
            self.searcher = BallTree(self.embeddings, leaf_size=10)

        self.checker = Spell.Spell()
        self.num_corrections = num_corrections
        self.num_basic_search_results = num_basic_results
        self.max_total_res = min(10, num_basic_results+num_corrections)

        print("Ready to use.") 
Example 32
Project: pyodds   Author: datamllab   File: knn.py    MIT License
def fit(self, X):
        """Fit detector. y is optional for unsupervised methods.

        Parameters
        ----------
        X : dataframe of shape (n_samples, n_features)
            The input samples.
        """

        # validate inputs X and y (optional)
        X = X.to_numpy()

        if self.metric_params is not None:
            self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                                  metric=self.metric,
                                  **self.metric_params)
        else:
            self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                                  metric=self.metric)
        self.neigh_.fit(X)

        dist_arr, _ = self.neigh_.kneighbors(n_neighbors=self.n_neighbors,
                                             return_distance=True)
        dist = self._get_dist_by_method(dist_arr)

        self.decision_scores_ = dist.ravel()
        self._process_decision_scores()

        return self 
Example 33
Project: CIMtools   Author: stsouko   File: similarity_distance.py    GNU General Public License v3.0
def fit(self, X, y=None):
        """Fit distance-based AD.
        All of the AD model's hyperparameters were selected by internal cross-validation on the training set.
        The hyperparameters of the AD definition approach were optimized in the cross-validation,
        with the RMSE_AD or BA_AD metrics used as the scoring functions to maximize.
        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            The input samples. Use ``dtype=np.float32`` for maximum
            efficiency.

        Returns
        -------
        self : object
            Returns self.
        """
        # Check data
        X = check_array(X)
        self.tree = BallTree(X, leaf_size=self.leaf_size, metric=self.metric)
        dist_train = self.tree.query(X, k=2)[0]
        if self.threshold == 'auto':
            self.threshold_value = 0.5 * sqrt(var(dist_train[:, 1])) + mean(dist_train[:, 1])
        elif self.threshold == 'cv':
            if y is None:
                raise ValueError("Y must be specified to find the optimal threshold.")
            y = check_array(y, accept_sparse='csc', ensure_2d=False, dtype=None)
            self.threshold_value = 0
            score_value = 0
            Y_pred, Y_true, AD = [], [], []
            cv = KFold(n_splits=5, random_state=1, shuffle=True)
            for train_index, test_index in cv.split(X):
                x_train = safe_indexing(X, train_index)
                x_test = safe_indexing(X, test_index)
                y_train = safe_indexing(y, train_index)
                y_test = safe_indexing(y, test_index)
                data_test = safe_indexing(dist_train[:, 1], test_index)
                if self.reg_model is None:
                    reg_model = RandomForestRegressor(n_estimators=500, random_state=1).fit(x_train, y_train)
                else:
                    reg_model = clone(self.reg_model).fit(x_train, y_train)
                Y_pred.append(reg_model.predict(x_test))
                Y_true.append(y_test)
                AD.append(data_test)
            AD_stack = hstack(AD)
            AD_ = unique(AD_stack)
            for z in AD_:
                AD_new = AD_stack <= z
                if self.score == 'ba_ad':
                    val = balanced_accuracy_score_with_ad(Y_true=hstack(Y_true), Y_pred=hstack(Y_pred), AD=AD_new)
                elif self.score == 'rmse_ad':
                    val = rmse_score_with_ad(Y_true=hstack(Y_true), Y_pred=hstack(Y_pred), AD=AD_new)
                if val >= score_value:
                    score_value = val
                    self.threshold_value = z
        else:
            self.threshold_value = self.threshold
        return self 
Example 34
Project: pyod   Author: yzhao062   File: knn.py    BSD 2-Clause "Simplified" License
def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """

        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.neigh_.fit(X)
        # TODO: code cleanup
        # if self.neigh_._tree is not None:
        self.tree_ = self.neigh_._tree

        # The code below may not be necessary
        # else:
        #     if self.metric_params is not None:
        #         self.tree_ = BallTree(X, leaf_size=self.leaf_size,
        #                               metric=self.metric,
        #                               **self.metric_params)
        #     else:
        #         self.tree_ = BallTree(X, leaf_size=self.leaf_size,
        #                               metric=self.metric)

        dist_arr, _ = self.neigh_.kneighbors(n_neighbors=self.n_neighbors,
                                             return_distance=True)
        dist = self._get_dist_by_method(dist_arr)

        self.decision_scores_ = dist.ravel()
        self._process_decision_scores()

        return self 
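
This fit method is part of pyod's KNN detector; a minimal usage sketch, assuming pyod is installed and using its documented top-level API (data and parameter values are illustrative):

import numpy as np
from pyod.models.knn import KNN

X_train = np.random.rand(200, 5)

clf = KNN(n_neighbors=5)          # other parameters keep their defaults
clf.fit(X_train)

scores = clf.decision_scores_     # raw outlier scores assigned during fit (see above)
labels = clf.labels_              # binary inlier/outlier labels derived from the scores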
Example 35
Project: ObjectMatching   Author: Berdic   File: alignment_quality_check_cameras_names_in_progress.py    MIT License
def get_score_for_ideal_points(c, ideal_points, IDEAL_RADIUS, IDEAL_HEIGHT, array_of_points_with_name):
	#rename cameras
	rename_cameras(c)
	global counter
	counter += 1
	#get normalized points of cameras currently aligned
	points = get_normalized_points(c,IDEAL_RADIUS)

	#get translation and rotation vector

	#get model, scene and after non rigid points
	model,scene,after_tps = cca.non_rigid_registration(points, ideal_points)


	save_points_like_obj(model, "D:/model{}.obj".format(counter))
	save_points_like_obj(scene, "D:/scene{}.obj".format(counter))
	save_points_like_obj(after_tps, "D:/after_tps{}.obj".format(counter))

	distances_array = []

	problematic_cameras = []

	ballTree = BallTree(after_tps)
	#for dooblicator v1 46 min distance between cameras is height/2
	if len(c.cameras) >= 41 and len(c.cameras) <=51:
		radius = 2*(IDEAL_HEIGHT/2)/3
	else:
		radius = 2*IDEAL_HEIGHT/3
	not_functional = []
	i = 0
	for point in ideal_points:
		ind = ballTree.query_radius(point, radius)
		if len(ind[0]) == 1:
			distances_array.append(np.linalg.norm(point - after_tps[ind[0][0]]))
		else:
			i += 1
			distances_array.append(1000)
			for cam_name in array_of_points_with_name:
				if all(cam_name[1] == point):
					problematic_cameras.append(cam_name[0])
			


	print("SCORE: ", np.mean(distances_array))

	return np.mean(distances_array), problematic_cameras