Python faiss.GpuIndexFlatL2() Examples

The following are 15 code examples of faiss.GpuIndexFlatL2(). The attribution line above each example names the original project and source file it was taken from. You may also want to check out the other available functions and classes of the faiss module.
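Before the project examples, here is a minimal end-to-end sketch of the class in isolation (this assumes a faiss build with GPU support and that GPU 0 is available; the array sizes are arbitrary):

import numpy as np
import faiss

d = 128                                            # vector dimension
xb = np.random.rand(10000, d).astype('float32')    # database vectors
xq = np.random.rand(5, d).astype('float32')        # query vectors

res = faiss.StandardGpuResources()                  # GPU scratch memory and streams
cfg = faiss.GpuIndexFlatConfig()
cfg.device = 0                                      # which GPU to use
index = faiss.GpuIndexFlatL2(res, d, cfg)           # exact (brute-force) L2 index on the GPU

index.add(xb)                                       # store the database vectors
distances, indices = index.search(xq, 4)            # 4 nearest neighbors for each query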
Example #1
Source File: faiss_kmeans.py    From cdp with MIT License
def run_kmeans(x, nmb_clusters, verbose=False):
    """Runs kmeans on 1 GPU.
    Args:
        x: data
        nmb_clusters (int): number of clusters
    Returns:
        list: ids of data in each cluster
    """
    n_data, d = x.shape

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)

    # Change the faiss seed at each k-means run so that the randomly picked
    # initialization centroids do not correspond to the same feature ids
    # from one epoch to another.
    clus.seed = np.random.randint(1234)

    clus.niter = 20
    clus.max_points_per_centroid = 10000000
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.useFloat16 = False
    flat_config.device = 0
    index = faiss.GpuIndexFlatL2(res, d, flat_config)

    # perform the training
    clus.train(x, index)
    _, I = index.search(x, 1)
    losses = faiss.vector_to_array(clus.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))

    return [int(n[0]) for n in I], losses[-1] 
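A hypothetical call, assuming the module-level numpy/faiss imports of the original file (the feature matrix and cluster count are made up for illustration):

features = np.random.rand(5000, 256).astype('float32')
cluster_ids, final_loss = run_kmeans(features, nmb_clusters=100, verbose=True)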
Example #2
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0
def test_knn_search(size=10000, gpu_id=None):
    x = np.random.rand(size, 512)
    x = x.reshape(x.shape[0], -1).astype('float32')
    d = x.shape[1]

    tic = time.time()
    if gpu_id is None:
        index = faiss.IndexFlatL2(d)
    else:
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = gpu_id

        flat_config = [cfg]
        resources = [faiss.StandardGpuResources()]
        index = faiss.GpuIndexFlatL2(resources[0], d, flat_config[0])
    index.add(x)
    print('Index built in {} sec'.format(time.time() - tic))
    distances, I = index.search(x, 21)
    print('Searched in {} sec'.format(time.time() - tic))
    print(distances.shape)
    print(I.shape)
    print(distances[:5])
    print(I[:5]) 
Example #3
Source File: losses.py    From SO-Net with MIT License
def build_nn_index(self, database):
        '''
        :param database: numpy array of Nx3
        :return: Faiss index on GPU (built on CPU, then moved to the GPU given by self.opt.gpu_id)
        '''
        # index = faiss.GpuIndexFlatL2(self.res, self.dimension, self.flat_config)  # dimension is 3
        index_cpu = faiss.IndexFlatL2(self.dimension)
        index = faiss.index_cpu_to_gpu(self.res, self.opt.gpu_id, index_cpu)
        index.add(database)
        return index 
Example #4
Source File: nmi.py    From classification_metric_learning with Apache License 2.0
def test_nmi_faiss(embeddings, labels):
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0

    unique_labels = np.unique(labels)
    d = embeddings.shape[1]
    kmeans = faiss.Clustering(d, unique_labels.size)
    kmeans.verbose = True
    kmeans.niter = 300
    kmeans.nredo = 10
    kmeans.seed = 0

    index = faiss.GpuIndexFlatL2(res, d, flat_config)

    kmeans.train(embeddings, index)

    dists, pred_labels = index.search(embeddings, 1)

    pred_labels = pred_labels.squeeze()

    nmi = normalized_mutual_info_score(labels, pred_labels)

    print("NMI: {}".format(nmi))
    return nmi 
Example #5
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0
def reserve_faiss_gpu_memory(gpu_id=0):
    """
    Reserves around 2.4 Gb memory on Titan Xp.
    `r = reserve_faiss_gpu_memory()`
    To release the memory run `del r`

    Something like 200 Mb will still be hold afterwards.
    """
    res = faiss.StandardGpuResources()
    cfg = faiss.GpuIndexFlatConfig()
    cfg.useFloat16 = False
    cfg.device = gpu_id
    index = faiss.GpuIndexFlatL2(res, 2048, cfg)
    return index, res 
Example #6
Source File: knn.py    From cdp with MIT License
def knn_faiss(feats, k):
    import torch
    import faiss
    import numpy as np
    N, dim = feats.shape
    res = faiss.StandardGpuResources()
    # L2-normalize each row of the feature matrix
    feats /= np.linalg.norm(feats, axis=1, keepdims=True)
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = int(torch.cuda.device_count()) - 1
    index = faiss.GpuIndexFlatL2(res, dim, flat_config)
    index.add(feats)
    # k + 1 because the nearest neighbor of every point is the point itself
    D, I = index.search(feats, k + 1)
    return D, I
Example #7
Source File: HardNetClassicalHardNegMiningSiftInit.py    From hardnet with MIT License
def BuildKNNGraphByFAISS_GPU(db,k):
    dbsize, dim = db.shape
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0
    res = faiss.StandardGpuResources()
    nn = faiss.GpuIndexFlatL2(res, dim, flat_config)
    nn.add(db)
    dists,idx = nn.search(db, k+1)
    return idx[:,1:],dists[:,1:] 
Example #8
Source File: HardNetClassicalHardNegMining.py    From hardnet with MIT License
def BuildKNNGraphByFAISS_GPU(db,k):
    dbsize, dim = db.shape
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0
    res = faiss.StandardGpuResources()
    nn = faiss.GpuIndexFlatL2(res, dim, flat_config)
    nn.add(db)
    dists,idx = nn.search(db, k+1)
    return idx[:,1:],dists[:,1:] 
Example #9
Source File: lossess.py    From RL-GAN-Net with MIT License
def build_nn_index(self, database):
        '''
        :param database: numpy array of Nx3
        :return: Faiss index on GPU (built on CPU, then moved to the GPU given by self.opt.gpu_id)
        '''
        # index = faiss.GpuIndexFlatL2(self.res, self.dimension, self.flat_config)  # dimension is 3
        index_cpu = faiss.IndexFlatL2(self.dimension)
        index = faiss.index_cpu_to_gpu(self.res, self.opt.gpu_id, index_cpu)
        index.add(database)
        return index 
Example #10
Source File: lossess.py    From RL-GAN-Net with MIT License
def build_nn_index(self, database):
        '''
        :param database: numpy array of Nx3
        :return: Faiss index on GPU (built on CPU, then moved to the GPU given by self.opt.gpu_id)
        '''
        # index = faiss.GpuIndexFlatL2(self.res, self.dimension, self.flat_config)  # dimension is 3
        index_cpu = faiss.IndexFlatL2(self.dimension)
        index = faiss.index_cpu_to_gpu(self.res, self.opt.gpu_id, index_cpu)
        index.add(database)
        return index 
Example #11
Source File: retrieval.py    From classification_metric_learning with Apache License 2.0
def _retrieve_knn_faiss_gpu_euclidean(query_embeddings, db_embeddings, k, gpu_id=0):
    """
        Retrieve k nearest neighbor based on inner product

        Args:
            query_embeddings:           numpy array of size [NUM_QUERY_IMAGES x EMBED_SIZE]
            db_embeddings:              numpy array of size [NUM_DB_IMAGES x EMBED_SIZE]
            k:                          number of nn results to retrieve excluding query
            gpu_id:                     gpu device id to use for nearest neighbor (if possible for `metric` chosen)

        Returns:
            dists:                      numpy array of size [NUM_QUERY_IMAGES x k], distances of k nearest neighbors
                                        for each query
            retrieved_db_indices:       numpy array of size [NUM_QUERY_IMAGES x k], indices of k nearest neighbors
                                        for each query
    """
    import faiss

    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = gpu_id

    # Evaluate with Euclidean (L2) distance
    index = faiss.GpuIndexFlatL2(res, db_embeddings.shape[1], flat_config)
    index.add(db_embeddings)
    # retrieve k+1 results in case the query images are also in the db
    dists, retrieved_result_indices = index.search(query_embeddings, k + 1)

    return dists, retrieved_result_indices 
Example #12
Source File: dknn.py    From cleverhans with MIT License
def _init_faiss(
    self,
    dimension,
  ):
    import faiss

    res = faiss.StandardGpuResources()

    self._faiss_index = faiss.GpuIndexFlatL2(
      res,
      dimension,
    ) 
Example #13
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0
def train_kmeans(x, num_clusters=1000, gpu_ids=None, niter=100, nredo=1, verbose=0):
    """
    Runs k-means clustering on one or several GPUs
    """
    assert np.all(~np.isnan(x)), 'x contains NaN'
    assert np.all(np.isfinite(x)), 'x contains Inf'
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = bool(verbose)
    kmeans.niter = niter
    kmeans.nredo = nredo

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    if gpu_ids is not None:
        res = [faiss.StandardGpuResources() for i in gpu_ids]

        flat_config = []
        for i in gpu_ids:
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = i
            flat_config.append(cfg)

        if len(gpu_ids) == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                       for i in range(len(gpu_ids))]
            index = faiss.IndexProxy()
            for sub_index in indexes:
                index.addIndex(sub_index)
    else:
        index = faiss.IndexFlatL2(d)

    # perform the training
    kmeans.train(x, index)
    centroids = faiss.vector_float_to_array(kmeans.centroids)

    objective = faiss.vector_float_to_array(kmeans.obj)
    #logging.debug("Final objective: %.4g" % objective[-1])

    return centroids.reshape(num_clusters, d) 
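A hypothetical single-GPU call (the data shape and clustering parameters are illustrative only):

x = np.random.rand(100000, 128).astype('float32')
centroids = train_kmeans(x, num_clusters=1000, gpu_ids=0, niter=50, verbose=1)  # centroids: [1000 x 128]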
Example #14
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0
def find_nearest_neighbors(x, queries=None, k=5, gpu_id=None):
    """
    Find k nearest neighbors for each of the n examples.
    Distances are computed using Squared Euclidean distance metric.

    Arguments:
    ----------
    queries
    x (ndarray): N examples to search within. [N x d].
    gpu_id (int): use CPU if None else use GPU with the specified id.
    queries (ndarray): find nearest neigbor for each query example. [M x d] matrix
        If None than find k nearest neighbors for each row of x
        (excluding self exampels).
    k (int): number of nearest neighbors to find.

    Return
    I (ndarray): Indices of the nearest neighnpors. [M x k]
    distances (ndarray): Distances to the nearest neighbors. [M x k]

    """
    if gpu_id is not None and not isinstance(gpu_id, int):
        raise ValueError('gpu_id must be None or int')
    x = np.asarray(x.reshape(x.shape[0], -1), dtype=np.float32)
    remove_self = False # will we have queries in the search results?
    if queries is None:
        remove_self = True
        queries = x
        k += 1

    d = x.shape[1]

    tic = time.time()
    if gpu_id is None:
        logging.debug('FAISS: cpu::find {} nearest neighbors'\
                     .format(k - int(remove_self)))
        index = faiss.IndexFlatL2(d)
    else:
        logging.debug('FAISS: gpu[{}]::find {} nearest neighbors'\
                     .format(gpu_id, k - int(remove_self)))
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = gpu_id

        flat_config = [cfg]
        resources = [faiss.StandardGpuResources()]
        index = faiss.GpuIndexFlatL2(resources[0], d, flat_config[0])
    index.add(x)
    distances, nns = index.search(queries, k)
    if remove_self:
        for i in range(len(nns)):
            indices = np.nonzero(nns[i, :] != i)[0]
            indices.sort()
            if len(indices) > k - 1:
                indices = indices[:-1]
            nns[i, :-1] = nns[i, indices]
            distances[i, :-1] = distances[i, indices]
        nns = nns[:, :-1]
        distances = distances[:, :-1]
    logging.debug('FAISS: Neighbors search total elapsed time: {:.2f} sec'.format(time.time() - tic))
    return nns, distances 
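A hypothetical call, assuming the module-level numpy/faiss/time/logging imports of the original file (random data, parameters chosen for illustration; pass gpu_id=None to fall back to the CPU index):

data = np.random.rand(1000, 64).astype('float32')
nns, dists = find_nearest_neighbors(data, k=5, gpu_id=0)  # each result is of shape [1000 x 5]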
Example #15
Source File: utils.py    From DeMa-BWE with BSD 3-Clause "New" or "Revised" License
def get_nn_avg_dist_mog(emb, query, knn):
    """
    Compute the average distance of the `knn` nearest neighbors
    for a given set of embeddings and queries.
    Use Faiss if available.

    emb has divided sqrt(2) * var
    """
    if FAISS_AVAILABLE:
        emb = emb.cpu().numpy()
        query = query.cpu().numpy()
        if hasattr(faiss, 'StandardGpuResources'):
            # gpu mode
            res = faiss.StandardGpuResources()
            config = faiss.GpuIndexFlatConfig()
            config.device = 0
            index = faiss.GpuIndexFlatL2(res, emb.shape[1], config)
        else:
            # cpu mode
            index = faiss.IndexFlatL2(emb.shape[1])
        index.add(emb)
        # Ad-hoc implementation
        topK = 1000
        temp = 2.
        topK = 10
        distances, idxes = index.search(query, topK)
        return distances.mean(1)
        #query_idx = np.tile(np.arange(query.shape[0]) + 1, (topK, 1)).transpose()
        #rank_diff = abs(np.log(idxes + 1) - np.log(query_idx)) / temp
        #mog_distances_sorted = np.sort(distances + rank_diff)[:, :knn]
        # return: qN, knn
        #return mog_distances_sorted.mean(1)
    else:
        bs = 1024
        all_distances = []
        emb = emb.transpose(0, 1).contiguous()
        for i in range(0, query.shape[0], bs):
            distances = query[i:i + bs].mm(emb)
            best_distances, _ = distances.topk(knn, dim=1, largest=True, sorted=True)
            all_distances.append(best_distances.mean(1).cpu())
        all_distances = torch.cat(all_distances)
        return all_distances.numpy()