Python faiss.METRIC_INNER_PRODUCT Examples

The following are 5 code examples that use the constant faiss.METRIC_INNER_PRODUCT. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module faiss, or try the search function.
Example #1
Source File: train.py    From Mosaicer with MIT License 8 votes vote down vote up
def faiss_train(fn_feature, root_path, index_path='train.index', id_path='data.json'):
    """Build, train and persist an inner-product IVF index over per-folder features.

    Every entry returned by ``os.listdir(root_path)`` is treated as one class.
    ``fn_feature`` is called with the entry's full path and must return a 2-D
    array holding one feature vector per row. All rows produced for the
    ``idx``-th entry are stored in the index under the integer id ``idx``.

    Args:
        fn_feature: Callable ``path -> 2-D array`` of shape ``(n_rows, dim)``.
        root_path: Directory whose entries are enumerated and fed to
            ``fn_feature``.
        index_path: File the trained faiss index is written to.
        id_path: JSON file that receives the ``{str(idx): folder_name}``
            mapping.

    Returns:
        Tuple ``(index, id_json)``: the trained, populated faiss index and
        the id-to-folder-name mapping.

    Raises:
        ValueError: If ``root_path`` has no entries or no feature rows were
            extracted (previously this surfaced as an ``AttributeError`` or
            an invalid ``IVF0`` factory string).
    """
    folder_names = os.listdir(root_path)
    logging.info('directory %s ', folder_names)
    print(folder_names)
    if not folder_names:
        raise ValueError('no entries found under {!r}'.format(root_path))

    id_json = {}
    feature_chunks = []
    id_chunks = []
    for idx, folder_name in enumerate(folder_names):
        id_json[str(idx)] = folder_name
        feature_val = fn_feature(os.path.join(root_path, folder_name))
        feature_chunks.append(feature_val)
        # faiss ids are 64-bit; the default integer dtype is platform-dependent.
        id_chunks.append(np.full(feature_val.shape[0], idx, dtype=np.int64))

    # Concatenate once instead of once per folder (which was quadratic).
    # Chunks are reversed to keep the historical row order, where every new
    # folder's rows were prepended to the accumulated arrays.
    vals = np.concatenate(feature_chunks[::-1], axis=0)
    ids = np.concatenate(id_chunks[::-1], axis=0)
    # faiss operates on C-contiguous float32 matrices.
    vals = np.ascontiguousarray(vals, dtype=np.float32)

    N, dim = vals.shape
    if N == 0:
        raise ValueError('fn_feature produced no feature rows')

    # Roughly 2*sqrt(N) inverted lists, but never more lists than training
    # vectors (k-means cannot place more centroids than it has points).
    x = max(1, min(N, int(2 * math.sqrt(N))))
    index_description = "IVF{x},Flat".format(x=x)
    # Use the real feature width rather than a hard-coded 7 * 7 * 512.
    index = faiss.index_factory(dim, index_description, faiss.METRIC_INNER_PRODUCT)
    index.train(vals)
    index.add_with_ids(vals, ids)
    faiss.write_index(index, index_path)

    with open(id_path, 'w', encoding='utf-8') as f:
        json.dump(id_json, f, ensure_ascii=False, indent=4)
    print(id_json)
    return index, id_json
Example #2
Source File: knn.py    From homura with Apache License 2.0 5 votes vote down vote up
def _faiss_knn(keys: torch.Tensor,
               queries: torch.Tensor,
               num_neighbors: int,
               distance: str) -> Tuple[torch.Tensor, torch.Tensor]:
    """Exhaustive nearest-neighbour lookup of ``queries`` in ``keys`` via faiss.

    Args:
        keys: 2-D tensor of database vectors; its second dimension must match
            that of ``queries``.
        queries: 2-D tensor of query vectors.
        num_neighbors: Number of neighbours returned per query.
        distance: Either ``'dot_product'`` (inner-product metric) or ``'l2'``.

    Returns:
        ``(scores, indices)``: a float32 and an int64 tensor, both of shape
        ``(queries.size(0), num_neighbors)``, allocated with
        ``keys.new_zeros`` and filled in place by ``faiss.bruteForceKnn``.

    Raises:
        RuntimeError: If ``is_faiss_available()`` reports faiss is missing.
    """
    # Reference implementation:
    # https://github.com/facebookresearch/XLM/blob/master/src/model/memory/utils.py
    if not is_faiss_available():
        raise RuntimeError("faiss_knn requires faiss-gpu")
    import faiss

    assert distance in ['dot_product', 'l2']
    assert keys.size(1) == queries.size(1)

    if distance == 'dot_product':
        metric = faiss.METRIC_INNER_PRODUCT
    else:
        metric = faiss.METRIC_L2

    keys_ptr = _tensor_to_ptr(keys)
    queries_ptr = _tensor_to_ptr(queries)

    # Output buffers that faiss writes into through raw pointers.
    result_shape = (queries.size(0), num_neighbors)
    scores = keys.new_zeros(result_shape, dtype=torch.float32)
    indices = keys.new_zeros(result_shape, dtype=torch.int64)
    scores_ptr = _tensor_to_ptr(scores)
    indices_ptr = _tensor_to_ptr(indices)

    # NOTE(review): the two ``True`` flags presumably mark keys/queries as
    # row-major -- confirm against the faiss GPU API in use.
    faiss.bruteForceKnn(
        FAISS_RES, metric,
        keys_ptr, True, keys.size(0),
        queries_ptr, True, queries.size(0),
        queries.size(1), num_neighbors, scores_ptr, indices_ptr,
    )
    return scores, indices
Example #3
Source File: cdp.py    From capture_reid with Apache License 2.0 5 votes vote down vote up
def cluster(features, th_knn, max_size=300, labels=None):
    '''
    Build an 80-nearest-neighbour graph over ``features`` with faiss and hand
    it to ``cluster_by_knns``.

    Unlike face-train, the similarities used for clustering here are NOT
    passed through a ``1 - x`` conversion.
    :param features: 2-D array, unpacked as ``(size, dim)``
    :param th_knn: forwarded unchanged to ``cluster_by_knns``
    :param max_size: forwarded unchanged to ``cluster_by_knns``
    :param labels: forwarded unchanged to ``cluster_by_knns``
    :return: whatever ``cluster_by_knns`` returns
    '''
    num_neighbors = 80
    nprobe = 8
    metric = faiss.METRIC_INNER_PRODUCT

    # Choose the index layout from the number of samples.
    size, dim = features.shape
    nlist = min(4096, 8 * round(math.sqrt(size)))
    if size >= 200 * 10000:
        fac_str = "IVF16384,PQ8"
    elif size >= 80 * 10000:
        fac_str = "IVF16384,Flat"
    elif size >= 4 * 10000:
        fac_str = "IVF{},Flat".format(nlist)
    else:
        fac_str = "Flat"
    logger.info("cdp cluster fac str %s", fac_str)

    index = faiss.index_factory(dim, fac_str, metric)
    index.train(features)
    index.nprobe = min(nprobe, nlist)
    assert index.is_trained
    logger.info('cdp cluster nlist: {}, nprobe: {}'.format(nlist, nprobe))
    index.add(features)

    # Every sample is queried against the index built from the same samples.
    sims, ners = index.search(features, k=num_neighbors)
    if "Flat" not in fac_str:
        # Index without a Flat component: recompute the similarities from
        # the raw features for the returned neighbours.
        sims = sim_by_feature(features, features, ners)

    # Pack similarities and neighbour ids side by side along a new axis 1,
    # both as float32.
    knns = np.stack(
        [sims.astype(np.float32), ners.astype(np.float32)],
        axis=1,
    )

    return cluster_by_knns(knns, features, th_knn, max_size, labels)
Example #4
Source File: approximate_als.py    From implicit with MIT License 4 votes vote down vote up
def fit(self, Ciu, show_progress=True):
        """Fit the parent model, then build the faiss indexes that are enabled.

        Args:
            Ciu: Passed straight through to the parent class ``fit``.
            show_progress: Passed straight through to the parent class ``fit``.

        Side effects:
            Sets ``self.quantizer`` and, when ``self.use_gpu`` is true,
            ``self.gpu_resources``. Sets ``self.recommend_index`` when
            ``self.approximate_recommend`` is true and
            ``self.similar_items_index`` when
            ``self.approximate_similar_items`` is true.
        """
        import faiss

        # Train the underlying model first; the indexes are built from its
        # item factors.
        super(FaissAlternatingLeastSquares, self).fit(Ciu, show_progress)

        self.quantizer = faiss.IndexFlat(self.factors)

        if self.use_gpu:
            self.gpu_resources = faiss.StandardGpuResources()

        item_factors = self.item_factors.astype('float32')

        def build_inner_product_index(vectors):
            # Create an IVF-Flat inner-product index (GPU or CPU), train it on
            # ``vectors``, add them, and apply the configured nprobe.
            if self.use_gpu:
                ivf = faiss.GpuIndexIVFFlat(self.gpu_resources, self.factors, self.nlist,
                                            faiss.METRIC_INNER_PRODUCT)
            else:
                ivf = faiss.IndexIVFFlat(self.quantizer, self.factors, self.nlist,
                                         faiss.METRIC_INNER_PRODUCT)
            ivf.train(vectors)
            ivf.add(vectors)
            ivf.nprobe = self.nprobe
            return ivf

        if self.approximate_recommend:
            log.debug("Building faiss recommendation index")
            # Inner-product index over the raw item factors.
            self.recommend_index = build_inner_product_index(item_factors)

        if self.approximate_similar_items:
            log.debug("Building faiss similar items index")

            # Cosine similarity is obtained by running an inner-product index
            # over unit-length vectors; zero norms are replaced by a tiny
            # value so the division below stays finite.
            norms = numpy.linalg.norm(item_factors, axis=1)
            norms[norms == 0] = 1e-10

            scaled_transposed = item_factors.T / norms
            normalized = scaled_transposed.T.astype('float32')
            self.similar_items_index = build_inner_product_index(normalized)
Example #5
Source File: knn.py    From learn-to-cluster with MIT License 4 votes vote down vote up
def __init__(self,
                 feats,
                 k,
                 index_path='',
                 index_key='',
                 nprobe=128,
                 omp_num_threads=None,
                 rebuild_index=True,
                 verbose=True,
                 **kwargs):
        """Build (or load) a faiss inner-product index and compute top-k neighbours.

        Args:
            feats: 2-D feature array, unpacked as ``(size, dim)``; cast to
                float32 when a new index is built.
            k: Number of neighbours retrieved per row of ``feats``.
            index_path: Optional file used to load/save the index. The
                neighbour cache is looked up at ``index_path + '.npz'``.
            index_key: ``''`` for a plain ``IndexFlatIP``, ``'IVF'`` for an
                ``IndexIVFFlat`` on top of it, otherwise a string passed to
                ``faiss.index_factory``. Keys containing ``'HNSW'`` are
                rejected by an assertion.
            nprobe: Upper bound for the index ``nprobe`` (capped by ``nlist``).
            omp_num_threads: If not None, passed to
                ``faiss.omp_set_num_threads``.
            rebuild_index: When False and ``index_path`` exists, the index is
                read from disk instead of being rebuilt.
            verbose: Stored on ``self.verbose`` and passed to ``Timer``.
            **kwargs: Not used in the code visible here.

        Sets ``self.knns`` either to the cached ``'data'`` array or to a list
        of ``(neighbour_ids int32, 1 - similarity float32)`` tuples, one per
        row of ``feats``.
        """
        import faiss
        if omp_num_threads is not None:
            faiss.omp_set_num_threads(omp_num_threads)
        self.verbose = verbose
        # NOTE(review): Timer is defined elsewhere; presumably it times the
        # enclosed stage and reports when ``verbose`` is true -- confirm.
        with Timer('[faiss] build index', verbose):
            # Reuse a saved index only if a path was given, rebuilding was
            # not requested, and the file actually exists.
            if index_path != '' and not rebuild_index and os.path.exists(
                    index_path):
                print('[faiss] read index from {}'.format(index_path))
                index = faiss.read_index(index_path)
                # NOTE(review): on this path ``feats`` is not cast to float32
                # before the search below -- confirm callers pass float32.
            else:
                feats = feats.astype('float32')
                size, dim = feats.shape
                # Default: exact inner-product index.
                index = faiss.IndexFlatIP(dim)
                if index_key != '':
                    # Per the assertion message, HNSW yields distances rather
                    # than similarities, which the ``1 - sim`` step below
                    # does not expect.
                    assert index_key.find(
                        'HNSW') < 0, 'HNSW returns distances insted of sims'
                    metric = faiss.METRIC_INNER_PRODUCT
                    nlist = min(4096, 8 * round(math.sqrt(size)))
                    if index_key == 'IVF':
                        # The flat index built above serves as the quantizer.
                        quantizer = index
                        index = faiss.IndexIVFFlat(quantizer, dim, nlist,
                                                   metric)
                    else:
                        index = faiss.index_factory(dim, index_key, metric)
                    # Keys without 'Flat' are expected to need training.
                    if index_key.find('Flat') < 0:
                        assert not index.is_trained
                    index.train(feats)
                    index.nprobe = min(nprobe, nlist)
                    assert index.is_trained
                    # Prints the requested nprobe, not the capped value set
                    # on the index above.
                    print('nlist: {}, nprobe: {}'.format(nlist, nprobe))
                index.add(feats)
                if index_path != '':
                    print('[faiss] save index to {}'.format(index_path))
                    # NOTE(review): helper defined elsewhere; presumably
                    # creates the missing parent directory -- confirm.
                    mkdir_if_no_exists(index_path)
                    faiss.write_index(index, index_path)
        with Timer('[faiss] query topk {}'.format(k), verbose):
            # The neighbour cache is consulted regardless of rebuild_index.
            knn_ofn = index_path + '.npz'
            if os.path.exists(knn_ofn):
                print('[faiss] read knns from {}'.format(knn_ofn))
                self.knns = np.load(knn_ofn)['data']
            else:
                sims, nbrs = index.search(feats, k=k)
                # One (neighbour ids, 1 - similarity) pair per query row.
                self.knns = [(np.array(nbr, dtype=np.int32),
                              1 - np.array(sim, dtype=np.float32))
                             for nbr, sim in zip(nbrs, sims)]