Python faiss.StandardGpuResources() Examples

The following are 26 code examples of faiss.StandardGpuResources(), collected from open-source projects. The source file, project, and license for each example are listed above the code.
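Before the individual examples, here is a minimal sketch of the pattern most of them share: allocate a faiss.StandardGpuResources() object, describe the target device with a GpuIndexFlatConfig, build a GPU index on top of both, then add and search vectors. The data shape and device id below are illustrative assumptions, not taken from any particular example.

import numpy as np
import faiss

# Illustrative data: 10,000 random 128-dimensional vectors (assumed sizes).
xb = np.random.rand(10000, 128).astype('float32')

res = faiss.StandardGpuResources()   # GPU scratch memory and streams, reusable across indexes
cfg = faiss.GpuIndexFlatConfig()
cfg.device = 0                       # assumes GPU 0 is available

index = faiss.GpuIndexFlatL2(res, xb.shape[1], cfg)  # exact L2 index on the GPU
index.add(xb)                                        # add database vectors
distances, neighbors = index.search(xb[:5], 4)       # 4 nearest neighbors of the first 5 vectors
print(neighbors)

Keep the StandardGpuResources object alive for as long as any index built on it is in use; as Example #3 below warns, FAISS crashes if it is deallocated too early.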
Example #1
Source File: faiss_kmeans.py    From cdp with MIT License
def run_kmeans(x, nmb_clusters, verbose=False):
    """Runs kmeans on 1 GPU.
    Args:
        x: data
        nmb_clusters (int): number of clusters
    Returns:
        list: ids of data in each cluster
    """
    n_data, d = x.shape

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)

    # Change the faiss seed at each k-means run so that the randomly picked
    # initialization centroids do not correspond to the same feature ids
    # from one epoch to the next.
    clus.seed = np.random.randint(1234)

    clus.niter = 20
    clus.max_points_per_centroid = 10000000
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.useFloat16 = False
    flat_config.device = 0
    index = faiss.GpuIndexFlatL2(res, d, flat_config)

    # perform the training
    clus.train(x, index)
    _, I = index.search(x, 1)
    losses = faiss.vector_to_array(clus.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))

    return [int(n[0]) for n in I], losses[-1] 
Example #2
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0
def test_knn_search(size=10000, gpu_id=None):
    x = np.random.rand(size, 512)
    x = x.reshape(x.shape[0], -1).astype('float32')
    d = x.shape[1]

    tic = time.time()
    if gpu_id is None:
        index = faiss.IndexFlatL2(d)
    else:
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = gpu_id

        flat_config = [cfg]
        resources = [faiss.StandardGpuResources()]
        index = faiss.GpuIndexFlatL2(resources[0], d, flat_config[0])
    index.add(x)
    print('Index built in {} sec'.format(time.time() - tic))
    distances, I = index.search(x, 21)
    print('Searched in {} sec'.format(time.time() - tic))
    print(distances.shape)
    print(I.shape)
    print(distances[:5])
    print(I[:5]) 
Example #3
Source File: faiss_index.py    From pytorch-dnc with MIT License
def __init__(self, cell_size=20, nr_cells=1024, K=4, num_lists=32, probes=32, res=None, train=None, gpu_id=-1):
    super(FAISSIndex, self).__init__()
    self.cell_size = cell_size
    self.nr_cells = nr_cells
    self.probes = probes
    self.K = K
    self.num_lists = num_lists
    self.gpu_id = gpu_id

    # BEWARE: if this variable gets deallocated, FAISS crashes
    self.res = res if res else faiss.StandardGpuResources()
    self.res.setTempMemoryFraction(0.01)
    if self.gpu_id != -1:
      self.res.initializeForDevice(self.gpu_id)

    nr_samples = self.nr_cells * 100 * self.cell_size
    train = train if train is not None else T.randn(self.nr_cells * 100, self.cell_size)

    self.index = faiss.GpuIndexIVFFlat(self.res, self.cell_size, self.num_lists, faiss.METRIC_L2)
    self.index.setNumProbes(self.probes)
    self.train(train) 
Example #4
Source File: run_index.py    From denspi with Apache License 2.0
def train_index(data, quantizer_path, trained_index_path, fine_quant='SQ8', cuda=False):
    quantizer = faiss.read_index(quantizer_path)
    if fine_quant == 'SQ8':
        trained_index = faiss.IndexIVFScalarQuantizer(quantizer, quantizer.d, quantizer.ntotal, faiss.METRIC_L2)
    elif fine_quant.startswith('PQ'):
        m = int(fine_quant[2:])
        trained_index = faiss.IndexIVFPQ(quantizer, quantizer.d, quantizer.ntotal, m, 8)
    else:
        raise ValueError(fine_quant)

    if cuda:
        if fine_quant.startswith('PQ'):
            print('PQ not supported on GPU; keeping CPU.')
        else:
            res = faiss.StandardGpuResources()
            gpu_index = faiss.index_cpu_to_gpu(res, 0, trained_index)
            gpu_index.train(data)
            trained_index = faiss.index_gpu_to_cpu(gpu_index)
    else:
        trained_index.train(data)
    faiss.write_index(trained_index, trained_index_path) 
Example #5
Source File: run_index.py    From denspi with Apache License 2.0
def train_coarse_quantizer(data, quantizer_path, num_clusters, hnsw=False, niter=10, cuda=False):
    d = data.shape[1]

    index_flat = faiss.IndexFlatL2(d)
    # make it into a gpu index
    if cuda:
        res = faiss.StandardGpuResources()
        index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
    clus = faiss.Clustering(d, num_clusters)
    clus.verbose = True
    clus.niter = niter
    clus.train(data, index_flat)
    centroids = faiss.vector_float_to_array(clus.centroids)
    centroids = centroids.reshape(num_clusters, d)

    if hnsw:
        quantizer = faiss.IndexHNSWFlat(d, 32)
        quantizer.hnsw.efSearch = 128
        quantizer.train(centroids)
        quantizer.add(centroids)
    else:
        quantizer = faiss.IndexFlatL2(d)
        quantizer.add(centroids)

    faiss.write_index(quantizer, quantizer_path) 
Example #6
Source File: nmi.py    From classification_metric_learning with Apache License 2.0
def test_nmi_faiss(embeddings, labels):
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0

    unique_labels = np.unique(labels)
    d = embeddings.shape[1]
    kmeans = faiss.Clustering(d, unique_labels.size)
    kmeans.verbose = True
    kmeans.niter = 300
    kmeans.nredo = 10
    kmeans.seed = 0

    index = faiss.GpuIndexFlatL2(res, d, flat_config)

    kmeans.train(embeddings, index)

    dists, pred_labels = index.search(embeddings, 1)

    pred_labels = pred_labels.squeeze()

    nmi = normalized_mutual_info_score(labels, pred_labels)

    print("NMI: {}".format(nmi))
    return nmi 
Example #7
Source File: lossess.py    From RL-GAN-Net with MIT License
def __init__(self, opt):
        super(ChamferLoss, self).__init__()
        self.opt = opt
        self.dimension = 3
        self.k = 1

        # we only need one StandardGpuResources object per GPU
        self.res = faiss.StandardGpuResources()
        self.res.setTempMemoryFraction(0.1)
        self.flat_config = faiss.GpuIndexFlatConfig()
        self.flat_config.device = opt.gpu_id

        # placeholder
        self.forward_loss = torch.FloatTensor([0])
        self.backward_loss = torch.FloatTensor([0]) 
Example #8
Source File: dknn.py    From cleverhans with MIT License
def _init_faiss(
    self,
    dimension,
  ):
    import faiss

    res = faiss.StandardGpuResources()

    self._faiss_index = faiss.GpuIndexFlatL2(
      res,
      dimension,
    ) 
Example #9
Source File: embedding_based_indexer.py    From forte with Apache License 2.0
def load(self, path: str, device: Optional[str] = None) -> None:
        r"""Load the index and meta data from ``path`` directory.

        Args:
            path (str): A path to the directory to load the index from.
            device (optional str): Device to load the index into. If None,
                value will be picked from hyperparameters.

        """

        if not os.path.exists(path):
            raise ValueError(f"Failed to load the index. {path} "
                             f"does not exist.")

        cpu_index = faiss.read_index(f"{path}/index.faiss")

        if device is None:
            device = self._config.device

        if device.lower().startswith("gpu"):
            gpu_resource = faiss.StandardGpuResources()
            gpu_id = int(device[3:])
            if faiss.get_num_gpus() < gpu_id:
                gpu_id = 0
                logging.warning("Cannot create the index on device %s. "
                                "Total number of GPUs on this machine is "
                                "%s. Using the gpu0 for the index.",
                                device, faiss.get_num_gpus())
            self._index = faiss.index_cpu_to_gpu(
                gpu_resource, gpu_id, cpu_index)

        else:
            self._index = cpu_index

        with open(f"{path}/index.meta_data", "rb") as f:
            self._meta_data = pickle.load(f) 
Example #10
Source File: embedding_based_indexer.py    From forte with Apache License 2.0
def __init__(self, config: Optional[Union[Dict, Config]] = None):
        super().__init__()
        self._config = Config(hparams=config,
                              default_hparams=self.default_configs())
        self._meta_data: Dict[int, str] = {}

        index_type = self._config.index_type
        device = self._config.device
        dim = self._config.dim

        if device.lower().startswith("gpu"):
            if isinstance(index_type, str) and not index_type.startswith("Gpu"):
                index_type = "Gpu" + index_type

            index_class = utils.get_class(index_type, module_paths=["faiss"])
            gpu_resource = faiss.StandardGpuResources()
            gpu_id = int(device[3:])
            if faiss.get_num_gpus() < gpu_id:
                gpu_id = 0
                logging.warning("Cannot create the index on device %s. "
                                "Total number of GPUs on this machine is "
                                "%s. Using gpu0 for the index.",
                                self._config.device, faiss.get_num_gpus())
            config_class_name = \
                self.INDEX_TYPE_TO_CONFIG.get(index_class.__name__)
            config = utils.get_class(config_class_name,  # type: ignore
                                     module_paths=["faiss"])()
            config.device = gpu_id
            self._index = index_class(gpu_resource, dim, config)

        else:
            index_class = utils.get_class(index_type, module_paths=["faiss"])
            self._index = index_class(dim) 
Example #11
Source File: retrieval.py    From classification_metric_learning with Apache License 2.0
def _retrieve_knn_faiss_gpu_euclidean(query_embeddings, db_embeddings, k, gpu_id=0):
    """
        Retrieve the k nearest neighbors based on Euclidean (L2) distance

        Args:
            query_embeddings:           numpy array of size [NUM_QUERY_IMAGES x EMBED_SIZE]
            db_embeddings:              numpy array of size [NUM_DB_IMAGES x EMBED_SIZE]
            k:                          number of nn results to retrieve excluding query
            gpu_id:                     gpu device id to use for nearest neighbor (if possible for `metric` chosen)

        Returns:
            dists:                      numpy array of size [NUM_QUERY_IMAGES x k], distances of k nearest neighbors
                                        for each query
            retrieved_db_indices:       numpy array of size [NUM_QUERY_IMAGES x k], indices of k nearest neighbors
                                        for each query
    """
    import faiss

    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = gpu_id

    # Evaluate with Euclidean (L2) distance
    index = faiss.GpuIndexFlatL2(res, db_embeddings.shape[1], flat_config)
    index.add(db_embeddings)
    # retrieve k+1 results in case the query images are also in the db
    dists, retrieved_result_indices = index.search(query_embeddings, k + 1)

    return dists, retrieved_result_indices 
Example #12
Source File: retrieval.py    From classification_metric_learning with Apache License 2.0
def _retrieve_knn_faiss_gpu_inner_product(query_embeddings, db_embeddings, k, gpu_id=0):
    """
        Retrieve the k nearest neighbors based on inner product

        Args:
            query_embeddings:           numpy array of size [NUM_QUERY_IMAGES x EMBED_SIZE]
            db_embeddings:              numpy array of size [NUM_DB_IMAGES x EMBED_SIZE]
            k:                          number of nn results to retrieve excluding query
            gpu_id:                     gpu device id to use for nearest neighbor (if possible for `metric` chosen)

        Returns:
            dists:                      numpy array of size [NUM_QUERY_IMAGES x k], distances of k nearest neighbors
                                        for each query
            retrieved_db_indices:       numpy array of size [NUM_QUERY_IMAGES x k], indices of k nearest neighbors
                                        for each query
    """
    import faiss

    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = gpu_id

    # Evaluate with inner product
    index = faiss.GpuIndexFlatIP(res, db_embeddings.shape[1], flat_config)
    index.add(db_embeddings)
    # retrieve k+1 results in case the query images are also in the db
    dists, retrieved_result_indices = index.search(query_embeddings, k + 1)

    return dists, retrieved_result_indices 
Example #13
Source File: faiss_gpu.py    From ann-benchmarks with MIT License
def __init__(self, n_bits, n_probes):
        self.name = 'FaissGPU(n_bits={}, n_probes={})'.format(
            n_bits, n_probes)
        self._n_bits = n_bits
        self._n_probes = n_probes
        self._res = faiss.StandardGpuResources()
        self._index = None 
Example #14
Source File: lossess.py    From RL-GAN-Net with MIT License
def __init__(self, opt):
        super(ChamferLoss, self).__init__()
        self.opt = opt
        self.dimension = 3
        self.k = 1

        # we only need one StandardGpuResources object per GPU
        self.res = faiss.StandardGpuResources()
        self.res.setTempMemoryFraction(0.1)
        self.flat_config = faiss.GpuIndexFlatConfig()
        self.flat_config.device = opt.gpu_id

        # placeholder
        self.forward_loss = torch.FloatTensor([0])
        self.backward_loss = torch.FloatTensor([0]) 
Example #15
Source File: _faiss.py    From mars with Apache License 2.0
def _index_to_gpu(index, device_id):  # pragma: no cover
    res = faiss.StandardGpuResources()
    return faiss.index_cpu_to_gpu(res, device_id, index) 
Example #16
Source File: HardNetClassicalHardNegMining.py    From hardnet with MIT License
def BuildKNNGraphByFAISS_GPU(db,k):
    dbsize, dim = db.shape
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0
    res = faiss.StandardGpuResources()
    nn = faiss.GpuIndexFlatL2(res, dim, flat_config)
    nn.add(db)
    dists,idx = nn.search(db, k+1)
    return idx[:,1:],dists[:,1:] 
Example #17
Source File: HardNetClassicalHardNegMiningSiftInit.py    From hardnet with MIT License
def BuildKNNGraphByFAISS_GPU(db,k):
    dbsize, dim = db.shape
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0
    res = faiss.StandardGpuResources()
    nn = faiss.GpuIndexFlatL2(res, dim, flat_config)
    nn.add(db)
    dists,idx = nn.search(db, k+1)
    return idx[:,1:],dists[:,1:] 
Example #18
Source File: utils.py    From DeMa-BWE with BSD 3-Clause "New" or "Revised" License
def get_nn_avg_dist(emb, query, knn):
    """
    Compute the average distance of the `knn` nearest neighbors
    for a given set of embeddings and queries.
    Use Faiss if available.
    """
    if FAISS_AVAILABLE:
        emb = emb.cpu().numpy()
        query = query.cpu().numpy()
        if hasattr(faiss, 'StandardGpuResources'):
            # gpu mode
            res = faiss.StandardGpuResources()
            config = faiss.GpuIndexFlatConfig()
            config.device = 0
            index = faiss.GpuIndexFlatIP(res, emb.shape[1], config)
        else:
            # cpu mode
            index = faiss.IndexFlatIP(emb.shape[1])
        index.add(emb)
        distances, _ = index.search(query, knn)
        return distances.mean(1)
    else:
        bs = 1024
        all_distances = []
        emb = emb.transpose(0, 1).contiguous()
        for i in range(0, query.shape[0], bs):
            distances = query[i:i + bs].mm(emb)
            best_distances, _ = distances.topk(knn, dim=1, largest=True, sorted=True)
            all_distances.append(best_distances.mean(1).cpu())
        all_distances = torch.cat(all_distances)
        return all_distances.numpy() 
Example #19
Source File: losses.py    From SO-Net with MIT License
def __init__(self, opt):
        super(ChamferLoss, self).__init__()
        self.opt = opt
        self.dimension = 3
        self.k = 1

        # we only need one StandardGpuResources object per GPU
        self.res = faiss.StandardGpuResources()
        self.res.setTempMemoryFraction(0.1)
        self.flat_config = faiss.GpuIndexFlatConfig()
        self.flat_config.device = opt.gpu_id

        # placeholder
        self.forward_loss = torch.FloatTensor([0])
        self.backward_loss = torch.FloatTensor([0]) 
Example #20
Source File: knn.py    From cdp with MIT License
def knn_faiss(feats, k):
    import torch
    import faiss
    import pdb
    N, dim = feats.shape
    res = faiss.StandardGpuResources()
    feats /= np.linalg.norm(feats, axis=1).reshape(-1, 1)
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = int(torch.cuda.device_count()) - 1
    index = faiss.GpuIndexFlatL2(res, dim, flat_config)
    index.add(feats)
    D, I = index.search(feats, k + 1)
    pdb.set_trace() 
Example #21
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0
def reserve_faiss_gpu_memory(gpu_id=0):
    """
    Reserves around 2.4 GB of memory on a Titan Xp.
    `r = reserve_faiss_gpu_memory()`
    To release the memory, run `del r`.

    Something like 200 MB will still be held afterwards.
    """
    res = faiss.StandardGpuResources()
    cfg = faiss.GpuIndexFlatConfig()
    cfg.useFloat16 = False
    cfg.device = gpu_id
    index = faiss.GpuIndexFlatL2(res, 2048, cfg)
    return index, res 
Example #22
Source File: utils.py    From DeMa-BWE with BSD 3-Clause "New" or "Revised" License
def get_nn_avg_dist_mog(emb, query, knn):
    """
    Compute the average distance of the `knn` nearest neighbors
    for a given set of embeddings and queries.
    Use Faiss if available.

    emb has been divided by sqrt(2) * var
    """
    if FAISS_AVAILABLE:
        emb = emb.cpu().numpy()
        query = query.cpu().numpy()
        if hasattr(faiss, 'StandardGpuResources'):
            # gpu mode
            res = faiss.StandardGpuResources()
            config = faiss.GpuIndexFlatConfig()
            config.device = 0
            index = faiss.GpuIndexFlatL2(res, emb.shape[1], config)
        else:
            # cpu mode
            index = faiss.IndexFlatL2(emb.shape[1])
        index.add(emb)
        # Ad-hoc implementation
        topK = 1000
        temp = 2.
        topK = 10
        distances, idxes = index.search(query, topK)
        return distances.mean(1)
        #query_idx = np.tile(np.arange(query.shape[0]) + 1, (topK, 1)).transpose()
        #rank_diff = abs(np.log(idxes + 1) - np.log(query_idx)) / temp
        #mog_distances_sorted = np.sort(distances + rank_diff)[:, :knn]
        # return: qN, knn
        #return mog_distances_sorted.mean(1)
    else:
        bs = 1024
        all_distances = []
        emb = emb.transpose(0, 1).contiguous()
        for i in range(0, query.shape[0], bs):
            distances = query[i:i + bs].mm(emb)
            best_distances, _ = distances.topk(knn, dim=1, largest=True, sorted=True)
            all_distances.append(best_distances.mean(1).cpu())
        all_distances = torch.cat(all_distances)
        return all_distances.numpy() 
Example #23
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0
def find_nearest_neighbors(x, queries=None, k=5, gpu_id=None):
    """
    Find k nearest neighbors for each of the n examples.
    Distances are computed using the squared Euclidean distance metric.

    Arguments:
    ----------
    x (ndarray): N examples to search within. [N x d].
    queries (ndarray): find the nearest neighbors for each query example. [M x d] matrix.
        If None, find the k nearest neighbors for each row of x
        (excluding self examples).
    k (int): number of nearest neighbors to find.
    gpu_id (int): use CPU if None, else use the GPU with the specified id.

    Returns
    I (ndarray): Indices of the nearest neighbors. [M x k]
    distances (ndarray): Distances to the nearest neighbors. [M x k]

    """
    if gpu_id is not None and not isinstance(gpu_id, int):
        raise ValueError('gpu_id must be None or int')
    x = np.asarray(x.reshape(x.shape[0], -1), dtype=np.float32)
    remove_self = False # will we have queries in the search results?
    if queries is None:
        remove_self = True
        queries = x
        k += 1

    d = x.shape[1]

    tic = time.time()
    if gpu_id is None:
        logging.debug('FAISS: cpu::find {} nearest neighbors'\
                     .format(k - int(remove_self)))
        index = faiss.IndexFlatL2(d)
    else:
        logging.debug('FAISS: gpu[{}]::find {} nearest neighbors'\
                     .format(gpu_id, k - int(remove_self)))
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = gpu_id

        flat_config = [cfg]
        resources = [faiss.StandardGpuResources()]
        index = faiss.GpuIndexFlatL2(resources[0], d, flat_config[0])
    index.add(x)
    distances, nns = index.search(queries, k)
    if remove_self:
        for i in range(len(nns)):
            indices = np.nonzero(nns[i, :] != i)[0]
            indices.sort()
            if len(indices) > k - 1:
                indices = indices[:-1]
            nns[i, :-1] = nns[i, indices]
            distances[i, :-1] = distances[i, indices]
        nns = nns[:, :-1]
        distances = distances[:, :-1]
    logging.debug('FAISS: Neighbors search total elapsed time: {:.2f} sec'.format(time.time() - tic))
    return nns, distances 
Example #24
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0
def train_kmeans(x, num_clusters=1000, gpu_ids=None, niter=100, nredo=1, verbose=0):
    """
    Runs k-means clustering on one or several GPUs
    """
    assert np.all(~np.isnan(x)), 'x contains NaN'
    assert np.all(np.isfinite(x)), 'x contains Inf'
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = bool(verbose)
    kmeans.niter = niter
    kmeans.nredo = nredo

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    if gpu_ids is not None:
        res = [faiss.StandardGpuResources() for i in gpu_ids]

        flat_config = []
        for i in gpu_ids:
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = i
            flat_config.append(cfg)

        if len(gpu_ids) == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                       for i in range(len(gpu_ids))]
            index = faiss.IndexProxy()
            for sub_index in indexes:
                index.addIndex(sub_index)
    else:
        index = faiss.IndexFlatL2(d)

    # perform the training
    kmeans.train(x, index)
    centroids = faiss.vector_float_to_array(kmeans.centroids)

    objective = faiss.vector_float_to_array(kmeans.obj)
    #logging.debug("Final objective: %.4g" % objective[-1])

    return centroids.reshape(num_clusters, d) 
Example #25
Source File: faiss_gpu.py    From learn-to-cluster with MIT License
def __init__(self,
                 target,
                 nprobe=128,
                 index_factory_str=None,
                 verbose=False,
                 mode='proxy',
                 using_gpu=True):
        self._res_list = []

        num_gpu = faiss.get_num_gpus()
        print('[faiss gpu] #GPU: {}'.format(num_gpu))

        size, dim = target.shape
        assert size > 0, "size: {}".format(size)
        index_factory_str = "IVF{},PQ{}".format(
            min(8192, 16 * round(np.sqrt(size))),
            32) if index_factory_str is None else index_factory_str
        cpu_index = faiss.index_factory(dim, index_factory_str)
        cpu_index.nprobe = nprobe

        if mode == 'proxy':
            co = faiss.GpuClonerOptions()
            co.useFloat16 = True
            co.usePrecomputed = False

            index = faiss.IndexProxy()
            for i in range(num_gpu):
                res = faiss.StandardGpuResources()
                self._res_list.append(res)
                sub_index = faiss.index_cpu_to_gpu(
                    res, i, cpu_index, co) if using_gpu else cpu_index
                index.addIndex(sub_index)
        elif mode == 'shard':
            co = faiss.GpuMultipleClonerOptions()
            co.useFloat16 = True
            co.usePrecomputed = False
            co.shard = True
            index = faiss.index_cpu_to_all_gpus(cpu_index,
                                                co,
                                                ngpu=num_gpu)
        else:
            raise KeyError("Unknown index mode")

        index = faiss.IndexIDMap(index)
        index.verbose = verbose

        # get nlist to decide how many samples used for training
        nlist = int([
            item for item in index_factory_str.split(",") if 'IVF' in item
        ][0].replace("IVF", ""))

        # training
        if not index.is_trained:
            indexes_sample_for_train = np.random.randint(
                0, size, nlist * 256)
            index.train(target[indexes_sample_for_train])

        # add with ids
        target_ids = np.arange(0, size)
        index.add_with_ids(target, target_ids)
        self.index = index 
Example #26
Source File: approximate_als.py    From implicit with MIT License
def fit(self, Ciu, show_progress=True):
        import faiss

        # train the model
        super(FaissAlternatingLeastSquares, self).fit(Ciu, show_progress)

        self.quantizer = faiss.IndexFlat(self.factors)

        if self.use_gpu:
            self.gpu_resources = faiss.StandardGpuResources()

        item_factors = self.item_factors.astype('float32')

        if self.approximate_recommend:
            log.debug("Building faiss recommendation index")

            # build up an inner product index here
            if self.use_gpu:
                index = faiss.GpuIndexIVFFlat(self.gpu_resources, self.factors, self.nlist,
                                              faiss.METRIC_INNER_PRODUCT)
            else:
                index = faiss.IndexIVFFlat(self.quantizer, self.factors, self.nlist,
                                           faiss.METRIC_INNER_PRODUCT)

            index.train(item_factors)
            index.add(item_factors)
            index.nprobe = self.nprobe
            self.recommend_index = index

        if self.approximate_similar_items:
            log.debug("Building faiss similar items index")

            # likewise build up a cosine index for similar_items, using an inner product
            # index on normalized vectors
            norms = numpy.linalg.norm(item_factors, axis=1)
            norms[norms == 0] = 1e-10

            normalized = (item_factors.T / norms).T.astype('float32')
            if self.use_gpu:
                index = faiss.GpuIndexIVFFlat(self.gpu_resources, self.factors, self.nlist,
                                              faiss.METRIC_INNER_PRODUCT)
            else:
                index = faiss.IndexIVFFlat(self.quantizer, self.factors, self.nlist,
                                           faiss.METRIC_INNER_PRODUCT)

            index.train(normalized)
            index.add(normalized)
            index.nprobe = self.nprobe
            self.similar_items_index = index