Python faiss.Clustering() Examples

The following are 6 code examples of faiss.Clustering(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module faiss , or try the search function .
Example #1
Source File: faiss_kmeans.py    From cdp with MIT License 10 votes vote down vote up
def run_kmeans(x, nmb_clusters, verbose=False):
    """Runs kmeans on 1 GPU.
    Args:
        x: data
        nmb_clusters (int): number of clusters
    Returns:
        list: ids of data in each cluster
    """
    n_data, d = x.shape

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)

    # Change faiss seed at each k-means so that the randomly picked
    # initialization centroids do not correspond to the same feature ids
    # from an epoch to another.
    clus.seed = np.random.randint(1234)

    clus.niter = 20
    clus.max_points_per_centroid = 10000000
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.useFloat16 = False
    flat_config.device = 0
    index = faiss.GpuIndexFlatL2(res, d, flat_config)

    # perform the training
    clus.train(x, index)
    _, I = index.search(x, 1)
    losses = faiss.vector_to_array(clus.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))

    return [int(n[0]) for n in I], losses[-1] 
Example #2
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 6 votes vote down vote up
def do_clustering(features, num_clusters, gpu_ids=None,
                  num_pca_components=None, niter=100, nredo=1, verbose=0):
    logging.debug('FAISS: using GPUs {}'.format(gpu_ids))
    features = np.asarray(features.reshape(features.shape[0], -1), dtype=np.float32)

    if num_pca_components is not None:
        features = preprocess_features(features, d=num_pca_components,
                                       niter=niter, nredo=nredo, verbose=verbose)

    logging.debug('FAISS: clustering...')
    t0 = time.time()
    centroids = train_kmeans(features, num_clusters, gpu_ids=gpu_ids, verbose=1)
    labels = compute_cluster_assignment(centroids, features)
    t1 = time.time()
    logging.debug("FAISS: Clustering total elapsed time: %.3f m" % ((t1 - t0) / 60.0))
    return labels 
Example #3
Source File: nmi.py    From classification_metric_learning with Apache License 2.0 6 votes vote down vote up
def test_nmi_faiss(embeddings, labels):
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0

    unique_labels = np.unique(labels)
    d = embeddings.shape[1]
    kmeans = faiss.Clustering(d, unique_labels.size)
    kmeans.verbose = True
    kmeans.niter = 300
    kmeans.nredo = 10
    kmeans.seed = 0

    index = faiss.GpuIndexFlatL2(res, d, flat_config)

    kmeans.train(embeddings, index)

    dists, pred_labels = index.search(embeddings, 1)

    pred_labels = pred_labels.squeeze()

    nmi = normalized_mutual_info_score(labels, pred_labels)

    print("NMI: {}".format(nmi))
    return nmi 
Example #4
Source File: run_index.py    From denspi with Apache License 2.0 6 votes vote down vote up
def train_coarse_quantizer(data, quantizer_path, num_clusters, hnsw=False, niter=10, cuda=False):
    d = data.shape[1]

    index_flat = faiss.IndexFlatL2(d)
    # make it into a gpu index
    if cuda:
        res = faiss.StandardGpuResources()
        index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
    clus = faiss.Clustering(d, num_clusters)
    clus.verbose = True
    clus.niter = niter
    clus.train(data, index_flat)
    centroids = faiss.vector_float_to_array(clus.centroids)
    centroids = centroids.reshape(num_clusters, d)

    if hnsw:
        quantizer = faiss.IndexHNSWFlat(d, 32)
        quantizer.hnsw.efSearch = 128
        quantizer.train(centroids)
        quantizer.add(centroids)
    else:
        quantizer = faiss.IndexFlatL2(d)
        quantizer.add(centroids)

    faiss.write_index(quantizer, quantizer_path) 
Example #5
Source File: stat_utils.py    From pytorch-metric-learning with MIT License 6 votes vote down vote up
def run_kmeans(x, nmb_clusters):
    """
    Args:
        x: data
        nmb_clusters (int): number of clusters
    Returns:
        list: ids of data in each cluster
    """
    n_data, d = x.shape
    logging.info("running k-means clustering with k=%d"%nmb_clusters)
    logging.info("embedding dimensionality is %d"%d)

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)
    clus.niter = 20
    clus.max_points_per_centroid = 10000000
    index = faiss.IndexFlatL2(d)
    if faiss.get_num_gpus() > 0:
        index = faiss.index_cpu_to_all_gpus(index)
    # perform the training
    clus.train(x, index)
    _, idxs = index.search(x, 1)

    return [int(n[0]) for n in idxs]


# modified from https://github.com/facebookresearch/faiss/wiki/Faiss-building-blocks:-clustering,-PCA,-quantization 
Example #6
Source File: faissext.py    From metric-learning-divide-and-conquer with GNU Lesser General Public License v3.0 4 votes vote down vote up
def train_kmeans(x, num_clusters=1000, gpu_ids=None, niter=100, nredo=1, verbose=0):
    """
    Runs k-means clustering on one or several GPUs
    """
    assert np.all(~np.isnan(x)), 'x contains NaN'
    assert np.all(np.isfinite(x)), 'x contains Inf'
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = bool(verbose)
    kmeans.niter = niter
    kmeans.nredo = nredo

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    if gpu_ids is not None:
        res = [faiss.StandardGpuResources() for i in gpu_ids]

        flat_config = []
        for i in gpu_ids:
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = i
            flat_config.append(cfg)

        if len(gpu_ids) == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                       for i in range(len(gpu_ids))]
            index = faiss.IndexProxy()
            for sub_index in indexes:
                index.addIndex(sub_index)
    else:
        index = faiss.IndexFlatL2(d)

    # perform the training
    kmeans.train(x, index)
    centroids = faiss.vector_float_to_array(kmeans.centroids)

    objective = faiss.vector_float_to_array(kmeans.obj)
    #logging.debug("Final objective: %.4g" % objective[-1])

    return centroids.reshape(num_clusters, d)