Python sklearn.datasets.make_circles() Examples

The following are 26 code examples of sklearn.datasets.make_circles(), collected from open-source projects. The source file, project, and license for each example are listed above it. You may also want to check out the other functions and classes available in the sklearn.datasets module.
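Before the project examples, here is a minimal, self-contained sketch of the call itself (the parameter values are illustrative, not taken from any project below). make_circles returns a coordinate array X of shape (n_samples, 2) and a label vector y, where label 0 marks the outer circle and label 1 the inner circle, whose radius is scaled by factor:

from sklearn.datasets import make_circles

# Draw 100 points on two concentric circles: the inner radius is
# factor times the outer radius, and noise adds Gaussian jitter.
X, y = make_circles(n_samples=100, factor=0.5, noise=0.05, random_state=0)
print(X.shape)  # (100, 2)
print(y.shape)  # (100,)
print(set(y))   # {0, 1}: 0 = outer circle, 1 = inner circle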
Example #1
Source File: mini.py    From SymJAX with Apache License 2.0
def load_mini(N=1000):
    X, y = make_moons(N, noise=0.035, random_state=20)
    x_, y_ = make_circles(N, noise=0.02, random_state=20)
    x_[:, 1] += 2.0
    y_ += 2
    X = np.concatenate([X, x_], axis=0)
    y = np.concatenate([y, y_])
    X -= X.mean(0, keepdims=True)
    X /= X.max(0, keepdims=True)

    X = X.astype("float32")
    y = y.astype("int32")

    dict_init = [
        ("datum_shape", (2,)),
        ("n_classes", 4),
        ("name", "mini"),
        ("classes", [str(u) for u in range(4)]),
    ]

    dataset = Dataset(**dict(dict_init))
    dataset["inputs/train_set"] = X
    dataset["outputs/train_set"] = y

    return dataset 
Example #2
Source File: test_forest.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_random_trees_dense_equal():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning the same array for both argument values.

    # Create the RTEs
    hasher_dense = RandomTreesEmbedding(n_estimators=10, sparse_output=False,
                                        random_state=0)
    hasher_sparse = RandomTreesEmbedding(n_estimators=10, sparse_output=True,
                                         random_state=0)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed_dense = hasher_dense.fit_transform(X)
    X_transformed_sparse = hasher_sparse.fit_transform(X)

    # Assert that dense and sparse hashers have same array.
    assert_array_equal(X_transformed_sparse.toarray(), X_transformed_dense)


Example #3
Source File: test_forest.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_random_hasher():
    # test random forest hashing on circles dataset
    # make sure that it is linearly separable.
    # even after projected to two SVD dimensions
    # Note: Not all random_states produce perfect results.
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # test fit and transform:
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    assert_array_equal(hasher.fit(X).transform(X).toarray(),
                       X_transformed.toarray())

    # one leaf active per data point per forest
    assert_equal(X_transformed.shape[0], X.shape[0])
    assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators)
    svd = TruncatedSVD(n_components=2)
    X_reduced = svd.fit_transform(X_transformed)
    linear_clf = LinearSVC()
    linear_clf.fit(X_reduced, y)
    assert_equal(linear_clf.score(X_reduced, y), 1.) 
Example #4
Source File: test_hierarchical.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_single_linkage_clustering():
    # Check that we get the correct result in two emblematic cases
    moons, moon_labels = make_moons(noise=0.05, random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(moons)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     moon_labels), 1)

    circles, circle_labels = make_circles(factor=0.5, noise=0.025,
                                          random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(circles)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     circle_labels), 1) 
Example #5
Source File: test_samples_generator.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_make_circles():
    factor = 0.3

    for (n_samples, n_outer, n_inner) in [(7, 3, 4), (8, 4, 4)]:
        # Testing odd and even case, because in the past make_circles always
        # created an even number of samples.
        X, y = make_circles(n_samples, shuffle=False, noise=None,
                            factor=factor)
        assert_equal(X.shape, (n_samples, 2), "X shape mismatch")
        assert_equal(y.shape, (n_samples,), "y shape mismatch")
        center = [0.0, 0.0]
        for x, label in zip(X, y):
            dist_sqr = ((x - center) ** 2).sum()
            dist_exp = 1.0 if label == 0 else factor**2
            assert_almost_equal(dist_sqr, dist_exp,
                                err_msg="Point is not on expected circle")

        assert_equal(X[y == 0].shape, (n_outer, 2),
                     "Samples not correctly distributed across circles.")
        assert_equal(X[y == 1].shape, (n_inner, 2),
                     "Samples not correctly distributed across circles.")

    assert_raises(ValueError, make_circles, factor=-0.01)
    assert_raises(ValueError, make_circles, factor=1.) 
Example #6
Source File: test_kernel_pca.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_gridsearch_pipeline_precomputed():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model using a precomputed kernel.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca),
                         ("Perceptron", Perceptron(max_iter=5))])
    param_grid = dict(Perceptron__max_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    X_kernel = rbf_kernel(X, gamma=2.)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1)


Example #7
Source File: test_kernel_pca.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_nested_circles():
    # Test the linear separability of the first 2D KPCA transform
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)

    # 2D nested circles are not linearly separable
    train_score = Perceptron(max_iter=5).fit(X, y).score(X, y)
    assert_less(train_score, 0.8)

    # Project the circles data into the first 2 components of a RBF Kernel
    # PCA model.
    # Note that the gamma value is data dependent. If this test breaks
    # and the gamma value has to be updated, the Kernel PCA example will
    # have to be updated too.
    kpca = KernelPCA(kernel="rbf", n_components=2,
                     fit_inverse_transform=True, gamma=2.)
    X_kpca = kpca.fit_transform(X)

    # The data is perfectly linearly separable in that space
    train_score = Perceptron(max_iter=5).fit(X_kpca, y).score(X_kpca, y)
    assert_equal(train_score, 1.0) 
Example #8
Source File: sequential_minimum_optimization.py    From Python with MIT License
def test_rbf_kernel(ax, cost):
    train_x, train_y = make_circles(
        n_samples=500, noise=0.1, factor=0.1, random_state=1
    )
    train_y[train_y == 0] = -1
    scaler = StandardScaler()
    train_x_scaled = scaler.fit_transform(train_x, train_y)
    train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
    mysvm = SmoSVM(
        train=train_data,
        kernel_func=mykernel,
        cost=cost,
        tolerance=0.001,
        auto_norm=False,
    )
    mysvm.fit()
    plot_partition_boundary(mysvm, train_data, ax=ax) 
Example #9
Source File: test_forest.py    From twitter-stock-recommendation with MIT License
def test_random_hasher():
    # test random forest hashing on circles dataset
    # make sure that it is linearly separable.
    # even after projected to two SVD dimensions
    # Note: Not all random_states produce perfect results.
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # test fit and transform:
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    assert_array_equal(hasher.fit(X).transform(X).toarray(),
                       X_transformed.toarray())

    # one leaf active per data point per forest
    assert_equal(X_transformed.shape[0], X.shape[0])
    assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators)
    svd = TruncatedSVD(n_components=2)
    X_reduced = svd.fit_transform(X_transformed)
    linear_clf = LinearSVC()
    linear_clf.fit(X_reduced, y)
    assert_equal(linear_clf.score(X_reduced, y), 1.) 
Example #10
Source File: test_forest.py    From twitter-stock-recommendation with MIT License
def test_random_trees_dense_equal():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning the same array for both argument values.

    # Create the RTEs
    hasher_dense = RandomTreesEmbedding(n_estimators=10, sparse_output=False,
                                        random_state=0)
    hasher_sparse = RandomTreesEmbedding(n_estimators=10, sparse_output=True,
                                         random_state=0)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed_dense = hasher_dense.fit_transform(X)
    X_transformed_sparse = hasher_sparse.fit_transform(X)

    # Assert that dense and sparse hashers have same array.
    assert_array_equal(X_transformed_sparse.toarray(), X_transformed_dense)


Example #11
Source File: test_dbscan.py    From dislib with Apache License 2.0
def test_n_clusters_circles_grid(self):
        """ Tests that DBSCAN finds the correct number of clusters when
        setting n_regions > 1 with circle data.
        """
        n_samples = 1500
        x, y = make_circles(n_samples=n_samples, factor=.5, noise=.05)
        dbscan = DBSCAN(n_regions=4, eps=.15, max_samples=700)
        x = StandardScaler().fit_transform(x)
        ds_x = ds.array(x, block_size=(300, 2))
        dbscan.fit(ds_x)
        self.assertEqual(dbscan.n_clusters, 2) 
Example #12
Source File: app.py    From dash-svm with MIT License
def generate_data(n_samples, dataset, noise):
    if dataset == 'moons':
        return datasets.make_moons(
            n_samples=n_samples,
            noise=noise,
            random_state=0
        )

    elif dataset == 'circles':
        return datasets.make_circles(
            n_samples=n_samples,
            noise=noise,
            factor=0.5,
            random_state=1
        )

    elif dataset == 'linear':
        X, y = datasets.make_classification(
            n_samples=n_samples,
            n_features=2,
            n_redundant=0,
            n_informative=2,
            random_state=2,
            n_clusters_per_class=1
        )

        rng = np.random.RandomState(2)
        X += noise * rng.uniform(size=X.shape)
        linearly_separable = (X, y)

        return linearly_separable

    else:
        raise ValueError(
            'Data type incorrectly specified. Please choose an existing '
            'dataset.') 
Example #13
Source File: classification_example.py    From intro_ds with Apache License 2.0
def generateData(n):
    """
    Generate four normalized 2D toy datasets: blobs, circles, moons, and blocks.
    """
    np.random.seed(12046)
    blobs = make_blobs(n_samples=n, centers=[[-2, -2], [2, 2]])
    circles = make_circles(n_samples=n, factor=.4, noise=.05)
    moons = make_moons(n_samples=n, noise=.05)
    blocks = np.random.rand(n, 2) - 0.5
    y = (blocks[:, 0] * blocks[:, 1] < 0) + 0
    blocks = (blocks, y)
    # Neural networks are not robust to linear transformations of the data,
    # so normalize each dataset.
    scaler = StandardScaler()
    blobs = (scaler.fit_transform(blobs[0]), blobs[1])
    circles = (scaler.fit_transform(circles[0]), circles[1])
    moons = (scaler.fit_transform(moons[0]), moons[1])
    blocks = (scaler.fit_transform(blocks[0]), blocks[1])
    return blobs, circles, moons, blocks
Example #14
Source File: gmm_vs_spectral.py    From intro_ds with Apache License 2.0
def generateCircles(n):
    """
    Generate circle-shaped data.
    """
    data, _ = make_circles(n_samples=n, factor=0.5, noise=0.06)
    return data 
Example #15
Source File: test_forest.py    From twitter-stock-recommendation with MIT License
def test_random_trees_dense_type():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning a dense array.

    # Create the RTE with sparse=False
    hasher = RandomTreesEmbedding(n_estimators=10, sparse_output=False)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # Assert that type is ndarray, not scipy.sparse.csr.csr_matrix
    assert_equal(type(X_transformed), np.ndarray) 
Example #16
Source File: test_kernel_pca.py    From twitter-stock-recommendation with MIT License
def test_gridsearch_pipeline():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="rbf", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca),
                         ("Perceptron", Perceptron(max_iter=5))])
    param_grid = dict(kernel_pca__gamma=2. ** np.arange(-2, 2))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    grid_search.fit(X, y)
    assert_equal(grid_search.best_score_, 1) 
Example #17
Source File: test_kernel_pca.py    From twitter-stock-recommendation with MIT License
def test_gridsearch_pipeline_precomputed():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model using a precomputed kernel.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca),
                         ("Perceptron", Perceptron(max_iter=5))])
    param_grid = dict(Perceptron__max_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    X_kernel = rbf_kernel(X, gamma=2.)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1) 
Example #18
Source File: test_dbscan.py    From dislib with Apache License 2.0
def test_n_clusters_circles_max_samples(self):
        """ Tests that DBSCAN finds the correct number of clusters when
        defining max_samples with circle data.
        """
        n_samples = 1500
        x, y = make_circles(n_samples=n_samples, factor=.5, noise=.05)
        dbscan = DBSCAN(n_regions=1, eps=.15, max_samples=500)
        x = StandardScaler().fit_transform(x)
        ds_x = ds.array(x, block_size=(300, 2))
        dbscan.fit(ds_x)
        self.assertEqual(dbscan.n_clusters, 2) 
Example #19
Source File: test_dbscan.py    From dislib with Apache License 2.0
def test_n_clusters_circles(self):
        """ Tests that DBSCAN finds the correct number of clusters with
        circle data.
        """
        n_samples = 1500
        x, y = make_circles(n_samples=n_samples, factor=.5, noise=.05)
        dbscan = DBSCAN(n_regions=1, eps=.15)
        x = StandardScaler().fit_transform(x)
        ds_x = ds.array(x, block_size=(300, 2))
        dbscan.fit(ds_x)
        self.assertEqual(dbscan.n_clusters, 2) 
Example #20
Source File: distributions.py    From swae-pytorch with MIT License
def rand_ring2d(batch_size):
    """ This function generates 2D samples from a hollowed-cirlce distribution in a 2-dimensional space.

        Args:
            batch_size (int): number of batch samples

        Return:
            torch.Tensor: tensor of size (batch_size, 2)
    """
    circles = make_circles(2 * batch_size, noise=.01)
    z = np.squeeze(circles[0][np.argwhere(circles[1] == 0), :])
    return torch.from_numpy(z).type(torch.FloatTensor) 
Example #21
Source File: bivariate.py    From SDGym with MIT License
def make_two_rings(num_samples):
    samples, labels = make_circles(num_samples, shuffle=True, noise=None, random_state=None, factor=0.6)
    return samples 
Example #22
Source File: test_coverer.py    From kepler-mapper with MIT License
def test_complete_pipeline(self, CoverClass):
        # TODO: add a mock that asserts the cover was called appropriately, or test the number of cubes, etc.
        data, _ = datasets.make_circles()

        data = data.astype(np.float64)
        mapper = KeplerMapper()
        graph = mapper.map(data, cover=CoverClass())
        mapper.visualize(graph) 
Example #23
Source File: test_visuals.py    From kepler-mapper with MIT License
def test_format_mapper_data(self, jinja_env):
        mapper = KeplerMapper()
        data, labels = make_circles(1000, random_state=0)
        lens = mapper.fit_transform(data, projection=[0])
        graph = mapper.map(lens, data)

        color_function = lens[:, 0]
        inverse_X = data
        projected_X = lens
        projected_X_names = ["projected_%s" % (i) for i in range(projected_X.shape[1])]
        inverse_X_names = ["inverse_%s" % (i) for i in range(inverse_X.shape[1])]
        custom_tooltips = np.array(["customized_%s" % (l) for l in labels])

        graph_data = format_mapper_data(
            graph,
            color_function,
            inverse_X,
            inverse_X_names,
            projected_X,
            projected_X_names,
            custom_tooltips,
            jinja_env,
        )
        # print(graph_data)
        # Dump to json so we can easily tell what's in it.
        graph_data = json.dumps(graph_data)

        # TODO test more properties!
        assert "name" in graph_data
        assert """cube2_cluster0""" in graph_data
        assert """projected_0""" in graph_data
        assert """inverse_0""" in graph_data

        assert """customized_""" in graph_data 
Example #24
Source File: test_forest.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_random_trees_dense_type():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning a dense array.

    # Create the RTE with sparse=False
    hasher = RandomTreesEmbedding(n_estimators=10, sparse_output=False)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # Assert that type is ndarray, not scipy.sparse.csr.csr_matrix
    assert_equal(type(X_transformed), np.ndarray) 
Example #25
Source File: icnn.py    From icnn with Apache License 2.0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--nEpoch', type=int, default=100)
    # parser.add_argument('--testBatchSz', type=int, default=2048)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--model', type=str, default="picnn",
                        choices=['picnn', 'ficnn'])
    parser.add_argument('--dataset', type=str, default="moons",
                        choices=['moons', 'circles', 'linear'])
    parser.add_argument('--noncvx', action='store_true')

    args = parser.parse_args()

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    setproctitle.setproctitle('bamos.icnn.synthetic.{}.{}'.format(args.model, args.dataset))

    save = os.path.join(os.path.expanduser(args.save),
                        "{}.{}".format(args.model, args.dataset))
    if os.path.isdir(save):
        shutil.rmtree(save)
    os.makedirs(save, exist_ok=True)

    if args.dataset == "moons":
        (dataX, dataY) = make_moons(noise=0.3, random_state=0)
    elif args.dataset == "circles":
        (dataX, dataY) = make_circles(noise=0.2, factor=0.5, random_state=0)
        dataY = 1.-dataY
    elif args.dataset == "linear":
        (dataX, dataY) = make_classification(n_features=2, n_redundant=0, n_informative=2,
                                             random_state=1, n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        dataX += 2 * rng.uniform(size=dataX.shape)
    else:
        assert False

    dataY = dataY.reshape((-1, 1)).astype(np.float32)

    nData = dataX.shape[0]
    nFeatures = dataX.shape[1]
    nLabels = 1
    nXy = nFeatures + nLabels

    config = tf.ConfigProto()  # log_device_placement=False
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Model(nFeatures, nLabels, sess, args.model, nGdIter=30)
        model.train(args, dataX, dataY) 
Example #26
Source File: test_clustering.py    From nussl with MIT License
def cluster_data():
    np.random.seed(0)

    # ============
    # Generate datasets. We choose the size big enough to see the scalability
    # of the algorithms, but not too big to avoid too long running times
    # ============
    n_samples = 1500
    noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
                                          noise=.05)
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)

    # Anisotropically distributed data
    random_state = 170
    X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    X_aniso = np.dot(X, transformation)
    aniso = (X_aniso, y)

    # blobs with varied variances
    varied = datasets.make_blobs(n_samples=n_samples,
                                 cluster_std=[1.0, 2.5, 0.5],
                                 random_state=random_state)

    default_base = {'quantile': .3,
                    'eps': .3,
                    'damping': .9,
                    'preference': -200,
                    'n_neighbors': 10,
                    'n_clusters': 3,
                    'min_samples': 20,
                    'xi': 0.05,
                    'min_cluster_size': 0.1}

    data = [
        ('noisy_circles', noisy_circles, {'damping': .77, 'preference': -240,
                                          'quantile': .2, 'n_clusters': 2,
                                          'min_samples': 20, 'xi': 0.25}),
        ('noisy_moons', noisy_moons, {'damping': .75, 'preference': -220, 'n_clusters': 2}),
        ('varied', varied, {'eps': .18, 'n_neighbors': 2,
                            'min_samples': 5, 'xi': 0.035, 'min_cluster_size': .2}),
        ('aniso', aniso, {'eps': .15, 'n_neighbors': 2,
                          'min_samples': 20, 'xi': 0.1, 'min_cluster_size': .2}),
        ('blobs', blobs, {}),
    ]
    yield data, default_base