Python sklearn.utils.safe_indexing() Examples

The following are 13 code examples of sklearn.utils.safe_indexing(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils, or try the search function.
Example #1
Source File: test_utils.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_safe_indexing_pandas():
    """Check that safe_indexing picks the same rows from a DataFrame as from an array.

    The test is skipped when pandas is not installed. It also repeats the
    check with read-only data and read-only indices.
    """
    try:
        import pandas as pd
    except ImportError:
        raise SkipTest("Pandas not found")
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = pd.DataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    # Build the reference from the raw array: indexing the DataFrame twice
    # would compare the result against itself and could never fail.
    X_indexed = safe_indexing(X, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)
    # fun with read-only data in dataframes
    # this happens in joblib memmapping
    X.setflags(write=False)
    X_df_readonly = pd.DataFrame(X)
    inds_readonly = inds.copy()
    inds_readonly.setflags(write=False)

    # Every combination of writable / read-only frame and indices.
    for this_df, this_inds in product([X_df, X_df_readonly],
                                      [inds, inds_readonly]):
        # Record (and thereby silence) any warnings raised while indexing.
        with warnings.catch_warnings(record=True):
            X_df_indexed = safe_indexing(this_df, this_inds)

        assert_array_equal(np.array(X_df_indexed), X_indexed)
Example #2
Source File: test_utils.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_safe_indexing_pandas():
    """Check that safe_indexing picks the same rows from a DataFrame as from an array.

    The test is skipped when pandas is not installed. It also repeats the
    check with read-only data and read-only indices.
    """
    try:
        import pandas as pd
    except ImportError:
        raise SkipTest("Pandas not found")
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = pd.DataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    # Build the reference from the raw array: indexing the DataFrame twice
    # would compare the result against itself and could never fail.
    X_indexed = safe_indexing(X, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)
    # fun with read-only data in dataframes
    # this happens in joblib memmapping
    X.setflags(write=False)
    X_df_readonly = pd.DataFrame(X)
    inds_readonly = inds.copy()
    inds_readonly.setflags(write=False)

    # Every combination of writable / read-only frame and indices.
    for this_df, this_inds in product([X_df, X_df_readonly],
                                      [inds, inds_readonly]):
        # Record (and thereby silence) any warnings raised while indexing.
        with warnings.catch_warnings(record=True):
            X_df_indexed = safe_indexing(this_df, this_inds)

        assert_array_equal(np.array(X_df_indexed), X_indexed)
Example #3
Source File: test_utils.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_safe_indexing():
    """Check that safe_indexing agrees for a nested list and the equivalent array."""
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    rows_as_array = np.array(rows)
    picked = np.array([1, 2])

    from_list = np.array(safe_indexing(rows, picked))
    from_array = safe_indexing(rows_as_array, picked)

    # The list result must match both the array result and plain
    # NumPy fancy indexing.
    assert_array_equal(from_list, from_array)
    assert_array_equal(from_list, rows_as_array[picked])
Example #4
Source File: test_utils.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_safe_indexing_mock_pandas():
    """Check that safe_indexing on a MockDataFrame matches indexing the wrapped array."""
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = MockDataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    # Build the reference from the raw array: indexing the mock frame twice
    # would compare the result against itself and could never fail.
    X_indexed = safe_indexing(X, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)
Example #5
Source File: utils.py    From skorch with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _indexing_other(data, i):
    # sklearn's safe_indexing doesn't work with tuples since 0.22
    if isinstance(i, (int, np.integer, slice, tuple)):
        return data[i]
    return safe_indexing(data, i) 
Example #6
Source File: _validation.py    From mriqc with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _shuffle(y, groups, random_state):
    """Return ``y`` reordered by a random permutation.

    When ``groups`` is None the permutation covers all positions of ``y``.
    Otherwise positions are only permuted among entries that share the same
    value in ``groups``.
    """
    if groups is None:
        order = random_state.permutation(len(y))
        return safe_indexing(y, order)

    order = np.arange(len(groups))
    for label in np.unique(groups):
        # Permute only the positions belonging to this group.
        members = groups == label
        order[members] = random_state.permutation(order[members])
    return safe_indexing(y, order)
Example #7
Source File: base.py    From skoot with MIT License 5 votes vote down vote up
def _reorder(X, y, random_state, shuffle):
    """Return ``(X, y)`` indexed by a common row order.

    The order is ``0 .. X.shape[0] - 1``, permuted with ``random_state``
    when ``shuffle`` is truthy.
    """
    row_order = np.arange(X.shape[0])
    if shuffle:
        row_order = random_state.permutation(row_order)
    X_out = safe_indexing(X, row_order)
    y_out = y[row_order]
    return X_out, y_out
Example #8
Source File: sklearn.py    From optuna with MIT License 5 votes vote down vote up
def _safe_indexing(
    X,  # type: Union[OneDimArrayLikeType, TwoDimArrayLikeType]
    indices,  # type: OneDimArrayLikeType
):
    # type: (...) -> Union[OneDimArrayLikeType, TwoDimArrayLikeType]
    if X is None:
        return X

    return sklearn_safe_indexing(X, indices) 
Example #9
Source File: features.py    From SecuML with GNU General Public License v2.0 5 votes vote down vote up
def get_from_ids(self, instance_ids):
        """Return a ``Features`` object restricted to the given instance ids.

        Each id in ``instance_ids.ids`` is mapped to a position through
        ``self.instance_ids.get_index`` and the matching entries of
        ``self.values`` are selected.

        Raises ``StreamingUnsupported`` when ``self.streaming`` is set.
        """
        if self.streaming:
            raise StreamingUnsupported('get_from_ids is not supported for '
                                       'streaming features.')
        positions = [
            self.instance_ids.get_index(wanted_id)
            for wanted_id in instance_ids.ids
        ]
        selected = safe_indexing(self.values, positions)
        return Features(selected, self.info, instance_ids)
Example #10
Source File: features.py    From SecuML with GNU General Public License v2.0 5 votes vote down vote up
def get_from_indices(self, instance_ids, indices):
        """Return a ``Features`` object holding the entries of ``self.values`` at ``indices``.

        ``instance_ids`` is passed through unchanged to the returned
        ``Features``. An empty ``indices`` yields an empty array with
        ``self.values.shape[1]`` columns.

        Raises ``StreamingUnsupported`` when ``self.streaming`` is set.
        """
        if self.streaming:
            # The message names this method; it previously said
            # 'get_from_ids', which pointed at the wrong call.
            raise StreamingUnsupported('get_from_indices is not supported '
                                       'for streaming features.')
        if len(indices) > 0:
            values = safe_indexing(self.values, indices)
        else:
            # Empty selection: build a (0, n_columns) array directly instead
            # of calling safe_indexing with no indices.
            values = np.empty((0, self.values.shape[1]))
        return Features(values, self.info, instance_ids)
Example #11
Source File: test_utils.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_safe_indexing():
    """Check that safe_indexing agrees for a nested list and the equivalent array."""
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    rows_as_array = np.array(rows)
    picked = np.array([1, 2])

    from_list = np.array(safe_indexing(rows, picked))
    from_array = safe_indexing(rows_as_array, picked)

    # The list result must match both the array result and plain
    # NumPy fancy indexing.
    assert_array_equal(from_list, from_array)
    assert_array_equal(from_list, rows_as_array[picked])
Example #12
Source File: test_utils.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_safe_indexing_mock_pandas():
    """Check that safe_indexing on a MockDataFrame matches indexing the wrapped array."""
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = MockDataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    # Build the reference from the raw array: indexing the mock frame twice
    # would compare the result against itself and could never fail.
    X_indexed = safe_indexing(X, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)
Example #13
Source File: KMeansClustering.py    From driverlessai-recipes with Apache License 2.0 4 votes vote down vote up
def my_davies_bouldin_score(X, labels):
    """Computes the Davies-Bouldin score.

    The score is defined as the ratio of within-cluster distances to
    between-cluster distances.

    Read more in the :ref:`User Guide <davies-bouldin_index>`.

    Parameters
    ----------
    X : array-like, shape (``n_samples``, ``n_features``)
        List of ``n_features``-dimensional data points. Each row corresponds
        to a single data point.
    labels : array-like, shape (``n_samples``,)
        Predicted labels for each sample.

    Returns
    -------
    score: float
        The resulting Davies-Bouldin score. ``0.0`` is returned when all
        intra-cluster distances, or all centroid distances, are close to
        zero.

    References
    ----------
    .. [1] Davies, David L.; Bouldin, Donald W. (1979).
       `"A Cluster Separation Measure"
       <https://ieeexplore.ieee.org/document/4766909>`__.
       IEEE Transactions on Pattern Analysis and Machine Intelligence.
       PAMI-1 (2): 224-227
    """
    X, labels = check_X_y(X, labels)
    le = LabelEncoder()
    labels = le.fit_transform(labels)
    n_samples, n_features = X.shape
    n_labels = len(le.classes_)
    check_number_of_labels(n_labels, n_samples)

    intra_dists = np.zeros(n_labels)
    # Use the builtin float: the ``np.float`` alias was removed from NumPy
    # and raises AttributeError on current releases.
    centroids = np.zeros((n_labels, n_features), dtype=float)
    for k in range(n_labels):
        cluster_k = safe_indexing(X, labels == k)
        centroid = cluster_k.mean(axis=0)
        centroids[k] = centroid
        # Mean distance of the cluster's points to its own centroid.
        intra_dists[k] = np.average(pairwise_distances(
            cluster_k, [centroid]))

    # centroid_distances will contain zeros in the diagonal
    centroid_distances = pairwise_distances(centroids)

    if np.allclose(intra_dists, 0) or np.allclose(centroid_distances, 0):
        return 0.0

    # Compute score avoiding division by zero by adding an epsilon
    # this leads to high values in the diagonal's result
    score = (intra_dists[:, None] + intra_dists) / (centroid_distances + 1e-15)

    # Simply put the diagonal to zero
    score[np.eye(centroid_distances.shape[0]) == 1] = 0

    # NOTE: the original code divided by centroid_distances directly and
    # replaced the resulting inf entries with nan:
    # score = (intra_dists[:, None] + intra_dists) / (centroid_distances)
    # score[score == np.inf] = np.nan
    return np.mean(np.nanmax(score, axis=1))