Python sklearn.utils Examples

The following are 13 code examples of the sklearn.utils module, collected from open-source projects. Each example lists its original project, source file, and license. You may also want to check out all other available functions and classes of the sklearn module.
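As a quick orientation before the examples, here is a minimal sketch of two commonly used helpers from sklearn.utils, shuffle and check_random_state (the arrays are made up for illustration):

import numpy as np
from sklearn.utils import check_random_state, shuffle

X = np.arange(10).reshape(5, 2)
y = np.arange(5)

# Shuffle X and y in unison, reproducibly.
X_s, y_s = shuffle(X, y, random_state=0)

# Normalize an int/None/RandomState argument into a RandomState instance.
rng = check_random_state(0)
print(rng.randint(10))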
Example #1
Source File: sklearn_policy.py    From rasa_core with Apache License 2.0
def persist(self, path: Text) -> None:
    if self.model:
        self.featurizer.persist(path)

        meta = {"priority": self.priority}

        meta_file = os.path.join(path, 'sklearn_policy.json')
        utils.dump_obj_as_json_to_file(meta_file, meta)

        filename = os.path.join(path, 'sklearn_model.pkl')
        with open(filename, 'wb') as f:
            pickle.dump(self._state, f)
    else:
        warnings.warn("Persist called without a trained model present. "
                      "Nothing to persist then!")
Example #2
Source File: sklearn_policy.py    From rasa_core with Apache License 2.0
@classmethod
def load(cls, path: Text) -> Policy:
    filename = os.path.join(path, 'sklearn_model.pkl')
    if not os.path.exists(filename):
        raise OSError("Failed to load dialogue model. Path {} "
                      "doesn't exist".format(os.path.abspath(filename)))

    featurizer = TrackerFeaturizer.load(path)
    assert isinstance(featurizer, MaxHistoryTrackerFeaturizer), \
        ("Loaded featurizer of type {}, should be "
         "MaxHistoryTrackerFeaturizer.".format(type(featurizer).__name__))

    meta_file = os.path.join(path, "sklearn_policy.json")
    meta = json.loads(utils.read_file(meta_file))
    policy = cls(featurizer=featurizer, priority=meta["priority"])

    with open(filename, 'rb') as f:
        state = pickle.load(f)
    vars(policy).update(state)

    logger.info("Loaded sklearn model")
    return policy
Example #3
Source File: utils.py    From skorch with BSD 3-Clause "New" or "Revised" License
def check_is_fitted(estimator, attributes, msg=None, all_or_any=all):
    """Checks whether the net is initialized.

    Note: This calls ``sklearn.utils.validation.check_is_fitted``
    under the hood, using exactly the same arguments and logic. The
    only difference is that this function has an adapted error message
    and raises a ``skorch.exceptions.NotInitializedError`` instead of
    an ``sklearn.exceptions.NotFittedError``.

    """
    if msg is None:
        msg = ("This %(name)s instance is not initialized yet. Call "
               "'initialize' or 'fit' with appropriate arguments "
               "before using this method.")


    if not isinstance(attributes, (list, tuple)):
        attributes = [attributes]

    if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
        raise NotInitializedError(msg % {'name': type(estimator).__name__}) 
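A usage sketch, assuming skorch is installed and NotInitializedError is imported from skorch.exceptions (the Net class here is just a stand-in object):

from skorch.exceptions import NotInitializedError

class Net:
    pass

net = Net()
try:
    check_is_fitted(net, 'module_')
except NotInitializedError as e:
    print(e)  # "This Net instance is not initialized yet. ..."

net.module_ = object()
check_is_fitted(net, 'module_')  # no error once the attribute exists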
Example #4
Source File: sklearn_policy.py    From rasa-for-botfront with Apache License 2.0
def persist(self, path: Text) -> None:
    if self.model:
        self.featurizer.persist(path)

        meta = {"priority": self.priority}

        meta_file = os.path.join(path, "sklearn_policy.json")
        rasa.utils.io.dump_obj_as_json_to_file(meta_file, meta)

        filename = os.path.join(path, "sklearn_model.pkl")
        rasa.utils.io.pickle_dump(filename, self._state)
    else:
        raise_warning(
            "Persist called without a trained model present. "
            "Nothing to persist then!"
        )
Example #5
Source File: sklearn_policy.py    From rasa-for-botfront with Apache License 2.0
@classmethod
def load(cls, path: Text) -> Policy:
    filename = os.path.join(path, "sklearn_model.pkl")
    if not os.path.exists(filename):
        raise OSError(
            "Failed to load dialogue model. Path {} "
            "doesn't exist".format(os.path.abspath(filename))
        )

    featurizer = TrackerFeaturizer.load(path)
    assert isinstance(featurizer, MaxHistoryTrackerFeaturizer), (
        "Loaded featurizer of type {}, should be "
        "MaxHistoryTrackerFeaturizer.".format(type(featurizer).__name__)
    )

    meta_file = os.path.join(path, "sklearn_policy.json")
    meta = json.loads(rasa.utils.io.read_file(meta_file))

    policy = cls(featurizer=featurizer, priority=meta["priority"])

    state = rasa.utils.io.pickle_load(filename)
    vars(policy).update(state)

    logger.info("Loaded sklearn model")
    return policy
Example #6
Source File: test_common.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_root_import_all_completeness():
    EXCEPTIONS = ('utils', 'tests', 'base', 'setup', 'conftest')
    for _, modname, _ in pkgutil.walk_packages(path=sklearn.__path__,
                                               onerror=lambda _: None):
        if '.' in modname or modname.startswith('_') or modname in EXCEPTIONS:
            continue
        assert_in(modname, sklearn.__all__) 
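The walk over sklearn's top-level submodules that this test relies on can be run directly; a small sketch that prints whichever public submodules your installed sklearn exposes and whether each appears in sklearn.__all__:

import pkgutil
import sklearn

for _, modname, _ in pkgutil.walk_packages(path=sklearn.__path__,
                                           onerror=lambda _: None):
    if '.' not in modname and not modname.startswith('_'):
        print(modname, modname in sklearn.__all__)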
Example #7
Source File: utils.py    From skorch with BSD 3-Clause "New" or "Revised" License
def is_dataset(x):
    return isinstance(x, torch.utils.data.Dataset)
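A quick check of is_dataset against a built-in PyTorch dataset (TensorDataset subclasses torch.utils.data.Dataset):

import torch
from torch.utils.data import TensorDataset

ds = TensorDataset(torch.zeros(10, 3), torch.zeros(10))
print(is_dataset(ds))         # True
print(is_dataset([1, 2, 3]))  # False: a plain list is not a Dataset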
Example #8
Source File: utils.py    From skorch with BSD 3-Clause "New" or "Revised" License
def data_from_dataset(dataset, X_indexing=None, y_indexing=None):
    """Try to access X and y attribute from dataset.

    Also works when dataset is a subset.

    Parameters
    ----------
    dataset : skorch.dataset.Dataset or torch.utils.data.Subset
      The incoming dataset should be a ``skorch.dataset.Dataset`` or a
      ``torch.utils.data.Subset`` of a
      ``skorch.dataset.Dataset``.

    X_indexing : function/callable or None (default=None)
      If not None, use this function for indexing into the X data. If
      None, try to automatically determine how to index data.

    y_indexing : function/callable or None (default=None)
      If not None, use this function for indexing into the y data. If
      None, try to automatically determine how to index data.

    """
    X, y = _none, _none

    if isinstance(dataset, Subset):
        X, y = data_from_dataset(
            dataset.dataset, X_indexing=X_indexing, y_indexing=y_indexing)
        X = multi_indexing(X, dataset.indices, indexing=X_indexing)
        y = multi_indexing(y, dataset.indices, indexing=y_indexing)
    elif hasattr(dataset, 'X') and hasattr(dataset, 'y'):
        X, y = dataset.X, dataset.y

    if (X is _none) or (y is _none):
        raise AttributeError("Could not access X and y from dataset.")
    return X, y 
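A usage sketch, assuming skorch is installed: skorch.dataset.Dataset exposes the X and y attributes that data_from_dataset looks for, and the Subset branch slices them down to the requested indices.

import numpy as np
from torch.utils.data import Subset
from skorch.dataset import Dataset

X = np.random.rand(10, 3).astype('float32')
y = np.arange(10)

ds = Dataset(X, y)
sub = Subset(ds, indices=[0, 2, 4])

X_sub, y_sub = data_from_dataset(sub)
print(X_sub.shape, y_sub)  # (3, 3) [0 2 4]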
Example #9
Source File: sklearn.py    From optuna with MIT License
def _check_fit_params(
    X,  # type: TwoDimArrayLikeType
    fit_params,  # type: Dict
    indices,  # type: OneDimArrayLikeType
):
    # type: (...) -> Dict

    fit_params_validated = {}
    for key, value in fit_params.items():

        # NOTE Original implementation:
        # https://github.com/scikit-learn/scikit-learn/blob/ \
        # 2467e1b84aeb493a22533fa15ff92e0d7c05ed1c/sklearn/utils/validation.py#L1324-L1328
        # Scikit-learn does not accept non-iterable inputs.
        # This line is for keeping backward compatibility.
        # (See: https://github.com/scikit-learn/scikit-learn/issues/15805)
        if not _is_arraylike(value) or _num_samples(value) != _num_samples(X):
            fit_params_validated[key] = value
        else:
            fit_params_validated[key] = _make_indexable(value)
            fit_params_validated[key] = _safe_indexing(fit_params_validated[key], indices)
    return fit_params_validated
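A sketch of what this validation does, assuming the module's sklearn imports (_make_indexable, _safe_indexing) and the helpers shown in Examples #10 and #11 are in scope: array-likes whose length matches X are sliced down to the training indices, everything else passes through unchanged.

import numpy as np

X = np.zeros((6, 2))
fit_params = {
    'sample_weight': np.ones(6),  # matches len(X): gets indexed
    'verbose': True,              # scalar: passed through as-is
}
indices = np.array([0, 2, 4])

out = _check_fit_params(X, fit_params, indices)
print(out['sample_weight'].shape)  # (3,)
print(out['verbose'])              # True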
Example #10
Source File: sklearn.py    From optuna with MIT License
# NOTE Original implementation:
# https://github.com/scikit-learn/scikit-learn/blob/ \
# 8caa93889f85254fc3ca84caa0a24a1640eebdd1/sklearn/utils/validation.py#L131-L135
def _is_arraylike(x):
    # type: (Any) -> bool

    return hasattr(x, "__len__") or hasattr(x, "shape") or hasattr(x, "__array__")
Example #11
Source File: sklearn.py    From optuna with MIT License
# NOTE Original implementation:
# https://github.com/scikit-learn/scikit-learn/blob/ \
# 8caa93889f85254fc3ca84caa0a24a1640eebdd1/sklearn/utils/validation.py#L217-L234
def _num_samples(x):
    # type: (ArrayLikeType) -> int

    # NOTE For dask dataframes
    # https://github.com/scikit-learn/scikit-learn/blob/ \
    # 8caa93889f85254fc3ca84caa0a24a1640eebdd1/sklearn/utils/validation.py#L155-L158
    x_shape = getattr(x, "shape", None)
    if x_shape is not None:
        if isinstance(x_shape[0], Integral):
            return int(x_shape[0])

    try:
        return len(x)
    except TypeError:
        raise TypeError("Expected sequence or array-like, got %s." % type(x)) 
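A quick illustration of the two branches, the shape fast path and the len() fallback:

import numpy as np

print(_num_samples(np.zeros((5, 2))))  # 5, taken from x.shape[0]
print(_num_samples([1, 2, 3]))         # 3, via len()
_num_samples(42)                       # raises TypeError: not array-like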
Example #12
Source File: utility.py    From pyod with BSD 2-Clause "Simplified" License
def get_label_n(y, y_pred, n=None):
    """Function to turn raw outlier scores into binary labels by assign 1
    to top n outlier scores.

    Parameters
    ----------
    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).

    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.

    n : int, optional (default=None)
        The number of outliers. If not defined, infer using the ground truth.

    Returns
    -------
    labels : numpy array of shape (n_samples,)
        Binary labels: 0 for normal points, 1 for outliers.

    Examples
    --------
    >>> from pyod.utils.utility import get_label_n
    >>> y = [0, 1, 1, 0, 0]
    >>> y_pred = [0.1, 0.5, 0.3, 0.2, 0.7]
    >>> get_label_n(y, y_pred)
    array([0, 1, 0, 0, 1])

    """

    # enforce formats of inputs
    y = column_or_1d(y)
    y_pred = column_or_1d(y_pred)

    check_consistent_length(y, y_pred)
    y_len = len(y)  # the length of targets

    # calculate the percentage of outliers
    if n is not None:
        outliers_fraction = n / y_len
    else:
        outliers_fraction = np.count_nonzero(y) / y_len

    threshold = percentile(y_pred, 100 * (1 - outliers_fraction))
    y_pred = (y_pred > threshold).astype('int')

    return y_pred 
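Passing n explicitly overrides the ground-truth-based outlier fraction; with the docstring's data, n=1 keeps only the single highest score (0.7):

>>> get_label_n(y, y_pred, n=1)
array([0, 0, 0, 0, 1])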
Example #13
Source File: solver.py    From intrinsic with MIT License
def initialize_intensities(self):
    """ Initialization: k-means of the input image """

    if self.params.logging:
        t0 = timeit.default_timer()
        print("initialization: k-means clustering with %s centers..." %
              self.params.kmeans_n_clusters)

    image_irg = self.input.image_irg
    mask_nz = self.input.mask_nz

    if self.params.fixed_seed:
        # fix the seed when computing things like gradients across
        # hyperparameters
        random_state = np.random.RandomState(seed=59173)
    else:
        random_state = None

    samples = image_irg[mask_nz[0], mask_nz[1], :]
    if samples.shape[0] > self.params.kmeans_max_samples:
        print("image is large: subsampling %s/%s random pixels" %
              (self.params.kmeans_max_samples, samples.shape[0]))
        samples = sklearn.utils \
            .shuffle(samples)[:self.params.kmeans_max_samples, :]
    samples[:, 0] *= self.params.kmeans_intensity_scale

    kmeans = MiniBatchKMeans(
        n_clusters=self.params.kmeans_n_clusters,
        compute_labels=False, random_state=random_state)
    kmeans.fit(samples)

    assert self.params.kmeans_intensity_scale > 0
    self.decomposition.intensities = (
        kmeans.cluster_centers_[:, 0] /
        self.params.kmeans_intensity_scale
    )
    self.decomposition.chromaticities = (
        kmeans.cluster_centers_[:, 1:3]
    )

    if self.params.logging:
        t1 = timeit.default_timer()
        print("clustering done (%s s).  intensities:\n%s" %
              (t1 - t0, self.decomposition.intensities))
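The subsampling step above uses sklearn.utils.shuffle, which returns a shuffled copy rather than shuffling in place; a standalone sketch of that idiom (array sizes are made up):

import numpy as np
import sklearn.utils

samples = np.random.rand(100000, 3)
max_samples = 10000

# Shuffle, then keep the first max_samples rows: a uniform random subsample.
subsample = sklearn.utils.shuffle(samples, random_state=0)[:max_samples, :]
print(subsample.shape)  # (10000, 3)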