Python scipy.special.boxcox() Examples

The following are 19 code examples of scipy.special.boxcox(), collected from open-source projects. Each example notes the project and source file it was taken from. You may also want to check out all available functions and classes of the scipy.special module.
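Before the examples, a minimal standalone sketch of the function itself: scipy.special.boxcox(x, lmbda) computes (x**lmbda - 1) / lmbda elementwise, and log(x) when lmbda is 0.

import numpy as np
from scipy.special import boxcox

x = np.array([0.5, 1.0, 2.0, 4.0])
print(boxcox(x, 0.0))   # same as np.log(x)
print(boxcox(x, 2.0))   # same as 0.5 * (x**2 - 1)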
Example #1
Source File: data.py    From Mastering-Elasticsearch-7.0 with MIT License
def _yeo_johnson_optimize(self, x):
        """Find and return optimal lambda parameter of the Yeo-Johnson
        transform by MLE, for observed data x.

        Like for Box-Cox, MLE is done via the brent optimizer.
        """

        def _neg_log_likelihood(lmbda):
            """Return the negative log likelihood of the observed data x as a
            function of lambda."""
            x_trans = self._yeo_johnson_transform(x, lmbda)
            n_samples = x.shape[0]

            loglike = -n_samples / 2 * np.log(x_trans.var())
            loglike += (lmbda - 1) * (np.sign(x) * np.log1p(np.abs(x))).sum()

            return -loglike

        # the computation of lambda is influenced by NaNs so we need to
        # get rid of them
        x = x[~np.isnan(x)]
        # choosing bracket -2, 2 like for boxcox
        return optimize.brent(_neg_log_likelihood, brack=(-2, 2)) 
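As a side note, recent SciPy versions offer the same fit in a single call: scipy.stats.yeojohnson returns the transformed data together with the MLE lambda when lmbda is left as None. A minimal sketch, assuming SciPy >= 1.2:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
x = rng.normal(size=100) ** 2                  # a skewed sample
xt, lmbda = stats.yeojohnson(x, lmbda=None)    # lambda estimated by MLE
print(lmbda)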
Example #2
Source File: test_boxcox.py    From GraphicDesignPatternByPython with MIT License
def test_boxcox_basic():
    x = np.array([0.5, 1, 2, 4])

    # lambda = 0  =>  y = log(x)
    y = boxcox(x, 0)
    assert_almost_equal(y, np.log(x))

    # lambda = 1  =>  y = x - 1
    y = boxcox(x, 1)
    assert_almost_equal(y, x - 1)

    # lambda = 2  =>  y = 0.5*(x**2 - 1)
    y = boxcox(x, 2)
    assert_almost_equal(y, 0.5*(x**2 - 1))

    # x = 0 and lambda > 0  =>  y = -1 / lambda
    lam = np.array([0.5, 1, 2])
    y = boxcox(0, lam)
    assert_almost_equal(y, -1.0 / lam) 
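The transform is invertible for a fixed lambda; scipy.special.inv_boxcox undoes it. A quick check along the same lines (not part of the original test):

import numpy as np
from scipy.special import boxcox, inv_boxcox

x = np.array([0.5, 1, 2, 4])
y = boxcox(x, 2.0)
np.testing.assert_allclose(inv_boxcox(y, 2.0), x)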
Example #3
Source File: test_mpmath.py    From GraphicDesignPatternByPython with MIT License
def test_boxcox(self):

        def mp_boxcox(x, lmbda):
            x = mpmath.mp.mpf(x)
            lmbda = mpmath.mp.mpf(lmbda)
            if lmbda == 0:
                return mpmath.mp.log(x)
            else:
                return mpmath.mp.powm1(x, lmbda) / lmbda

        assert_mpmath_equal(sc.boxcox,
                            exception_to_nan(mp_boxcox),
                            [Arg(a=0, inclusive_a=False), Arg()],
                            n=200,
                            dps=60,
                            rtol=1e-13) 
Example #4
Source File: test_boxcox.py    From GraphicDesignPatternByPython with MIT License
def test_boxcox_underflow():
    x = 1 + 1e-15
    lmbda = 1e-306
    y = boxcox(x, lmbda)
    assert_allclose(y, np.log(x), rtol=1e-14) 
Example #5
Source File: test_boxcox.py    From XenonPy with BSD 3-Clause "New" or "Revised" License
def test_transform_4x1_2(data):
    from scipy.special import boxcox as bc_
    shift = 1e-5
    bc = BoxCox(shift=shift)
    _data = data[0] - 2.
    trans = bc.fit_transform(_data)
    tmp = bc_(_data + (shift - _data.min()), bc.lambda_[0])
    assert np.all(trans == tmp)
    inverse = bc.inverse_transform(trans)
    assert np.allclose(inverse, _data) 
Example #6
Source File: transform.py    From XenonPy with BSD 3-Clause "New" or "Revised" License
def transform(self, x):
        """

        Parameters
        ----------
        x

        Returns
        -------
        DataFrame
            Box-Cox transformed data.
        """
        x = self._check_type(x)
        xs = []
        for i, col in enumerate(x.T):
            if np.all(col > 0):
                self._shift[i] = 0.
            else:
                self._shift[i] -= col[~np.isnan(col)].min()

            _lmd = self._lmd[i]
            _shift = self._shift[i]
            # use a separate name so the input array x is not shadowed inside the loop
            for case in Switch(_lmd):
                if case(np.inf):
                    _x = col
                    break
                if case(np.nan):
                    _x = np.full(col.shape, np.nan)
                    break
                if case():
                    _x = boxcox(col + _shift, _lmd)
            xs.append(_x.reshape(-1, 1))
        xs = np.concatenate(xs, axis=1)

        if len(self._shape) == 1:
            return xs.ravel()
        return xs.reshape(-1, self._shape[1]) 
Example #7
Source File: transform.py    From XenonPy with BSD 3-Clause "New" or "Revised" License
def __init__(self, *, lmd=None, shift=1e-9, tolerance=(-2, 2), on_err=None):
        """
        Parameters
        ----------
        lmd: list or 1-dim ndarray
            You can assign a specific lmd to each input series yourself.
            Leave as None (default) to use an inferred value.
            See `boxcox`_ for details.
        shift: float
            Guarantees that the data are positive.
            The Box-Cox transform requires all data to be positive, so each
            series is shifted by its minimum plus this value:
            ``x = x - x.min() + shift``.
        tolerance: tuple
            Tolerance of lmd. Set to None to accept any value.
            Default is **(-2, 2)**.
        on_err: None or str
            Error handling used when inferring lambda fails. Can be None,
            **log**, **nan** or **raise**.
            **log** returns the logarithmic transform of xs after shifting
            its minimum to 1.
            **nan** returns an ``ndarray`` of shape ``xs.shape`` filled with
            ``np.nan``.
            **raise** raises a ``FloatingPointError`` which you can catch
            yourself.
            The default (None) returns the input series without any scale
            transform.


        .. _boxcox:
            https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.boxcox.html
        """
        self._tolerance = tolerance
        self._shift = [shift]
        self._lmd = lmd
        self._shape = None
        self._on_err = on_err 
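A minimal usage sketch of this transformer, assuming the class is exposed as xenonpy.datatools.BoxCox (check the project for the exact import path); fit_transform, inverse_transform and lambda_ are the members already exercised in Example #5:

import numpy as np
from xenonpy.datatools import BoxCox   # assumed import path

data = np.array([[-1.0, 2.0], [0.5, 3.0], [1.5, 4.0]])
bc = BoxCox(shift=1e-5)                # non-positive columns are shifted first
trans = bc.fit_transform(data)         # lambdas inferred per column
restored = bc.inverse_transform(trans)
print(np.allclose(restored, data), bc.lambda_)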
Example #8
Source File: boxcox.py    From sktime with BSD 3-Clause "New" or "Revised" License
def transform(self, y, **transform_params):
        self.check_is_fitted()
        check_y(y)
        yt = boxcox(y.values, self.lambda_)
        return pd.Series(yt, index=y.index) 
Example #9
Source File: _continuous_distns.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _ppf(self, q, lam):
        return sc.boxcox(q, lam) - sc.boxcox1p(-q, lam) 
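This one-liner reads like a percent-point function written with boxcox; numerically it matches the Tukey-lambda PPF, (q**lam - (1 - q)**lam) / lam. A quick check of that reading (an interpretation, not stated in the snippet):

import numpy as np
from scipy import special as sc, stats

q, lam = np.array([0.1, 0.25, 0.5, 0.9]), 0.3
lhs = sc.boxcox(q, lam) - sc.boxcox1p(-q, lam)
print(np.allclose(lhs, stats.tukeylambda.ppf(q, lam)))  # True under this reading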
Example #10
Source File: _continuous_distns.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _isf(self, q, c):
        return -sc.boxcox(q, -c) 
Example #11
Source File: test_boxcox.py    From GraphicDesignPatternByPython with MIT License
def test_boxcox_nonfinite():
    # x < 0  =>  y = nan
    x = np.array([-1, -1, -0.5])
    y = boxcox(x, [0.5, 2.0, -1.5])
    assert_equal(y, np.array([np.nan, np.nan, np.nan]))

    # x = 0 and lambda <= 0  =>  y = -inf
    x = 0
    y = boxcox(x, [-2.5, 0])
    assert_equal(y, np.array([-np.inf, -np.inf])) 
Example #12
Source File: _continuous_distns.py    From lambda-packs with MIT License
def _isf(self, q, c):
        return -sc.boxcox(q, -c) 
Example #13
Source File: _continuous_distns.py    From GraphicDesignPatternByPython with MIT License
def _ppf(self, q, lam):
        return sc.boxcox(q, lam) - sc.boxcox1p(-q, lam) 
Example #14
Source File: _continuous_distns.py    From GraphicDesignPatternByPython with MIT License
def _isf(self, q, c):
        return -sc.boxcox(q, -c) 
Example #15
Source File: data.py    From Mastering-Elasticsearch-7.0 with MIT License
def _box_cox_optimize(self, x):
        """Find and return optimal lambda parameter of the Box-Cox transform by
        MLE, for observed data x.

        Here we use the scipy builtin, which uses the Brent optimizer.
        """
        # the computation of lambda is influenced by NaNs so we need to
        # get rid of them
        _, lmbda = stats.boxcox(x[~np.isnan(x)], lmbda=None)

        return lmbda 
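The scipy builtin referenced here is scipy.stats.boxcox: called with lmbda=None it returns both the transformed data and the lambda found by MLE. A minimal sketch on synthetic positive data:

import numpy as np
from scipy import stats

x = np.random.default_rng(0).lognormal(size=200)   # Box-Cox requires positive data
xt, lmbda = stats.boxcox(x, lmbda=None)            # lambda estimated by MLE
print(lmbda)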
Example #16
Source File: data.py    From Mastering-Elasticsearch-7.0 with MIT License
def transform(self, X):
        """Apply the power transform to each feature using the fitted lambdas.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The data to be transformed using a power transformation.

        Returns
        -------
        X_trans : array-like, shape (n_samples, n_features)
            The transformed data.
        """
        check_is_fitted(self, 'lambdas_')
        X = self._check_input(X, check_positive=True, check_shape=True)

        transform_function = {'box-cox': boxcox,
                              'yeo-johnson': self._yeo_johnson_transform
                              }[self.method]
        for i, lmbda in enumerate(self.lambdas_):
            with np.errstate(invalid='ignore'):  # hide NaN warnings
                X[:, i] = transform_function(X[:, i], lmbda)

        if self.standardize:
            X = self._scaler.transform(X)

        return X 
Example #17
Source File: data.py    From Mastering-Elasticsearch-7.0 with MIT License
def _fit(self, X, y=None, force_transform=False):
        X = self._check_input(X, check_positive=True, check_method=True)

        if not self.copy and not force_transform:  # if call from fit()
            X = X.copy()  # force copy so that fit does not change X inplace

        optim_function = {'box-cox': self._box_cox_optimize,
                          'yeo-johnson': self._yeo_johnson_optimize
                          }[self.method]
        with np.errstate(invalid='ignore'):  # hide NaN warnings
            self.lambdas_ = np.array([optim_function(col) for col in X.T])

        if self.standardize or force_transform:
            transform_function = {'box-cox': boxcox,
                                  'yeo-johnson': self._yeo_johnson_transform
                                  }[self.method]
            for i, lmbda in enumerate(self.lambdas_):
                with np.errstate(invalid='ignore'):  # hide NaN warnings
                    X[:, i] = transform_function(X[:, i], lmbda)

        if self.standardize:
            self._scaler = StandardScaler(copy=False)
            if force_transform:
                X = self._scaler.fit_transform(X)
            else:
                self._scaler.fit(X)

        return X 
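These _fit and transform methods appear to mirror scikit-learn's PowerTransformer (an assumption based on the matching names; the file is vendored in the project above). A short end-to-end sketch of that public API, assuming scikit-learn >= 0.20:

import numpy as np
from sklearn.preprocessing import PowerTransformer

X = np.random.default_rng(0).lognormal(size=(100, 2))   # Box-Cox needs positive data
pt = PowerTransformer(method='box-cox', standardize=True)
X_trans = pt.fit_transform(X)
print(pt.lambdas_)   # one fitted lambda per feature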
Example #18
Source File: _continuous_distns.py    From lambda-packs with MIT License
def _ppf(self, q, lam):
        return sc.boxcox(q, lam) - sc.boxcox1p(-q, lam) 
Example #19
Source File: boxcox.py    From sktime with BSD 3-Clause "New" or "Revised" License
def boxcox_normmax(x, bounds=None, brack=(-2.0, 2.0), method='pearsonr'):
    # if bounds is None, use simple (unbounded) Brent optimisation
    if bounds is None:
        def optimizer(func, args):
            return optimize.brent(func, brack=brack, args=args)

    # otherwise use bounded Brent optimisation
    else:
        # input checks on bounds
        if not isinstance(bounds, tuple) or len(bounds) != 2:
            raise ValueError(
                f"`bounds` must be a tuple of length 2, but found: {bounds}")

        def optimizer(func, args):
            return optimize.fminbound(func, bounds[0], bounds[1], args=args)

    def _pearsonr(x):
        osm_uniform = _calc_uniform_order_statistic_medians(len(x))
        xvals = distributions.norm.ppf(osm_uniform)

        def _eval_pearsonr(lmbda, xvals, samps):
            # Transform the sample with the candidate lambda, compute the
            # Pearson correlation against the theoretical normal quantiles
            # (``xvals``), and return ``1 - r`` so that a minimization
            # routine maximizes the correlation.
            y = boxcox(samps, lmbda)
            yvals = np.sort(y)
            r, prob = stats.pearsonr(xvals, yvals)
            return 1 - r

        return optimizer(_eval_pearsonr, args=(xvals, x))

    def _mle(x):
        def _eval_mle(lmb, data):
            # function to minimize
            return -boxcox_llf(lmb, data)

        return optimizer(_eval_mle, args=(x,))

    def _all(x):
        maxlog = np.zeros(2, dtype=float)
        maxlog[0] = _pearsonr(x)
        maxlog[1] = _mle(x)
        return maxlog

    methods = {'pearsonr': _pearsonr,
               'mle': _mle,
               'all': _all}
    if method not in methods.keys():
        raise ValueError("Method %s not recognized." % method)

    optimfunc = methods[method]
    return optimfunc(x)
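scipy.stats ships a function of the same name with a compatible interface; a minimal sketch of both objectives on synthetic data:

import numpy as np
from scipy import stats

x = np.random.default_rng(0).lognormal(size=200)
print(stats.boxcox_normmax(x, brack=(-2.0, 2.0), method='pearsonr'))
print(stats.boxcox_normmax(x, method='mle'))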