Python scipy.stats.chi2() Examples

The following code examples show how to use scipy.stats.chi2(). They are drawn from open source Python projects.
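
Before the project examples, here is a minimal standalone sketch (not taken from any project below) of the frozen-distribution pattern nearly all of them rely on: chi2(df) freezes the degrees of freedom, after which pdf, cdf, ppf, sf, and rvs can be called without repeating df.

import numpy as np
from scipy.stats import chi2

rv = chi2(3)                            # freeze a chi-squared distribution with df=3

print(rv.mean(), rv.var())              # 3.0 6.0  (mean = df, variance = 2*df)
print(rv.ppf(0.95))                     # ~7.815, the 95% critical value
print(rv.sf(rv.ppf(0.95)))              # ~0.05, since sf = 1 - cdf
print(rv.rvs(size=3, random_state=0))   # reproducible random draws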

Example 1
Project: pyfilter   Author: tingiskhan   File: utils.py    MIT License
def normal_test(x, alpha=0.05):
    """
    Implements a basic Jarque-Bera test for normality.
    :param x: The data
    :type x: torch.Tensor
    :param alpha: The significance level
    :type alpha: float
    :return: True if normality is not rejected at level alpha
    :rtype: bool
    """
    mean = x.mean(0)
    var = ((x - mean) ** 2).mean(0)

    # ===== Skew ===== #
    skew = ((x - mean) ** 3).mean(0) / var ** 1.5

    # ===== Kurtosis ===== #
    kurt = ((x - mean) ** 4).mean(0) / var ** 2

    # ===== Statistic ===== #
    jb = x.shape[0] / 6 * (skew ** 2 + 1 / 4 * (kurt - 3) ** 2)

    return chi2(2).ppf(1 - alpha) >= jb 
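
The statistic above is the classic Jarque-Bera form n/6 * (S^2 + (K - 3)^2 / 4), compared against the chi2(2) critical value rather than converted to a p-value. A NumPy-only sketch (illustrative data, independent of pyfilter) checks the same computation against scipy.stats.jarque_bera:

import numpy as np
from scipy import stats

np.random.seed(0)
x = np.random.normal(size=1000)

n = x.shape[0]
mean = x.mean()
var = ((x - mean) ** 2).mean()
skew = ((x - mean) ** 3).mean() / var ** 1.5
kurt = ((x - mean) ** 4).mean() / var ** 2
jb = n / 6 * (skew ** 2 + (kurt - 3) ** 2 / 4)

print(np.isclose(jb, stats.jarque_bera(x)[0]))   # same statistic as scipy's
print(stats.chi2(2).ppf(0.95) >= jb)             # True -> normality not rejected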
Example 2
Project: vnpy_crypto   Author: birforce   File: test_transf.py    MIT License
def setup_class(cls):
        cls.dist_equivalents = [
            #transf, stats.lognorm(1))
            #The below fails on the SPARC box with scipy 10.1
            #(lognormalg, stats.lognorm(1)),
            #transf2
            (squarenormalg, stats.chi2(1)),
            (absnormalg, stats.halfnorm),
            (absnormalg, stats.foldnorm(1e-5)),  #try frozen
            #(negsquarenormalg, 1-stats.chi2),  # won't work as distribution
            (squaretg(10), stats.f(1, 10)),
            ]  #try both frozen

        l,s = 0.0, 1.0
        cls.ppfq = [0.1,0.5,0.9]
        cls.xx = [0.95,1.0,1.1]
        cls.nxx = [-0.95,-1.0,-1.1] 
Example 3
Project: vnpy_crypto   Author: birforce   File: test_transf.py    MIT License
def test_equivalent_negsq(self):
        #special case negsquarenormalg
        #negsquarenormalg.cdf(x) == stats.chi2(1).cdf(-x), for x<=0

        xx, nxx, ppfq = self.xx, self.nxx, self.ppfq
        d1,d2 = (negsquarenormalg, stats.chi2(1))
        #print d1.name
        assert_almost_equal(d1.cdf(nxx), 1-d2.cdf(xx), err_msg='cdf'+d1.name)
        assert_almost_equal(d1.pdf(nxx), d2.pdf(xx))
        assert_almost_equal(d1.sf(nxx), 1-d2.sf(xx))
        assert_almost_equal(d1.ppf(ppfq), -d2.ppf(ppfq)[::-1])
        assert_almost_equal(d1.isf(ppfq), -d2.isf(ppfq)[::-1])
        assert_almost_equal(d1.moment(3), -d2.moment(3))
        ch2oddneg = [v*(-1)**(i+1) for i,v in
                     enumerate(d2.stats(moments='mvsk'))]
        assert_almost_equal(d1.stats(moments='mvsk'), ch2oddneg,
                            err_msg='stats '+d1.name+d2.name) 
Example 4
Project: linear_neuron   Author: uglyboxer   File: robust_covariance.py    MIT License
def correct_covariance(self, data):
        """Apply a correction to raw Minimum Covariance Determinant estimates.

        Correction using the empirical correction factor suggested
        by Rousseeuw and Van Driessen in [Rouseeuw1984]_.

        Parameters
        ----------
        data : array-like, shape (n_samples, n_features)
            The data matrix, with p features and n samples.
            The data set must be the one which was used to compute
            the raw estimates.

        Returns
        -------
        covariance_corrected : array-like, shape (n_features, n_features)
            Corrected robust covariance estimate.

        """
        correction = np.median(self.dist_) / chi2(data.shape[1]).isf(0.5)
        covariance_corrected = self.raw_covariance_ * correction
        self.dist_ /= correction
        return covariance_corrected 
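
The correction works because squared Mahalanobis distances of p-variate Gaussian data follow chi2(p), so their median should equal chi2(p).isf(0.5); systematic shrinkage of the raw MCD covariance shows up as a ratio away from 1. A self-contained toy computation (known identity scatter, not sklearn's estimator):

import numpy as np
from scipy.stats import chi2

np.random.seed(0)
p = 4
X = np.random.normal(size=(500, p))

# Squared Mahalanobis distances w.r.t. the true (identity) scatter.
d2 = np.sum(X ** 2, axis=1)

# chi2(p).isf(0.5) is the chi2 median, ~3.357 for p=4.
correction = np.median(d2) / chi2(p).isf(0.5)
print(correction)   # ~1.0 here; distances from a shrunken covariance would inflate it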
Example 5
Project: Weiss   Author: WangWenjun559   File: robust_covariance.py    Apache License 2.0
def correct_covariance(self, data):
        """Apply a correction to raw Minimum Covariance Determinant estimates.

        Correction using the empirical correction factor suggested
        by Rousseeuw and Van Driessen in [Rouseeuw1984]_.

        Parameters
        ----------
        data : array-like, shape (n_samples, n_features)
            The data matrix, with p features and n samples.
            The data set must be the one which was used to compute
            the raw estimates.

        Returns
        -------
        covariance_corrected : array-like, shape (n_features, n_features)
            Corrected robust covariance estimate.

        """
        correction = np.median(self.dist_) / chi2(data.shape[1]).isf(0.5)
        covariance_corrected = self.raw_covariance_ * correction
        self.dist_ /= correction
        return covariance_corrected 
Example 6
Project: LaserTOF   Author: kyleuckert   File: test_multivariate.py    MIT License
def test_1D_is_chisquared(self):
        # The 1-dimensional Wishart with an identity scale matrix is just a
        # chi-squared distribution.
        # Test variance, mean, entropy, pdf
        # Kolmogorov-Smirnov test for rvs
        np.random.seed(482974)

        sn = 500
        dim = 1
        scale = np.eye(dim)

        df_range = np.arange(1, 10, 2, dtype=float)
        X = np.linspace(0.1,10,num=10)
        for df in df_range:
            w = wishart(df, scale)
            c = chi2(df)

            # Statistics
            assert_allclose(w.var(), c.var())
            assert_allclose(w.mean(), c.mean())
            assert_allclose(w.entropy(), c.entropy())

            # PDF
            assert_allclose(w.pdf(X), c.pdf(X))

            # rvs
            rvs = w.rvs(size=sn)
            args = (df,)
            alpha = 0.01
            check_distribution_rvs('chi2', args, alpha, rvs) 
Example 7
Project: LaserTOF   Author: kyleuckert   File: test_multivariate.py    MIT License
def test_is_scaled_chisquared(self):
        # The 2-dimensional Wishart with an arbitrary scale matrix can be
        # transformed to a scaled chi-squared distribution.
        # For :math:`S \sim W_p(V,n)` and :math:`\lambda \in \mathbb{R}^p` we have
        # :math:`\lambda' S \lambda \sim \lambda' V \lambda \times \chi^2(n)`
        np.random.seed(482974)

        sn = 500
        df = 10
        dim = 4
        # Construct an arbitrary positive definite matrix
        scale = np.diag(np.arange(4)+1)
        scale[np.tril_indices(4, k=-1)] = np.arange(6)
        scale = np.dot(scale.T, scale)
        # Use :math:`\lambda = [1, \dots, 1]'`
        lamda = np.ones((dim,1))
        sigma_lamda = lamda.T.dot(scale).dot(lamda).squeeze()
        w = wishart(df, sigma_lamda)
        c = chi2(df, scale=sigma_lamda)

        # Statistics
        assert_allclose(w.var(), c.var())
        assert_allclose(w.mean(), c.mean())
        assert_allclose(w.entropy(), c.entropy())

        # PDF
        X = np.linspace(0.1,10,num=10)
        assert_allclose(w.pdf(X), c.pdf(X))

        # rvs
        rvs = w.rvs(size=sn)
        args = (df,0,sigma_lamda)
        alpha = 0.01
        check_distribution_rvs('chi2', args, alpha, rvs) 
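
Both identities are easy to verify outside the test suite. A short sketch (df and scale values picked arbitrarily for illustration) confirms that a 1-D unit-scale Wishart has exactly the chi2(df) density, and that a quadratic form of Wishart draws behaves like a scaled chi2:

import numpy as np
from scipy.stats import wishart, chi2

# 1-D Wishart with identity scale matches chi2(df) exactly.
x = np.linspace(0.1, 10, 5)
print(np.allclose(wishart(df=5, scale=1.0).pdf(x), chi2(5).pdf(x)))  # True

# lam' S lam for S ~ W_p(V, df) is distributed as (lam' V lam) * chi2(df).
V = np.array([[2.0, 0.3], [0.3, 1.0]])
lam = np.ones(2)
s = lam @ V @ lam
draws = wishart(df=8, scale=V).rvs(size=2000, random_state=0)
q = np.einsum('i,nij,j->n', lam, draws, lam)
print(q.mean(), chi2(8, scale=s).mean())   # both close to 8 * s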
Example 8
Project: vnpy_crypto   Author: birforce   File: var_model.py    MIT License
def test_whiteness(self, nlags=10, signif=0.05, adjusted=False):
        """
        Residual whiteness tests using Portmanteau test

        Parameters
        ----------
        nlags : int > 0
            Number of lags used in the Portmanteau statistic.
        signif : float, between 0 and 1
            Significance level of the test.
        adjusted : bool, default False
            Whether to apply the small-sample adjustment to the statistic.

        Returns
        -------
        results : WhitenessTestResults

        Notes
        -----
        Test the whiteness of the residuals using the Portmanteau test as
        described in [1]_, chapter 4.4.3.

        References
        ----------
        .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.
        """
        statistic = 0
        u = np.asarray(self.resid)
        acov_list = _compute_acov(u, nlags)
        cov0_inv = scipy.linalg.inv(acov_list[0])
        for t in range(1, nlags+1):
            ct = acov_list[t]
            to_add = np.trace(chain_dot(ct.T, cov0_inv, ct, cov0_inv))
            if adjusted:
                to_add /= (self.nobs - t)
            statistic += to_add
        statistic *= self.nobs**2 if adjusted else self.nobs
        df = self.neqs**2 * (nlags - self.k_ar)
        dist = stats.chi2(df)
        pvalue = dist.sf(statistic)
        crit_value = dist.ppf(1 - signif)

        return WhitenessTestResults(statistic, crit_value, pvalue, df, signif,
                                    nlags, adjusted) 
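
The last three lines encode the standard decision rule: since sf is 1 - cdf and ppf is its inverse, comparing the statistic to the critical value is the same as comparing the p-value to the significance level. A tiny sketch with a made-up statistic:

from scipy import stats

df, signif = 9, 0.05
dist = stats.chi2(df)

statistic = 18.2                 # illustrative value, not from a fitted model
pvalue = dist.sf(statistic)      # P(X >= statistic) under the null, ~0.033
crit = dist.ppf(1 - signif)      # rejection threshold, ~16.92

# reject iff statistic > crit iff pvalue < signif
print(statistic > crit, pvalue < signif)   # True True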
Example 9
Project: vnpy_crypto   Author: birforce   File: contrast.py    MIT License
def __init__(self, statistic, distribution, dist_args, table=None,
                 pvalues=None):
        self.table = table

        self.distribution = distribution
        self.statistic = statistic
        #self.sd = sd
        self.dist_args = dist_args

        # The following is because I don't know which we want
        if table is not None:
            self.statistic = table['statistic'].values
            self.pvalues = table['pvalue'].values
            self.df_constraints = table['df_constraint'].values
            if self.distribution == 'F':
                self.df_denom = table['df_denom'].values

        else:
            if self.distribution == 'chi2':
                self.dist = stats.chi2
                self.df_constraints = self.dist_args[0]  # assumes tuple
                # using dist_args[0] is a bit dangerous,
            elif self.distribution == 'F':
                self.dist = stats.f
                self.df_constraints, self.df_denom = self.dist_args

            else:
                raise ValueError('only F and chi2 are possible distributions')

            if pvalues is None:
                self.pvalues = self.dist.sf(np.abs(statistic), *dist_args)
            else:
                self.pvalues = pvalues 
Example 10
Project: vnpy_crypto   Author: birforce   File: ex_transf2.py    MIT License
def __init__(self):
        self.dist = squarenormalg
        self.trargs = ()
        self.trkwds = dict(loc=-10, scale=20)
        self.statsdist = stats.chi2
        self.stargs = (1,)
        self.stkwds = dict(loc=-10, scale=20) 
Example 11
Project: vnpy_crypto   Author: birforce   File: ex_transf2.py    MIT License
def test_squared_normal_chi2():
    #'\nsquare of standard normal random variable is chisquare with dof=1 distributed'
    cdftr = squarenormalg.cdf(xx,loc=l, scale=s)
    sfctr = 1-squarenormalg.sf(xx,loc=l, scale=s) #sf complement
    cdfst = stats.chi2.cdf(xx,1)
    assert_almost_equal(cdfst, cdftr, 14)
    assert_almost_equal(cdfst, sfctr, 14)

#    print('sqnorm  pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.pdf(xx,loc=l, scale=s))
#    print('chi2    pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1))
#    print('sqnorm  ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.ppf(ppfq,loc=l, scale=s))
#    print('chi2    ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.ppf(ppfq,1))
#    print('sqnorm  cdf with loc scale', squarenormalg.cdf(xx,loc=-10, scale=20))
#    print('chi2    cdf with loc scale', stats.chi2.cdf(xx,1,loc=-10, scale=20))
Example 12
Project: ble5-nrf52-mac   Author: tomasero   File: test_multivariate.py    MIT License
def test_1D_is_chisquared(self):
        # The 1-dimensional Wishart with an identity scale matrix is just a
        # chi-squared distribution.
        # Test variance, mean, entropy, pdf
        # Kolmogorov-Smirnov test for rvs
        np.random.seed(482974)

        sn = 500
        dim = 1
        scale = np.eye(dim)

        df_range = np.arange(1, 10, 2, dtype=float)
        X = np.linspace(0.1,10,num=10)
        for df in df_range:
            w = wishart(df, scale)
            c = chi2(df)

            # Statistics
            assert_allclose(w.var(), c.var())
            assert_allclose(w.mean(), c.mean())
            assert_allclose(w.entropy(), c.entropy())

            # PDF
            assert_allclose(w.pdf(X), c.pdf(X))

            # rvs
            rvs = w.rvs(size=sn)
            args = (df,)
            alpha = 0.01
            check_distribution_rvs('chi2', args, alpha, rvs) 
Example 13
Project: ble5-nrf52-mac   Author: tomasero   File: test_multivariate.py    MIT License
def test_is_scaled_chisquared(self):
        # The 2-dimensional Wishart with an arbitrary scale matrix can be
        # transformed to a scaled chi-squared distribution.
        # For :math:`S \sim W_p(V,n)` and :math:`\lambda \in \mathbb{R}^p` we have
        # :math:`\lambda' S \lambda \sim \lambda' V \lambda \times \chi^2(n)`
        np.random.seed(482974)

        sn = 500
        df = 10
        dim = 4
        # Construct an arbitrary positive definite matrix
        scale = np.diag(np.arange(4)+1)
        scale[np.tril_indices(4, k=-1)] = np.arange(6)
        scale = np.dot(scale.T, scale)
        # Use :math:`\lambda = [1, \dots, 1]'`
        lamda = np.ones((dim,1))
        sigma_lamda = lamda.T.dot(scale).dot(lamda).squeeze()
        w = wishart(df, sigma_lamda)
        c = chi2(df, scale=sigma_lamda)

        # Statistics
        assert_allclose(w.var(), c.var())
        assert_allclose(w.mean(), c.mean())
        assert_allclose(w.entropy(), c.entropy())

        # PDF
        X = np.linspace(0.1,10,num=10)
        assert_allclose(w.pdf(X), c.pdf(X))

        # rvs
        rvs = w.rvs(size=sn)
        args = (df,0,sigma_lamda)
        alpha = 0.01
        check_distribution_rvs('chi2', args, alpha, rvs) 
Example 14
Project: poker   Author: surgebiswas   File: test_multivariate.py    MIT License
def test_1D_is_chisquared(self):
        # The 1-dimensional Wishart with an identity scale matrix is just a
        # chi-squared distribution.
        # Test variance, mean, entropy, pdf
        # Kolmogorov-Smirnov test for rvs
        np.random.seed(482974)

        sn = 500
        dim = 1
        scale = np.eye(dim)

        df_range = np.arange(1, 10, 2, dtype=float)
        X = np.linspace(0.1,10,num=10)
        for df in df_range:
            w = wishart(df, scale)
            c = chi2(df)

            # Statistics
            assert_allclose(w.var(), c.var())
            assert_allclose(w.mean(), c.mean())
            assert_allclose(w.entropy(), c.entropy())

            # PDF
            assert_allclose(w.pdf(X), c.pdf(X))

            # rvs
            rvs = w.rvs(size=sn)
            args = (df,)
            alpha = 0.01
            check_distribution_rvs('chi2', args, alpha, rvs) 
Example 15
Project: poker   Author: surgebiswas   File: test_multivariate.py    MIT License
def test_is_scaled_chisquared(self):
        # The 2-dimensional Wishart with an arbitrary scale matrix can be
        # transformed to a scaled chi-squared distribution.
        # For :math:`S \sim W_p(V,n)` and :math:`\lambda \in \mathbb{R}^p` we have
        # :math:`\lambda' S \lambda \sim \lambda' V \lambda \times \chi^2(n)`
        np.random.seed(482974)

        sn = 500
        df = 10
        dim = 4
        # Construct an arbitrary positive definite matrix
        scale = np.diag(np.arange(4)+1)
        scale[np.tril_indices(4, k=-1)] = np.arange(6)
        scale = np.dot(scale.T, scale)
        # Use :math:`\lambda = [1, \dots, 1]'`
        lamda = np.ones((dim,1))
        sigma_lamda = lamda.T.dot(scale).dot(lamda).squeeze()
        w = wishart(df, sigma_lamda)
        c = chi2(df, scale=sigma_lamda)

        # Statistics
        assert_allclose(w.var(), c.var())
        assert_allclose(w.mean(), c.mean())
        assert_allclose(w.entropy(), c.entropy())

        # PDF
        X = np.linspace(0.1,10,num=10)
        assert_allclose(w.pdf(X), c.pdf(X))

        # rvs
        rvs = w.rvs(size=sn)
        args = (df,0,sigma_lamda)
        alpha = 0.01
        check_distribution_rvs('chi2', args, alpha, rvs) 
Example 16
Project: distcan   Author: sglyon   File: univariate.py    MIT License
def __init__(self, k):
        self.k = k

        # set dist before calling super's __init__
        self.dist = st.chi2(df=k)
        super(Chisq, self).__init__() 
Example 17
Project: P3_image_processing   Author: latedude2   File: test_multivariate.py    MIT License
def test_1D_is_chisquared(self):
        # The 1-dimensional Wishart with an identity scale matrix is just a
        # chi-squared distribution.
        # Test variance, mean, entropy, pdf
        # Kolmogorov-Smirnov test for rvs
        np.random.seed(482974)

        sn = 500
        dim = 1
        scale = np.eye(dim)

        df_range = np.arange(1, 10, 2, dtype=float)
        X = np.linspace(0.1,10,num=10)
        for df in df_range:
            w = wishart(df, scale)
            c = chi2(df)

            # Statistics
            assert_allclose(w.var(), c.var())
            assert_allclose(w.mean(), c.mean())
            assert_allclose(w.entropy(), c.entropy())

            # PDF
            assert_allclose(w.pdf(X), c.pdf(X))

            # rvs
            rvs = w.rvs(size=sn)
            args = (df,)
            alpha = 0.01
            check_distribution_rvs('chi2', args, alpha, rvs) 
Example 18
Project: P3_image_processing   Author: latedude2   File: test_multivariate.py    MIT License
def test_is_scaled_chisquared(self):
        # The 2-dimensional Wishart with an arbitrary scale matrix can be
        # transformed to a scaled chi-squared distribution.
        # For :math:`S \sim W_p(V,n)` and :math:`\lambda \in \mathbb{R}^p` we have
        # :math:`\lambda' S \lambda \sim \lambda' V \lambda \times \chi^2(n)`
        np.random.seed(482974)

        sn = 500
        df = 10
        dim = 4
        # Construct an arbitrary positive definite matrix
        scale = np.diag(np.arange(4)+1)
        scale[np.tril_indices(4, k=-1)] = np.arange(6)
        scale = np.dot(scale.T, scale)
        # Use :math:`\lambda = [1, \dots, 1]'`
        lamda = np.ones((dim,1))
        sigma_lamda = lamda.T.dot(scale).dot(lamda).squeeze()
        w = wishart(df, sigma_lamda)
        c = chi2(df, scale=sigma_lamda)

        # Statistics
        assert_allclose(w.var(), c.var())
        assert_allclose(w.mean(), c.mean())
        assert_allclose(w.entropy(), c.entropy())

        # PDF
        X = np.linspace(0.1,10,num=10)
        assert_allclose(w.pdf(X), c.pdf(X))

        # rvs
        rvs = w.rvs(size=sn)
        args = (df,0,sigma_lamda)
        alpha = 0.01
        check_distribution_rvs('chi2', args, alpha, rvs) 
Example 19
Project: chainer   Author: chainer   File: test_chisquare.py    MIT License
def setUp_configure(self):
        from scipy import stats
        self.dist = distributions.Chisquare
        self.scipy_dist = stats.chi2

        self.test_targets = set([
            'batch_shape', 'entropy', 'event_shape', 'log_prob', 'mean',
            'sample', 'support', 'variance'])

        k = numpy.random.randint(1, 10, self.shape).astype(numpy.float32)
        self.params = {'k': k}
        self.scipy_params = {'df': k}

        self.support = 'positive' 
Example 20
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_multivariate.py    MIT License
def test_1D_is_chisquared(self):
        # The 1-dimensional Wishart with an identity scale matrix is just a
        # chi-squared distribution.
        # Test variance, mean, entropy, pdf
        # Kolmogorov-Smirnov test for rvs
        np.random.seed(482974)

        sn = 500
        dim = 1
        scale = np.eye(dim)

        df_range = np.arange(1, 10, 2, dtype=float)
        X = np.linspace(0.1,10,num=10)
        for df in df_range:
            w = wishart(df, scale)
            c = chi2(df)

            # Statistics
            assert_allclose(w.var(), c.var())
            assert_allclose(w.mean(), c.mean())
            assert_allclose(w.entropy(), c.entropy())

            # PDF
            assert_allclose(w.pdf(X), c.pdf(X))

            # rvs
            rvs = w.rvs(size=sn)
            args = (df,)
            alpha = 0.01
            check_distribution_rvs('chi2', args, alpha, rvs) 
Example 21
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_multivariate.py    MIT License
def test_is_scaled_chisquared(self):
        # The 2-dimensional Wishart with an arbitrary scale matrix can be
        # transformed to a scaled chi-squared distribution.
        # For :math:`S \sim W_p(V,n)` and :math:`\lambda \in \mathbb{R}^p` we have
        # :math:`\lambda' S \lambda \sim \lambda' V \lambda \times \chi^2(n)`
        np.random.seed(482974)

        sn = 500
        df = 10
        dim = 4
        # Construct an arbitrary positive definite matrix
        scale = np.diag(np.arange(4)+1)
        scale[np.tril_indices(4, k=-1)] = np.arange(6)
        scale = np.dot(scale.T, scale)
        # Use :math:`\lambda = [1, \dots, 1]'`
        lamda = np.ones((dim,1))
        sigma_lamda = lamda.T.dot(scale).dot(lamda).squeeze()
        w = wishart(df, sigma_lamda)
        c = chi2(df, scale=sigma_lamda)

        # Statistics
        assert_allclose(w.var(), c.var())
        assert_allclose(w.mean(), c.mean())
        assert_allclose(w.entropy(), c.entropy())

        # PDF
        X = np.linspace(0.1,10,num=10)
        assert_allclose(w.pdf(X), c.pdf(X))

        # rvs
        rvs = w.rvs(size=sn)
        args = (df,0,sigma_lamda)
        alpha = 0.01
        check_distribution_rvs('chi2', args, alpha, rvs) 
Example 22
Project: PsrSigSim   Author: PsrSigSim   File: receiver.py    MIT License
def _make_pow_noise(self, signal, Tsys, gain, pulsar):
        """radiometer noise for power signals
        """
        print(Tsys, gain)
        if signal.subint:
            # number of bins per subint
            nbins = signal.nsamp/signal.nsub
            dt = signal.sublen / nbins  # seconds per bin
        else:
            nbins = signal.samprate
            dt = 1 / nbins
        bw_per_chan = signal.bw / signal.Nchan
        print(dt, bw_per_chan)

        # noise variance
        sigS = Tsys / gain / np.sqrt(dt * bw_per_chan)
        print(sigS)

        df = signal.Nfold if signal.subint else 1
        distr = stats.chi2(df)
        print(df)

        # scaling factor due to profile normalization (see Lam et al. 2018a)
        U_scale = 1.0 / (np.sum(pulsar.Profiles())/nbins)
        print(U_scale)

        norm = (sigS * signal._draw_norm / signal._Smax).decompose() * U_scale
        print(norm)
        noise = norm * distr.rvs(size=signal.data.shape)

        return noise.value  # drop units! 
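
The degrees of freedom here (Nfold for subintegrated data, 1 otherwise) set the noise statistics: folding N pulses effectively sums N chi2(1) intensity draws, so the normalized draw chi2(N)/N keeps mean 1 while its variance falls as 2/N. A quick check of that scaling, independent of PsrSigSim:

import numpy as np
from scipy import stats

# Normalized chi2 draws: mean stays at 1, variance shrinks as 2/df.
for df in (1, 16, 256):
    draws = stats.chi2(df).rvs(size=100000, random_state=0) / df
    print(df, round(draws.mean(), 3), round(draws.var(), 4), 2 / df)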
Example 23
Project: Effective-Quadratures   Author: Effective-Quadratures   File: chisquared.py    GNU Lesser General Public License v2.1
def __init__(self, dofs):
        self.dofs = dofs
        if self.dofs is not None:
            if self.dofs == 1:
                self.bounds = np.array([1e-15, np.inf])
            else:
                self.bounds = np.array([0.0, np.inf])
            if self.dofs >= 1:
                self.mean = float(self.dofs)
                self.variance = 2 * self.mean
                self.skewness = np.sqrt(8.0 / self.mean)
                self.kurtosis = 12.0/self.mean + 3.0
                self.x_range_for_pdf = np.linspace(0.0, 10.0*self.mean,RECURRENCE_PDF_SAMPLES)
                self.parent = chi2(self.dofs) 
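
The closed-form moments stored above (mean k, variance 2k, skewness sqrt(8/k), kurtosis 12/k + 3) can be checked against scipy directly; note that chi2.stats reports excess kurtosis, which accounts for the +3 in the class. A small verification sketch:

import numpy as np
from scipy.stats import chi2

k = 6.0
mean, var, skew, kurt = chi2(k).stats(moments='mvsk')

print(mean == k, var == 2 * k)             # True True
print(np.isclose(skew, np.sqrt(8.0 / k)))  # True
print(np.isclose(kurt, 12.0 / k))          # True: excess kurtosis = (12/k + 3) - 3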
Example 24
Project: wine-ml-on-aws-lambda   Author: pierreant   File: robust_covariance.py    Apache License 2.0
def correct_covariance(self, data):
        """Apply a correction to raw Minimum Covariance Determinant estimates.

        Correction using the empirical correction factor suggested
        by Rousseeuw and Van Driessen in [RVD]_.

        Parameters
        ----------
        data : array-like, shape (n_samples, n_features)
            The data matrix, with p features and n samples.
            The data set must be the one which was used to compute
            the raw estimates.

        References
        ----------

        .. [RVD] `A Fast Algorithm for the Minimum Covariance
            Determinant Estimator, 1999, American Statistical Association
            and the American Society for Quality, TECHNOMETRICS`

        Returns
        -------
        covariance_corrected : array-like, shape (n_features, n_features)
            Corrected robust covariance estimate.

        """
        correction = np.median(self.dist_) / chi2(data.shape[1]).isf(0.5)
        covariance_corrected = self.raw_covariance_ * correction
        self.dist_ /= correction
        return covariance_corrected 
Example 25
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_multivariate.py    Apache License 2.0
def test_1D_is_chisquared(self):
        # The 1-dimensional Wishart with an identity scale matrix is just a
        # chi-squared distribution.
        # Test variance, mean, entropy, pdf
        # Kolmogorov-Smirnov test for rvs
        np.random.seed(482974)

        sn = 500
        dim = 1
        scale = np.eye(dim)

        df_range = np.arange(1, 10, 2, dtype=float)
        X = np.linspace(0.1,10,num=10)
        for df in df_range:
            w = wishart(df, scale)
            c = chi2(df)

            # Statistics
            assert_allclose(w.var(), c.var())
            assert_allclose(w.mean(), c.mean())
            assert_allclose(w.entropy(), c.entropy())

            # PDF
            assert_allclose(w.pdf(X), c.pdf(X))

            # rvs
            rvs = w.rvs(size=sn)
            args = (df,)
            alpha = 0.01
            check_distribution_rvs('chi2', args, alpha, rvs) 
Example 26
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_multivariate.py    Apache License 2.0
def test_is_scaled_chisquared(self):
        # The 2-dimensional Wishart with an arbitrary scale matrix can be
        # transformed to a scaled chi-squared distribution.
        # For :math:`S \sim W_p(V,n)` and :math:`\lambda \in \mathbb{R}^p` we have
        # :math:`\lambda' S \lambda \sim \lambda' V \lambda \times \chi^2(n)`
        np.random.seed(482974)

        sn = 500
        df = 10
        dim = 4
        # Construct an arbitrary positive definite matrix
        scale = np.diag(np.arange(4)+1)
        scale[np.tril_indices(4, k=-1)] = np.arange(6)
        scale = np.dot(scale.T, scale)
        # Use :math:`\lambda = [1, \dots, 1]'`
        lamda = np.ones((dim,1))
        sigma_lamda = lamda.T.dot(scale).dot(lamda).squeeze()
        w = wishart(df, sigma_lamda)
        c = chi2(df, scale=sigma_lamda)

        # Statistics
        assert_allclose(w.var(), c.var())
        assert_allclose(w.mean(), c.mean())
        assert_allclose(w.entropy(), c.entropy())

        # PDF
        X = np.linspace(0.1,10,num=10)
        assert_allclose(w.pdf(X), c.pdf(X))

        # rvs
        rvs = w.rvs(size=sn)
        args = (df,0,sigma_lamda)
        alpha = 0.01
        check_distribution_rvs('chi2', args, alpha, rvs) 
Example 27
Project: focus   Author: bogdanlab   File: train.py    GNU General Public License v3.0
def _lrt_pvalue(logl_H0, logl_H1, dof=1):
    from scipy.stats import chi2
    from numpy import clip, inf

    epsilon = np.finfo(float).tiny

    lrs = clip(2 * (logl_H1 - logl_H0), epsilon, inf)
    pv = chi2(df=dof).sf(lrs)
    return clip(pv, epsilon, 1 - epsilon) 
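
By Wilks' theorem, twice the log-likelihood gap between nested fits is asymptotically chi2 under the null, with dof equal to the number of extra parameters; the clipping guards against zero or slightly negative gaps from numerical noise. Usage with made-up log-likelihoods:

from scipy.stats import chi2

logl_H0, logl_H1 = -1024.7, -1019.3   # illustrative nested-model fits

lrs = 2 * (logl_H1 - logl_H0)         # likelihood-ratio statistic, 10.8
pv = chi2(df=1).sf(lrs)               # ~0.001 -> reject the restricted model
print(lrs, pv)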
Example 28
Project: Splunking-Crime   Author: nccgroup   File: robust_covariance.py    GNU Affero General Public License v3.0
def correct_covariance(self, data):
        """Apply a correction to raw Minimum Covariance Determinant estimates.

        Correction using the empirical correction factor suggested
        by Rousseeuw and Van Driessen in [RVD]_.

        Parameters
        ----------
        data : array-like, shape (n_samples, n_features)
            The data matrix, with p features and n samples.
            The data set must be the one which was used to compute
            the raw estimates.

        References
        ----------

        .. [RVD] `A Fast Algorithm for the Minimum Covariance
            Determinant Estimator, 1999, American Statistical Association
            and the American Society for Quality, TECHNOMETRICS`

        Returns
        -------
        covariance_corrected : array-like, shape (n_features, n_features)
            Corrected robust covariance estimate.

        """
        correction = np.median(self.dist_) / chi2(data.shape[1]).isf(0.5)
        covariance_corrected = self.raw_covariance_ * correction
        self.dist_ /= correction
        return covariance_corrected 
Example 29
Project: Splunking-Crime   Author: nccgroup   File: contrast.py    GNU Affero General Public License v3.0
def __init__(self, t=None, F=None, sd=None, effect=None, df_denom=None,
                 df_num=None, alpha=0.05, **kwds):

        self.effect = effect  # Let it be None for F
        if F is not None:
            self.distribution = 'F'
            self.fvalue = F
            self.statistic = self.fvalue
            self.df_denom = df_denom
            self.df_num = df_num
            self.dist = fdist
            self.dist_args = (df_num, df_denom)
            self.pvalue = fdist.sf(F, df_num, df_denom)
        elif t is not None:
            self.distribution = 't'
            self.tvalue = t
            self.statistic = t  # generic alias
            self.sd = sd
            self.df_denom = df_denom
            self.dist = student_t
            self.dist_args = (df_denom,)
            self.pvalue = self.dist.sf(np.abs(t), df_denom) * 2
        elif 'statistic' in kwds:
            # TODO: currently targeted to normal distribution, and chi2
            self.distribution = kwds['distribution']
            self.statistic = kwds['statistic']
            self.tvalue = value = kwds['statistic']  # keep alias
            # TODO: for results instance we decided to use tvalues also for normal
            self.sd = sd
            self.dist = getattr(stats, self.distribution)
            self.dist_args = ()
            if self.distribution == 'chi2':
                self.pvalue = self.dist.sf(self.statistic, df_denom)
            else:
                "normal"
                self.pvalue = self.dist.sf(np.abs(value)) * 2

        # cleanup
        # should we return python scalar?
        self.pvalue = np.squeeze(self.pvalue) 
Example 30
Project: Splunking-Crime   Author: nccgroup   File: contrast.py    GNU Affero General Public License v3.0
def __init__(self, statistic, distribution, dist_args, table=None,
                 pvalues=None):
        self.table = table

        self.distribution = distribution
        self.statistic = statistic
        #self.sd = sd
        self.dist_args = dist_args

        # The following is because I don't know which we want
        if table is not None:
            self.statistic = table['statistic'].values
            self.pvalues = table['pvalue'].values
            self.df_constraints = table['df_constraint'].values
            if self.distribution == 'F':
                self.df_denom = table['df_denom'].values

        else:
            if self.distribution == 'chi2':
                self.dist = stats.chi2
                self.df_constraints = self.dist_args[0]  # assumes tuple
                # using dist_args[0] is a bit dangerous,
            elif self.distribution == 'F':
                self.dist = stats.f
                self.df_constraints, self.df_denom = self.dist_args

            else:
                raise ValueError('only F and chi2 are possible distributions')

            if pvalues is None:
                self.pvalues = self.dist.sf(np.abs(statistic), *dist_args)
            else:
                self.pvalues = pvalues 
Example 31
Project: Splunking-Crime   Author: nccgroup   File: ex_transf2.py    GNU Affero General Public License v3.0
def __init__(self):
        self.dist = squarenormalg
        self.trargs = ()
        self.trkwds = dict(loc=-10, scale=20)
        self.statsdist = stats.chi2
        self.stargs = (1,)
        self.stkwds = dict(loc=-10, scale=20) 
Example 32
Project: Splunking-Crime   Author: nccgroup   File: ex_transf2.py    GNU Affero General Public License v3.0
def test_squared_normal_chi2():
    #'\nsquare of standard normal random variable is chisquare with dof=1 distributed'
    cdftr = squarenormalg.cdf(xx,loc=l, scale=s)
    sfctr = 1-squarenormalg.sf(xx,loc=l, scale=s) #sf complement
    cdfst = stats.chi2.cdf(xx,1)
    assert_almost_equal(cdfst, cdftr, 14)
    assert_almost_equal(cdfst, sfctr, 14)

#    print('sqnorm  pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.pdf(xx,loc=l, scale=s))
#    print('chi2    pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1))
#    print('sqnorm  ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.ppf(ppfq,loc=l, scale=s))
#    print('chi2    ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.ppf(ppfq,1))
#    print('sqnorm  cdf with loc scale', squarenormalg.cdf(xx,loc=-10, scale=20))
#    print('chi2    cdf with loc scale', stats.chi2.cdf(xx,1,loc=-10, scale=20))
Example 33
Project: vnpy_crypto   Author: birforce   File: var_model.py    MIT License
def test_normality(results, signif=0.05):
    """
    Test the assumption of normally distributed errors using a
    Jarque-Bera-style omnibus Chi^2 test

    Parameters
    ----------
    results : VARResults or statsmodels.tsa.vecm.vecm.VECMResults
    signif : float
        The test's significance level.

    Notes
    -----
    H0 (null) : data are generated by a Gaussian-distributed process

    Returns
    -------
    result : NormalityTestResults

    References
    ----------
    .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series Analysis*. Springer.

    .. [2] Kilian, L. & Demiroglu, U. (2000). "Residual-Based Tests for
       Normality in Autoregressions: Asymptotic Theory and Simulation
       Evidence." Journal of Business & Economic Statistics
    """
    resid_c = results.resid - results.resid.mean(0)
    sig = np.dot(resid_c.T, resid_c) / results.nobs
    Pinv = np.linalg.inv(np.linalg.cholesky(sig))

    w = np.dot(Pinv, resid_c.T)
    b1 = (w**3).sum(1)[:, None] / results.nobs
    b2 = (w**4).sum(1)[:, None] / results.nobs - 3

    lam_skew = results.nobs * np.dot(b1.T, b1) / 6
    lam_kurt = results.nobs * np.dot(b2.T, b2) / 24

    lam_omni = float(lam_skew + lam_kurt)
    omni_dist = stats.chi2(results.neqs * 2)
    omni_pvalue = float(omni_dist.sf(lam_omni))
    crit_omni = float(omni_dist.ppf(1 - signif))

    return NormalityTestResults(lam_omni, crit_omni, omni_pvalue,
                                results.neqs*2, signif) 
Example 34
Project: vnpy_crypto   Author: birforce   File: contrast.py    MIT License
def __init__(self, t=None, F=None, sd=None, effect=None, df_denom=None,
                 df_num=None, alpha=0.05, **kwds):

        self.effect = effect  # Let it be None for F
        if F is not None:
            self.distribution = 'F'
            self.fvalue = F
            self.statistic = self.fvalue
            self.df_denom = df_denom
            self.df_num = df_num
            self.dist = fdist
            self.dist_args = (df_num, df_denom)
            self.pvalue = fdist.sf(F, df_num, df_denom)
        elif t is not None:
            self.distribution = 't'
            self.tvalue = t
            self.statistic = t  # generic alias
            self.sd = sd
            self.df_denom = df_denom
            self.dist = student_t
            self.dist_args = (df_denom,)
            self.pvalue = self.dist.sf(np.abs(t), df_denom) * 2
        elif 'statistic' in kwds:
            # TODO: currently targeted to normal distribution, and chi2
            self.distribution = kwds['distribution']
            self.statistic = kwds['statistic']
            self.tvalue = value = kwds['statistic']  # keep alias
            # TODO: for results instance we decided to use tvalues also for normal
            self.sd = sd
            self.dist = getattr(stats, self.distribution)
            self.dist_args = kwds.get('dist_args', ())
            if self.distribution == 'chi2':
                self.pvalue = self.dist.sf(self.statistic, df_denom)
                self.df_denom = df_denom
            else:
                "normal"
                self.pvalue = np.full_like(value, np.nan)
                not_nan = ~np.isnan(value)
                self.pvalue[not_nan] = self.dist.sf(np.abs(value[not_nan])) * 2

        # cleanup
        # should we return python scalar?
        self.pvalue = np.squeeze(self.pvalue) 
Example 35
Project: vnpy_crypto   Author: birforce   File: contrast.py    MIT License
def summary(self, xname=None, alpha=0.05, title=None):
        """Summarize the Results of the hypothesis test

        Parameters
        -----------

        xname : list of strings, optional
            Default is `c_##` for ## in p the number of regressors
        alpha : float
            significance level for the confidence intervals. Default is
            alpha = 0.05 which implies a confidence level of 95%.
        title : string, optional
            Title for the params table. If not None, then this replaces the
            default title

        Returns
        -------
        smry : string or Summary instance
            This contains a parameter results table in the case of t or z test
            in the same form as the parameter results table in the model
            results summary.
            For F or Wald test, the return is a string.

        """
        if self.effect is not None:
            # TODO: should also add some extra information, e.g. robust cov ?
            # TODO: can we infer names for constraints, xname in __init__ ?
            if title is None:
                title = 'Test for Constraints'
            elif title == '':
                # don't add any title,
                # I think SimpleTable skips on None - check
                title = None
            # we have everything for a params table
            use_t = (self.distribution == 't')
            yname='constraints' # Not used in params_frame
            if xname is None:
                xname = ['c%d'%ii for ii in range(len(self.effect))]
            from statsmodels.iolib.summary import summary_params
            pvalues = np.atleast_1d(self.pvalue)
            summ = summary_params((self, self.effect, self.sd, self.statistic,
                                   pvalues, self.conf_int(alpha)),
                                  yname=yname, xname=xname, use_t=use_t,
                                  title=title, alpha=alpha)
            return summ
        elif hasattr(self, 'fvalue'):
            # TODO: create something nicer for these cases
            return '<F test: F=%s, p=%s, df_denom=%d, df_num=%d>' % \
                   (repr(self.fvalue), self.pvalue, self.df_denom, self.df_num)
        elif self.distribution == 'chi2':
            return '<Wald test (%s): statistic=%s, p-value=%s, df_denom=%d>' % \
                   (self.distribution, self.statistic, self.pvalue, self.df_denom)
        else:
            # generic
            return '<Wald test: statistic=%s, p-value=%s>' % \
                   (self.statistic, self.pvalue) 
Example 36
Project: vnpy_crypto   Author: birforce   File: mctools.py    MIT License
def summary_quantiles(self, idx, distppf, frac=[0.01, 0.025, 0.05, 0.1, 0.975],
                          varnames=None, title=None):
        '''summary table for quantiles (critical values)

        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        distppf : callable
            quantile function (ppf) of the reference distribution
            TODO: use `crit` values instead or additional, see summary_cdf
        frac : array_like, float
            probabilities for which to compute the quantiles
        varnames : None, or list of strings
            optional list of variable names, same length as idx

        Returns
        -------
        table : instance of SimpleTable
            use `print(table)` to see results

        '''
        idx = np.atleast_1d(idx)  #assure iterable, use list ?

        quant, mcq = self.quantiles(idx, frac=frac)
        #not sure whether this will work with single quantile
        #crit = stats.chi2([2,4]).ppf(np.atleast_2d(quant).T)
        crit = distppf(np.atleast_2d(quant).T)
        mml=[]
        for i, ix in enumerate(idx):  #TODO: hardcoded 2 ?
            mml.extend([mcq[:,i], crit[:,i]])
        #mmlar = np.column_stack(mml)
        mmlar = np.column_stack([quant] + mml)
        #print(mmlar.shape
        if title:
            title = title +' Quantiles (critical values)'
        else:
            title='Quantiles (critical values)'
        #TODO use stub instead
        if varnames is None:
            varnames = ['var%d' % i for i in range(mmlar.shape[1]//2)]
        headers = ['\nprob'] + ['%s\n%s' % (i, t) for i in varnames for t in ['mc', 'dist']]
        return SimpleTable(mmlar,
                          txt_fmt={'data_fmts': ["%#6.3f"]+["%#10.4f"]*(mmlar.shape[1]-1)},
                          title=title,
                          headers=headers) 
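
The commented-out line hints at the intended distppf callable: a frozen chi2 with an array of dfs broadcasts its ppf against a column of probabilities, yielding one column of critical values per df, which is the shape this table expects. A standalone sketch of that shape contract (dfs 2 and 4 are just examples):

import numpy as np
from scipy import stats

frac = np.array([0.01, 0.025, 0.05, 0.1, 0.975])

# (5, 1) probabilities broadcast against (2,) dfs -> a (5, 2) table.
crit = stats.chi2([2, 4]).ppf(np.atleast_2d(frac).T)
print(crit.shape)   # (5, 2): one column of critical values per df
print(crit)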
Example 37
Project: MALAX   Author: omerwe   File: run_laplace.py    MIT License
def perform_ewas_fixed_bb_lr(mcounts, counts, kernel, predictor, covars, verbose=False, out_file=None):
	K, yMat, rMat, covToTest, covars, snpNames = readMacauTest(mcounts, counts, kernel, predictor, covars, correctK=False)	
	covars_null = np.ones((yMat.shape[1],1))
	if (covars is not None): covars_null = np.concatenate((covars_null, covars), axis=1)
	covars_alt = np.concatenate((covars_null, np.row_stack(covToTest)), axis=1)	
	ewas_laplace = EWAS_Laplace()
	use_dispersion = True	
	chi2 = stats.chi2(1)
	U = np.eye(K.shape[0])
	s = np.ones(K.shape[0])
	beta0_null = np.zeros(covars_null.shape[1])
	beta0_alt = np.zeros(covars_alt.shape[1])
	
	#print header
	out_file_h = open(out_file, 'w')
	out_file_h.write('%s\t%s\t%s\t%s\n'%('index', 'id', 'test_stat', 'P-value'))
	
	test_stats = np.zeros(len(snpNames))
	t0_ewas = time.time()
	for i in range(len(snpNames)):
		snpName = snpNames[i]
		r_i = rMat[i,:]
		y_i = yMat[i,:]
		
		#exclude individuals with no data
		if (np.sum(r_i>0) < 3): continue			
		y_i = y_i[r_i>0]		
		covars_null_i = covars_null[r_i>0,:]
		covars_alt_i  = covars_alt[r_i>0,:]
		U_i  = U[r_i>0,:]
		r_i = r_i[r_i>0]	
		
		try:
			nll_null = optimize.minimize_scalar(nll_bb2, args=(covars_null_i, y_i, r_i), method='bounded', bounds=(0,1)).fun
			nll_alt  = optimize.minimize_scalar(nll_bb2, args=(covars_alt_i,  y_i, r_i), method='bounded', bounds=(0,1)).fun
			test_stats[i] = 2*(nll_null - nll_alt)
		except Exception:
			test_stats[i] = np.nan
		
		out_file_h.write('%d\t%s\t%0.4f\t%0.5e\n'%(i+1, snpName, test_stats[i], chi2.sf(test_stats[i])))
		
	out_file_h.close()
	print()
	print('#total EWAS time: %0.2f minutes' % ((time.time()-t0_ewas) / 60.0))
Example 38
Project: PsrSigSim   Author: PsrSigSim   File: pulsar.py    MIT License
def _make_pow_pulses(self, signal):
        """generate a power pulse

        This method should be used for filter bank pulses

        Parameters
        ----------

        signal : :class:`Signal`-like
            Signal object to store pulses.
        """
        if signal.subint:
            # Determine how many subints to make
            if signal.sublen is None:
                signal._sublen = signal.tobs
                signal._nsub = 1
            else:
                # This should be an integer; if not, it will be rounded
                signal._nsub = int(np.round(signal.tobs / signal.sublen))

            # determine the number of data samples necessary
            signal._nsamp = int((signal.nsub*(self.period*signal.samprate)).decompose())
            # Need to make an initial empty data array
            signal.init_data(signal.nsamp)

            # Tile the profiles to number of desired subints
            sngl_prof = np.tile(self.Profiles(), signal.nsub)

            # changed to number of subints
            signal._Nfold = (signal.sublen / self.period).decompose()
            distr = stats.chi2(df=signal.Nfold)
            signal._set_draw_norm(df=signal.Nfold)

            #Why is there a second call to init_data?
            signal.init_data(sngl_prof.shape[1])
            signal._data = (sngl_prof * distr.rvs(size=signal.data.shape)
                            * signal._draw_norm)
        else:
            # generate several pulses in time
            distr = stats.chi2(df=1)
            signal._set_draw_norm(df=1)

            signal._nsamp = int((signal.tobs * signal.samprate).decompose())
            signal.init_data(signal.nsamp)

            # TODO break into blocks
            # TODO phase from .par file
            # calc profiles at phases
            phs = (np.arange(signal.nsamp) /
                   (signal.samprate * self.period).decompose().value)
            phs %= 1  # clip integer part
            full_prof = self.Profiles.calc_profiles(phs,signal.Nchan)

            signal._data = (full_prof * distr.rvs(size=signal.data.shape)
                            * signal._draw_norm) 
Example 39
Project: linear_neuron   Author: uglyboxer   File: robust_covariance.py    MIT License
def reweight_covariance(self, data):
        """Re-weight raw Minimum Covariance Determinant estimates.

        Re-weight observations using Rousseeuw's method (equivalent to
        deleting outlying observations from the data set before
        computing location and covariance estimates). [Rouseeuw1984]_

        Parameters
        ----------
        data : array-like, shape (n_samples, n_features)
            The data matrix, with p features and n samples.
            The data set must be the one which was used to compute
            the raw estimates.

        Returns
        -------
        location_reweighted : array-like, shape (n_features, )
            Re-weighted robust location estimate.

        covariance_reweighted : array-like, shape (n_features, n_features)
            Re-weighted robust covariance estimate.

        support_reweighted : array-like, type boolean, shape (n_samples,)
            A mask of the observations that have been used to compute
            the re-weighted robust location and covariance estimates.

        """
        n_samples, n_features = data.shape
        mask = self.dist_ < chi2(n_features).isf(0.025)
        if self.assume_centered:
            location_reweighted = np.zeros(n_features)
        else:
            location_reweighted = data[mask].mean(0)
        covariance_reweighted = self._nonrobust_covariance(
            data[mask], assume_centered=self.assume_centered)
        support_reweighted = np.zeros(n_samples, dtype=bool)
        support_reweighted[mask] = True
        self._set_covariance(covariance_reweighted)
        self.location_ = location_reweighted
        self.support_ = support_reweighted
        X_centered = data - self.location_
        self.dist_ = np.sum(
            np.dot(X_centered, self.get_precision()) * X_centered, 1)
        return location_reweighted, covariance_reweighted, support_reweighted 
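
The 0.025 cutoff rests on the same chi2 model of Mahalanobis distances as the correction step: for p-variate Gaussian data the squared distances are chi2(p), so chi2(p).isf(0.025) trims roughly the most extreme 2.5% of clean points along with any genuine outliers. A toy illustration with a known identity scatter (not the estimator itself):

import numpy as np
from scipy.stats import chi2

np.random.seed(0)
n, p = 500, 3
X = np.random.normal(size=(n, p))
X[:10] += 6.0                       # plant ten gross outliers

d2 = np.sum(X ** 2, axis=1)         # squared distances w.r.t. identity scatter
mask = d2 < chi2(p).isf(0.025)      # keep points below the 97.5% quantile
print(mask.sum(), "of", n, "kept")  # ~478: the outliers plus ~2.5% tail removed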
Example 40
Project: Weiss   Author: WangWenjun559   File: robust_covariance.py    Apache License 2.0
def reweight_covariance(self, data):
        """Re-weight raw Minimum Covariance Determinant estimates.

        Re-weight observations using Rousseeuw's method (equivalent to
        deleting outlying observations from the data set before
        computing location and covariance estimates). [Rouseeuw1984]_

        Parameters
        ----------
        data : array-like, shape (n_samples, n_features)
            The data matrix, with p features and n samples.
            The data set must be the one which was used to compute
            the raw estimates.

        Returns
        -------
        location_reweighted : array-like, shape (n_features, )
            Re-weighted robust location estimate.

        covariance_reweighted : array-like, shape (n_features, n_features)
            Re-weighted robust covariance estimate.

        support_reweighted : array-like, type boolean, shape (n_samples,)
            A mask of the observations that have been used to compute
            the re-weighted robust location and covariance estimates.

        """
        n_samples, n_features = data.shape
        mask = self.dist_ < chi2(n_features).isf(0.025)
        if self.assume_centered:
            location_reweighted = np.zeros(n_features)
        else:
            location_reweighted = data[mask].mean(0)
        covariance_reweighted = self._nonrobust_covariance(
            data[mask], assume_centered=self.assume_centered)
        support_reweighted = np.zeros(n_samples, dtype=bool)
        support_reweighted[mask] = True
        self._set_covariance(covariance_reweighted)
        self.location_ = location_reweighted
        self.support_ = support_reweighted
        X_centered = data - self.location_
        self.dist_ = np.sum(
            np.dot(X_centered, self.get_precision()) * X_centered, 1)
        return location_reweighted, covariance_reweighted, support_reweighted 
Example 41
Project: wine-ml-on-aws-lambda   Author: pierreant   File: robust_covariance.py    Apache License 2.0
def reweight_covariance(self, data):
        """Re-weight raw Minimum Covariance Determinant estimates.

        Re-weight observations using Rousseeuw's method (equivalent to
        deleting outlying observations from the data set before
        computing location and covariance estimates) described
        in [RVDriessen]_.

        Parameters
        ----------
        data : array-like, shape (n_samples, n_features)
            The data matrix, with p features and n samples.
            The data set must be the one which was used to compute
            the raw estimates.

        References
        ----------

        .. [RVDriessen] `A Fast Algorithm for the Minimum Covariance
            Determinant Estimator, 1999, American Statistical Association
            and the American Society for Quality, TECHNOMETRICS`

        Returns
        -------
        location_reweighted : array-like, shape (n_features, )
            Re-weighted robust location estimate.

        covariance_reweighted : array-like, shape (n_features, n_features)
            Re-weighted robust covariance estimate.

        support_reweighted : array-like, type boolean, shape (n_samples,)
            A mask of the observations that have been used to compute
            the re-weighted robust location and covariance estimates.

        """
        n_samples, n_features = data.shape
        mask = self.dist_ < chi2(n_features).isf(0.025)
        if self.assume_centered:
            location_reweighted = np.zeros(n_features)
        else:
            location_reweighted = data[mask].mean(0)
        covariance_reweighted = self._nonrobust_covariance(
            data[mask], assume_centered=self.assume_centered)
        support_reweighted = np.zeros(n_samples, dtype=bool)
        support_reweighted[mask] = True
        self._set_covariance(covariance_reweighted)
        self.location_ = location_reweighted
        self.support_ = support_reweighted
        X_centered = data - self.location_
        self.dist_ = np.sum(
            np.dot(X_centered, self.get_precision()) * X_centered, 1)
        return location_reweighted, covariance_reweighted, support_reweighted 
Example 42
Project: Splunking-Crime   Author: nccgroup   File: robust_covariance.py    GNU Affero General Public License v3.0
def reweight_covariance(self, data):
        """Re-weight raw Minimum Covariance Determinant estimates.

        Re-weight observations using Rousseeuw's method (equivalent to
        deleting outlying observations from the data set before
        computing location and covariance estimates) described
        in [RVDriessen]_.

        Parameters
        ----------
        data : array-like, shape (n_samples, n_features)
            The data matrix, with p features and n samples.
            The data set must be the one which was used to compute
            the raw estimates.

        References
        ----------

        .. [RVDriessen] `A Fast Algorithm for the Minimum Covariance
            Determinant Estimator, 1999, American Statistical Association
            and the American Society for Quality, TECHNOMETRICS`

        Returns
        -------
        location_reweighted : array-like, shape (n_features, )
            Re-weighted robust location estimate.

        covariance_reweighted : array-like, shape (n_features, n_features)
            Re-weighted robust covariance estimate.

        support_reweighted : array-like, type boolean, shape (n_samples,)
            A mask of the observations that have been used to compute
            the re-weighted robust location and covariance estimates.

        """
        n_samples, n_features = data.shape
        mask = self.dist_ < chi2(n_features).isf(0.025)
        if self.assume_centered:
            location_reweighted = np.zeros(n_features)
        else:
            location_reweighted = data[mask].mean(0)
        covariance_reweighted = self._nonrobust_covariance(
            data[mask], assume_centered=self.assume_centered)
        support_reweighted = np.zeros(n_samples, dtype=bool)
        support_reweighted[mask] = True
        self._set_covariance(covariance_reweighted)
        self.location_ = location_reweighted
        self.support_ = support_reweighted
        X_centered = data - self.location_
        self.dist_ = np.sum(
            np.dot(X_centered, self.get_precision()) * X_centered, 1)
        return location_reweighted, covariance_reweighted, support_reweighted 
Example 43
Project: Splunking-Crime   Author: nccgroup   File: var_model.py    GNU Affero General Public License v3.0
def test_normality(self, signif=0.05, verbose=True):
        """
        Test the assumption of normally distributed errors using a
        Jarque-Bera-style omnibus Chi^2 test

        Parameters
        ----------
        signif : float
            Test significance threshold

        Notes
        -----
        H0 (null) : data are generated by a Gaussian-distributed process
        """
        Pinv = npl.inv(self._chol_sigma_u)

        w = np.array([np.dot(Pinv, u) for u in self.resid])

        b1 = (w ** 3).sum(0) / self.nobs
        lam_skew = self.nobs * np.dot(b1, b1) / 6

        b2 = (w ** 4).sum(0) / self.nobs - 3
        lam_kurt = self.nobs * np.dot(b2, b2) / 24

        lam_omni = lam_skew + lam_kurt

        omni_dist = stats.chi2(self.neqs * 2)
        omni_pvalue = omni_dist.sf(lam_omni)
        crit_omni = omni_dist.ppf(1 - signif)

        conclusion = 'fail to reject' if lam_omni < crit_omni else 'reject'

        results = {
            'statistic' : lam_omni,
            'crit_value' : crit_omni,
            'pvalue' : omni_pvalue,
            'df' : self.neqs * 2,
            'conclusion' : conclusion,
            'signif' :  signif
        }

        if verbose:
            summ = output.normality_summary(results)
            print(summ)

        return results 
Example 44
Project: Splunking-Crime   Author: nccgroup   File: mctools.py    GNU Affero General Public License v3.0
def summary_quantiles(self, idx, distppf, frac=[0.01, 0.025, 0.05, 0.1, 0.975],
                          varnames=None, title=None):
        '''summary table for quantiles (critical values)

        Parameters
        ----------
        idx : None or list of integers
            List of indices into the Monte Carlo results (columns) that should
            be used in the calculation
        distppf : callable
            quantile function (ppf) of the reference distribution
            TODO: use `crit` values instead or additional, see summary_cdf
        frac : array_like, float
            probabilities for which to compute the quantiles
        varnames : None, or list of strings
            optional list of variable names, same length as idx

        Returns
        -------
        table : instance of SimpleTable
            use `print(table)` to see results

        '''
        idx = np.atleast_1d(idx)  #assure iterable, use list ?

        quant, mcq = self.quantiles(idx, frac=frac)
        #not sure whether this will work with single quantile
        #crit = stats.chi2([2,4]).ppf(np.atleast_2d(quant).T)
        crit = distppf(np.atleast_2d(quant).T)
        mml=[]
        for i, ix in enumerate(idx):  #TODO: hardcoded 2 ?
            mml.extend([mcq[:,i], crit[:,i]])
        #mmlar = np.column_stack(mml)
        mmlar = np.column_stack([quant] + mml)
        #print(mmlar.shape
        if title:
            title = title +' Quantiles (critical values)'
        else:
            title='Quantiles (critical values)'
        #TODO use stub instead
        if varnames is None:
            varnames = ['var%d' % i for i in range(mmlar.shape[1]//2)]
        headers = ['\nprob'] + ['%s\n%s' % (i, t) for i in varnames for t in ['mc', 'dist']]
        return SimpleTable(mmlar,
                          txt_fmt={'data_fmts': ["%#6.3f"]+["%#10.4f"]*(mmlar.shape[1]-1)},
                          title=title,
                          headers=headers)