Python scipy.stats.mstats.mquantiles() Examples

The following are 18 code examples for showing how to use scipy.stats.mstats.mquantiles(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may want to check out the right sidebar which shows the related API usage.

You may also want to check out all available functions/classes of the module scipy.stats.mstats , or try the search function .

Example 1
Project: Computable   Author: ktraunmueller   File: test_mstats_basic.py    License: MIT License 6 votes vote down vote up
def test_mquantiles_limit_keyword(self):
        """Ticket #867"""
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example 2
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    License: MIT License 6 votes vote down vote up
def test_mquantiles_limit_keyword(self):
        # Regression test for Trac ticket #867
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example 3
Project: vnpy_crypto   Author: birforce   File: kernel_extras.py    License: MIT License 5 votes vote down vote up
def _compute_sig(self):
        Y = self.endog
        X = self.exog
        b = self.estimator(Y, X)
        m = self.fform(X, b)
        n = np.shape(X)[0]
        resid = Y - m
        resid = resid - np.mean(resid)  # center residuals
        self.test_stat = self._compute_test_stat(resid)
        sqrt5 = np.sqrt(5.)
        fct1 = (1 - sqrt5) / 2.
        fct2 = (1 + sqrt5) / 2.
        u1 = fct1 * resid
        u2 = fct2 * resid
        r = fct2 / sqrt5
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = u2.copy()

            prob = np.random.uniform(0,1, size = (n,))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            b_hat = self.estimator(Y_boot, X)
            m_hat = self.fform(X, b_hat)
            u_boot_hat = Y_boot - m_hat
            I_dist[j] = self._compute_test_stat(u_boot_hat)

        self.boots_results = I_dist
        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"
        return sig 
Example 4
Project: vnpy_crypto   Author: birforce   File: kernel_regression.py    License: MIT License 5 votes vote down vote up
def _compute_sig(self):
        """
        Computes the significance value for the variable(s) tested.

        The empirical distribution of the test statistic is obtained through
        bootstrapping the sample.  The null hypothesis is rejected if the test
        statistic is larger than the 90, 95, 99 percentiles.
        """
        t_dist = np.empty(shape=(self.nboot, ))
        Y = self.endog
        X = copy.deepcopy(self.exog)
        n = np.shape(Y)[0]

        X[:, self.test_vars] = np.mean(X[:, self.test_vars], axis=0)
        # Calculate the restricted mean. See p. 372 in [8]
        M = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
                      defaults = EstimatorSettings(efficient=False)).fit()[0]
        M = np.reshape(M, (n, 1))
        e = Y - M
        e = e - np.mean(e)  # recenter residuals
        for i in range(self.nboot):
            ind = np.random.random_integers(0, n-1, size=(n,1))
            e_boot = e[ind, 0]
            Y_boot = M + e_boot
            t_dist[i] = self._compute_test_stat(Y_boot, self.exog)

        self.t_dist = t_dist
        sig = "Not Significant"
        if self.test_stat > mquantiles(t_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(t_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(t_dist, 0.99):
            sig = "***"

        return sig 
Example 5
Project: vnpy_crypto   Author: birforce   File: kernel_regression.py    License: MIT License 5 votes vote down vote up
def _compute_sig(self):
        """Calculates the significance level of the variable tested"""

        m = self._est_cond_mean()
        Y = self.endog
        X = self.exog
        n = np.shape(X)[0]
        u = Y - m
        u = u - np.mean(u)  # center
        fct1 = (1 - 5**0.5) / 2.
        fct2 = (1 + 5**0.5) / 2.
        u1 = fct1 * u
        u2 = fct2 * u
        r = fct2 / (5 ** 0.5)
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = copy.deepcopy(u2)

            prob = np.random.uniform(0,1, size = (n,1))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            I_dist[j] = self._compute_test_stat(Y_boot, X)

        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"

        return sig 
Example 6
Project: vnpy_crypto   Author: birforce   File: _kernel_base.py    License: MIT License 5 votes vote down vote up
def _compute_min_std_IQR(data):
    """Compute minimum of std and IQR for each variable."""
    s1 = np.std(data, axis=0)
    q75 = mquantiles(data, 0.75, axis=0).data[0]
    q25 = mquantiles(data, 0.25, axis=0).data[0]
    s2 = (q75 - q25) / 1.349  # IQR
    dispersion = np.minimum(s1, s2)
    return dispersion 
Example 7
Project: arviz   Author: arviz-devs   File: stats_utils.py    License: Apache License 2.0 5 votes vote down vote up
def quantile(ary, q, axis=None, limit=None):
    """Use same quantile function as R (Type 7)."""
    if limit is None:
        limit = tuple()
    return mquantiles(ary, q, alphap=1, betap=1, axis=axis, limit=limit) 
Example 8
Project: plotnine   Author: has2k1   File: stat_qq_line.py    License: GNU General Public License v2.0 5 votes vote down vote up
def compute_group(cls, data, scales, **params):
        line_p = params['line_p']
        dparams = params['dparams']

        # Compute theoretical values
        df = stat_qq.compute_group(data, scales, **params)
        sample = df['sample'].values
        theoretical = df['theoretical'].values

        # Compute slope & intercept of the line through the quantiles
        cdist = get_continuous_distribution(params['distribution'])
        x_coords = cdist.ppf(line_p, *dparams)
        y_coords = mquantiles(sample, line_p)
        slope = (np.diff(y_coords)/np.diff(x_coords))[0]
        intercept = y_coords[0] - slope*x_coords[0]

        # Get x,y points that describe the line
        if params['fullrange'] and scales.x:
            x = scales.x.dimension()
        else:
            x = theoretical.min(), theoretical.max()

        x = np.asarray(x)
        y = slope * x + intercept
        data = pd.DataFrame({'x': x, 'y': y})
        return data 
Example 9
Project: Splunking-Crime   Author: nccgroup   File: kernel_extras.py    License: GNU Affero General Public License v3.0 5 votes vote down vote up
def _compute_sig(self):
        Y = self.endog
        X = self.exog
        b = self.estimator(Y, X)
        m = self.fform(X, b)
        n = np.shape(X)[0]
        resid = Y - m
        resid = resid - np.mean(resid)  # center residuals
        self.test_stat = self._compute_test_stat(resid)
        sqrt5 = np.sqrt(5.)
        fct1 = (1 - sqrt5) / 2.
        fct2 = (1 + sqrt5) / 2.
        u1 = fct1 * resid
        u2 = fct2 * resid
        r = fct2 / sqrt5
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = u2.copy()

            prob = np.random.uniform(0,1, size = (n,))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            b_hat = self.estimator(Y_boot, X)
            m_hat = self.fform(X, b_hat)
            u_boot_hat = Y_boot - m_hat
            I_dist[j] = self._compute_test_stat(u_boot_hat)

        self.boots_results = I_dist
        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"
        return sig 
Example 10
Project: Splunking-Crime   Author: nccgroup   File: kernel_regression.py    License: GNU Affero General Public License v3.0 5 votes vote down vote up
def _compute_sig(self):
        """
        Computes the significance value for the variable(s) tested.

        The empirical distribution of the test statistic is obtained through
        bootstrapping the sample.  The null hypothesis is rejected if the test
        statistic is larger than the 90, 95, 99 percentiles.
        """
        t_dist = np.empty(shape=(self.nboot, ))
        Y = self.endog
        X = copy.deepcopy(self.exog)
        n = np.shape(Y)[0]

        X[:, self.test_vars] = np.mean(X[:, self.test_vars], axis=0)
        # Calculate the restricted mean. See p. 372 in [8]
        M = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
                      defaults = EstimatorSettings(efficient=False)).fit()[0]
        M = np.reshape(M, (n, 1))
        e = Y - M
        e = e - np.mean(e)  # recenter residuals
        for i in range(self.nboot):
            ind = np.random.random_integers(0, n-1, size=(n,1))
            e_boot = e[ind, 0]
            Y_boot = M + e_boot
            t_dist[i] = self._compute_test_stat(Y_boot, self.exog)

        self.t_dist = t_dist
        sig = "Not Significant"
        if self.test_stat > mquantiles(t_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(t_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(t_dist, 0.99):
            sig = "***"

        return sig 
Example 11
Project: Splunking-Crime   Author: nccgroup   File: kernel_regression.py    License: GNU Affero General Public License v3.0 5 votes vote down vote up
def _compute_sig(self):
        """Calculates the significance level of the variable tested"""

        m = self._est_cond_mean()
        Y = self.endog
        X = self.exog
        n = np.shape(X)[0]
        u = Y - m
        u = u - np.mean(u)  # center
        fct1 = (1 - 5**0.5) / 2.
        fct2 = (1 + 5**0.5) / 2.
        u1 = fct1 * u
        u2 = fct2 * u
        r = fct2 / (5 ** 0.5)
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = copy.deepcopy(u2)

            prob = np.random.uniform(0,1, size = (n,1))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            I_dist[j] = self._compute_test_stat(Y_boot, X)

        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"

        return sig 
Example 12
Project: Splunking-Crime   Author: nccgroup   File: _kernel_base.py    License: GNU Affero General Public License v3.0 5 votes vote down vote up
def _compute_min_std_IQR(data):
    """Compute minimum of std and IQR for each variable."""
    s1 = np.std(data, axis=0)
    q75 = mquantiles(data, 0.75, axis=0).data[0]
    q25 = mquantiles(data, 0.25, axis=0).data[0]
    s2 = (q75 - q25) / 1.349  # IQR
    dispersion = np.minimum(s1, s2)
    return dispersion 
Example 13
Project: sima   Author: losonczylab   File: hmm.py    License: GNU General Public License v2.0 5 votes vote down vote up
def _threshold_gradient(im):
    """Indicate pixel locations with gradient below the bottom 10th percentile

    Parameters
    ----------
    im : array
        The mean intensity images for each channel.
        Size: (num_channels, num_rows, num_columns).

    Returns
    -------
    array
        Binary values indicating whether the magnitude of the gradient is below
        the 10th percentile.  Same size as im.

    """

    if im.shape[0] > 1:
        # Calculate directional relative derivatives
        _, g_x, g_y = np.gradient(np.log(im))
    else:
        # Calculate directional relative derivatives
        g_x, g_y = np.gradient(np.log(im[0]))
        g_x = g_x.reshape([1, g_x.shape[0], g_x.shape[1]])
        g_y = g_y.reshape([1, g_y.shape[0], g_y.shape[1]])
    gradient_magnitudes = np.sqrt((g_x ** 2) + (g_y ** 2))
    below_threshold = []
    for chan in gradient_magnitudes:
        threshold = mquantiles(chan[np.isfinite(chan)].flatten(), [0.1])[0]
        below_threshold.append(chan < threshold)
    return np.array(below_threshold) 
Example 14
Project: opt-mmd   Author: djsutherland   File: fixed_run.py    License: BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def get_estimates(gen, sigmas=None, n_reps=100, n_null_samps=1000,
                  cache_size=64, rep_states=False, name=None,
                  save_samps=False, thresh_levels=(.2, .1, .05, .01)):
    if sigmas is None:
        sigmas = np.logspace(-1.7, 1.7, num=30)
    sigmas = np.asarray(sigmas)

    mmd = sg.QuadraticTimeMMD()
    mmd.set_num_null_samples(n_null_samps)
    mmd_mk = mmd.multikernel()
    for s in sigmas:
        mmd_mk.add_kernel(sg.GaussianKernel(cache_size, 2 * s**2))

    info = OrderedDict()
    for k in 'sigma rep mmd_est var_est p'.split():
        info[k] = []
    thresh_names = []
    for l in thresh_levels:
        s = 'thresh_{}'.format(l)
        thresh_names.append(s)
        info[s] = []
    if save_samps:
        info['samps'] = []

    thresh_prob = 1 - np.asarray(thresh_levels)

    bar = pb.ProgressBar()
    if name is not None:
        bar.start()
        bar.widgets.insert(0, '{} '.format(name))
    for rep in bar(xrange(n_reps)):
        if rep_states:
            rep = np.random.randint(0, 2**32)
            X, Y = gen(rs=rep)
        else:
            X, Y = gen()
        n = X.shape[0]
        assert Y.shape[0] == n
        mmd.set_p(sg.RealFeatures(X.T))
        mmd.set_q(sg.RealFeatures(Y.T))

        info['sigma'].extend(sigmas)
        info['rep'].extend([rep] * len(sigmas))

        stat = mmd_mk.compute_statistic()
        info['mmd_est'].extend(stat / (n / 2))

        samps = mmd_mk.sample_null()
        info['p'].extend(np.mean(samps >= stat, axis=0))
        if save_samps:
            info['samps'].extend(samps.T)

        info['var_est'].extend(mmd_mk.compute_variance_h1())

        threshes = np.asarray(mquantiles(samps, prob=thresh_prob, axis=0))
        for s, t in zip(thresh_names, threshes):
            info[s].extend(t)

    info = pd.DataFrame(info)
    info.set_index(['sigma', 'rep'], inplace=True)
    return info 
Example 15
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: partial_dependence.py    License: MIT License 4 votes vote down vote up
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles of ``X``.

    The grid is generated by placing ``grid_resolution`` equally
    spaced points between the ``percentiles`` of each column
    of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme
        values of the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed
        on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes 
Example 16
Project: Splunking-Crime   Author: nccgroup   File: partial_dependence.py    License: GNU Affero General Public License v3.0 4 votes vote down vote up
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles of ``X``.

    The grid is generated by placing ``grid_resolution`` equally
    spaced points between the ``percentiles`` of each column
    of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme
        values of the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed
        on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes 
Example 17
Project: twitter-stock-recommendation   Author: alvarobartt   File: partial_dependence.py    License: MIT License 4 votes vote down vote up
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles of ``X``.

    The grid is generated by placing ``grid_resolution`` equally
    spaced points between the ``percentiles`` of each column
    of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme
        values of the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed
        on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes 
Example 18
Project: fithic   Author: ay-lab   File: fithic.py    License: MIT License 4 votes vote down vote up
def read_biases(infilename):
    global biasLowerBound
    global biasUpperBound
    startt = time.time()
    biasDic={}

    rawBiases=[]
    with gzip.open(infilename, 'rt') as infile:
        for line in infile:
            words=line.rstrip().split()
            chrom=words[0]; midPoint=int(words[1]); bias=float(words[2])
            if bias!=1.0:
               rawBiases.append(bias)
        botQ,med,topQ=mquantiles(rawBiases,prob=[0.05,0.5,0.95])
        with open(logfile, 'a') as log:
            log.write("5th quantile of biases: "+str(botQ)+"\n")
            log.write("50th quantile of biases: "+str(med)+"\n")
            log.write("95th quantile of biases: "+str(topQ)+"\n")
    totalC=0
    discardC=0
    with gzip.open(infilename, 'rt') as infile:
        for line in infile:
            words=line.rstrip().split()
            chrom=words[0]; midPoint=int(words[1]); bias=float(words[2]);
            if bias<biasLowerBound or math.isnan(bias):
                bias=-1 #botQ
                discardC+=1
            elif bias>biasUpperBound:
                bias=-1 #topQ
                discardC+=1
            totalC+=1
            if chrom not in biasDic:
                biasDic[chrom]={}
            if midPoint not in biasDic[chrom]:
                biasDic[chrom][midPoint]=bias
    with open(logfile, 'a') as log:
        log.write("Out of " + str(totalC) + " loci " +str(discardC) +" were discarded with biases not in range [0.5 2]\n\n" )
    endt = time.time()
    print("Bias file read. Time took %s" % (endt-startt))
    return biasDic # from read_biases

#==================================
# function to compute the contact probabilities
# applied for intra-chromosomal interactions
#==================================