Python scipy.stats.mstats.mquantiles() Examples

The following are 18 code examples of scipy.stats.mstats.mquantiles(), taken from open-source projects. Each example lists the source file it comes from, together with the project and its license. You may also want to look at the other functions and classes available in the scipy.stats.mstats module.
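Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of what mquantiles() computes: empirical quantiles of array-like or masked data, with the plotting-position parameters alphap and betap selecting the interpolation rule.

import numpy as np
from scipy.stats.mstats import mquantiles

a = np.array([6., 47., 49., 15., 42., 41., 7., 39., 43., 40., 36.])
# Default prob is [0.25, 0.5, 0.75]; the default alphap=0.4, betap=0.4
# gives approximately quantile-unbiased estimates.
print(mquantiles(a))                   # the three quartiles
print(mquantiles(a, prob=[0.1, 0.9]))  # custom probabilities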
Example #1
Source File: test_mstats_basic.py    From Computable with MIT License
def test_mquantiles_limit_keyword(self):
        """Ticket #867"""
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example #2
Source File: test_mstats_basic.py    From GraphicDesignPatternByPython with MIT License
def test_mquantiles_limit_keyword(self):
        # Regression test for Trac ticket #867
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
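Both tests above exercise the limit keyword. As a standalone sketch (data assumed here, matching the third column of the test array): values outside the (low, high) interval are masked before the quantiles are computed, so the -999 sentinels do not distort the result.

import numpy as np
from scipy.stats.mstats import mquantiles

col = np.array([1., 2., 3., 4., -999., -999.])
print(mquantiles(col))                  # sentinels pull the quantiles down
print(mquantiles(col, limit=(0, 50)))   # sentinels ignored -> [1.45, 2.5, 3.55]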
Example #3
Source File: kernel_extras.py    From vnpy_crypto with MIT License
def _compute_sig(self):
        Y = self.endog
        X = self.exog
        b = self.estimator(Y, X)
        m = self.fform(X, b)
        n = np.shape(X)[0]
        resid = Y - m
        resid = resid - np.mean(resid)  # center residuals
        self.test_stat = self._compute_test_stat(resid)
        sqrt5 = np.sqrt(5.)
        fct1 = (1 - sqrt5) / 2.
        fct2 = (1 + sqrt5) / 2.
        u1 = fct1 * resid
        u2 = fct2 * resid
        r = fct2 / sqrt5
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = u2.copy()

            prob = np.random.uniform(0,1, size = (n,))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            b_hat = self.estimator(Y_boot, X)
            m_hat = self.fform(X, b_hat)
            u_boot_hat = Y_boot - m_hat
            I_dist[j] = self._compute_test_stat(u_boot_hat)

        self.boots_results = I_dist
        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"
        return sig 
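The significance codes above come from comparing the observed statistic against bootstrap quantiles of the null distribution. A minimal sketch of just that final step (the statistic and the bootstrap draws are made up here):

import numpy as np
from scipy.stats.mstats import mquantiles

rng = np.random.default_rng(0)
boot_stats = rng.chisquare(df=2, size=1000)  # stand-in for I_dist
test_stat = 7.5

sig = "Not Significant"
for level, stars in [(0.90, "*"), (0.95, "**"), (0.99, "***")]:
    if test_stat > mquantiles(boot_stats, level):
        sig = stars
print(sig)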
Example #4
Source File: hmm.py    From sima with GNU General Public License v2.0
def _threshold_gradient(im):
    """Indicate pixel locations with gradient below the bottom 10th percentile

    Parameters
    ----------
    im : array
        The mean intensity images for each channel.
        Size: (num_channels, num_rows, num_columns).

    Returns
    -------
    array
        Binary values indicating whether the magnitude of the gradient is below
        the 10th percentile.  Same size as im.

    """

    if im.shape[0] > 1:
        # Calculate directional relative derivatives
        _, g_x, g_y = np.gradient(np.log(im))
    else:
        # Calculate directional relative derivatives
        g_x, g_y = np.gradient(np.log(im[0]))
        g_x = g_x.reshape([1, g_x.shape[0], g_x.shape[1]])
        g_y = g_y.reshape([1, g_y.shape[0], g_y.shape[1]])
    gradient_magnitudes = np.sqrt((g_x ** 2) + (g_y ** 2))
    below_threshold = []
    for chan in gradient_magnitudes:
        threshold = mquantiles(chan[np.isfinite(chan)].flatten(), [0.1])[0]
        below_threshold.append(chan < threshold)
    return np.array(below_threshold) 
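A small standalone version of the thresholding pattern used above (the gradient array is assumed here): take the 10th percentile of the finite values and build a boolean mask from it.

import numpy as np
from scipy.stats.mstats import mquantiles

grad = np.array([[0.10, 0.50, np.inf],
                 [0.05, 0.20, 0.90]])
threshold = mquantiles(grad[np.isfinite(grad)].flatten(), [0.1])[0]
print(threshold)
print(grad < threshold)  # True where the gradient is below the 10th percentile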
Example #5
Source File: _kernel_base.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _compute_min_std_IQR(data):
    """Compute minimum of std and IQR for each variable."""
    s1 = np.std(data, axis=0)
    q75 = mquantiles(data, 0.75, axis=0).data[0]
    q25 = mquantiles(data, 0.25, axis=0).data[0]
    s2 = (q75 - q25) / 1.349  # IQR
    dispersion = np.minimum(s1, s2)
    return dispersion 
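As a quick check of the rule above on plain ndarray input: for normally distributed columns, IQR/1.349 approximates the standard deviation, so both terms of the minimum should land near 1 (random data, not from the project).

import numpy as np
from scipy.stats.mstats import mquantiles

rng = np.random.default_rng(1)
data = rng.normal(size=(500, 3))
q75 = mquantiles(data, 0.75, axis=0).data[0]
q25 = mquantiles(data, 0.25, axis=0).data[0]
print(np.minimum(np.std(data, axis=0), (q75 - q25) / 1.349))  # roughly [1, 1, 1]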
Example #6
Source File: kernel_regression.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _compute_sig(self):
        """Calculates the significance level of the variable tested"""

        m = self._est_cond_mean()
        Y = self.endog
        X = self.exog
        n = np.shape(X)[0]
        u = Y - m
        u = u - np.mean(u)  # center
        fct1 = (1 - 5**0.5) / 2.
        fct2 = (1 + 5**0.5) / 2.
        u1 = fct1 * u
        u2 = fct2 * u
        r = fct2 / (5 ** 0.5)
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = copy.deepcopy(u2)

            prob = np.random.uniform(0,1, size = (n,1))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            I_dist[j] = self._compute_test_stat(Y_boot, X)

        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"

        return sig 
Example #7
Source File: kernel_regression.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _compute_sig(self):
        """
        Computes the significance value for the variable(s) tested.

        The empirical distribution of the test statistic is obtained through
        bootstrapping the sample.  The null hypothesis is rejected if the test
        statistic is larger than the 90, 95, 99 percentiles.
        """
        t_dist = np.empty(shape=(self.nboot, ))
        Y = self.endog
        X = copy.deepcopy(self.exog)
        n = np.shape(Y)[0]

        X[:, self.test_vars] = np.mean(X[:, self.test_vars], axis=0)
        # Calculate the restricted mean. See p. 372 in [8]
        M = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
                      defaults = EstimatorSettings(efficient=False)).fit()[0]
        M = np.reshape(M, (n, 1))
        e = Y - M
        e = e - np.mean(e)  # recenter residuals
        for i in range(self.nboot):
            ind = np.random.random_integers(0, n-1, size=(n,1))
            e_boot = e[ind, 0]
            Y_boot = M + e_boot
            t_dist[i] = self._compute_test_stat(Y_boot, self.exog)

        self.t_dist = t_dist
        sig = "Not Significant"
        if self.test_stat > mquantiles(t_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(t_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(t_dist, 0.99):
            sig = "***"

        return sig 
Example #8
Source File: kernel_extras.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _compute_sig(self):
        Y = self.endog
        X = self.exog
        b = self.estimator(Y, X)
        m = self.fform(X, b)
        n = np.shape(X)[0]
        resid = Y - m
        resid = resid - np.mean(resid)  # center residuals
        self.test_stat = self._compute_test_stat(resid)
        sqrt5 = np.sqrt(5.)
        fct1 = (1 - sqrt5) / 2.
        fct2 = (1 + sqrt5) / 2.
        u1 = fct1 * resid
        u2 = fct2 * resid
        r = fct2 / sqrt5
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = u2.copy()

            prob = np.random.uniform(0,1, size = (n,))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            b_hat = self.estimator(Y_boot, X)
            m_hat = self.fform(X, b_hat)
            u_boot_hat = Y_boot - m_hat
            I_dist[j] = self._compute_test_stat(u_boot_hat)

        self.boots_results = I_dist
        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"
        return sig 
Example #9
Source File: stat_qq_line.py    From plotnine with GNU General Public License v2.0
def compute_group(cls, data, scales, **params):
        line_p = params['line_p']
        dparams = params['dparams']

        # Compute theoretical values
        df = stat_qq.compute_group(data, scales, **params)
        sample = df['sample'].values
        theoretical = df['theoretical'].values

        # Compute slope & intercept of the line through the quantiles
        cdist = get_continuous_distribution(params['distribution'])
        x_coords = cdist.ppf(line_p, *dparams)
        y_coords = mquantiles(sample, line_p)
        slope = (np.diff(y_coords)/np.diff(x_coords))[0]
        intercept = y_coords[0] - slope*x_coords[0]

        # Get x,y points that describe the line
        if params['fullrange'] and scales.x:
            x = scales.x.dimension()
        else:
            x = theoretical.min(), theoretical.max()

        x = np.asarray(x)
        y = slope * x + intercept
        data = pd.DataFrame({'x': x, 'y': y})
        return data 
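The slope/intercept computation above fits a line through two matched theoretical/sample quantile pairs. A self-contained sketch with assumed normal data (scipy.stats.norm stands in for the get_continuous_distribution helper):

import numpy as np
from scipy import stats
from scipy.stats.mstats import mquantiles

rng = np.random.default_rng(2)
sample = rng.normal(loc=3.0, scale=2.0, size=200)
line_p = (0.25, 0.75)

x_coords = stats.norm.ppf(line_p)        # theoretical quantiles
y_coords = mquantiles(sample, line_p)    # sample quantiles
slope = (np.diff(y_coords) / np.diff(x_coords))[0]
intercept = y_coords[0] - slope * x_coords[0]
print(slope, intercept)  # roughly the scale (2) and location (3)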
Example #10
Source File: stats_utils.py    From arviz with Apache License 2.0
def quantile(ary, q, axis=None, limit=None):
    """Use same quantile function as R (Type 7)."""
    if limit is None:
        limit = tuple()
    return mquantiles(ary, q, alphap=1, betap=1, axis=axis, limit=limit) 
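With alphap=1 and betap=1, mquantiles reproduces R's default Type 7 quantile definition, which is also what np.quantile computes with linear interpolation; a quick consistency check:

import numpy as np
from scipy.stats.mstats import mquantiles

x = np.array([3., 1., 4., 1., 5., 9., 2., 6.])
q = [0.1, 0.5, 0.9]
print(mquantiles(x, q, alphap=1, betap=1))
print(np.quantile(x, q))  # should agree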
Example #11
Source File: _kernel_base.py    From vnpy_crypto with MIT License
def _compute_min_std_IQR(data):
    """Compute minimum of std and IQR for each variable."""
    s1 = np.std(data, axis=0)
    q75 = mquantiles(data, 0.75, axis=0).data[0]
    q25 = mquantiles(data, 0.25, axis=0).data[0]
    s2 = (q75 - q25) / 1.349  # IQR
    dispersion = np.minimum(s1, s2)
    return dispersion 
Example #12
Source File: kernel_regression.py    From vnpy_crypto with MIT License
def _compute_sig(self):
        """Calculates the significance level of the variable tested"""

        m = self._est_cond_mean()
        Y = self.endog
        X = self.exog
        n = np.shape(X)[0]
        u = Y - m
        u = u - np.mean(u)  # center
        fct1 = (1 - 5**0.5) / 2.
        fct2 = (1 + 5**0.5) / 2.
        u1 = fct1 * u
        u2 = fct2 * u
        r = fct2 / (5 ** 0.5)
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = copy.deepcopy(u2)

            prob = np.random.uniform(0,1, size = (n,1))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            I_dist[j] = self._compute_test_stat(Y_boot, X)

        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"

        return sig 
Example #13
Source File: kernel_regression.py    From vnpy_crypto with MIT License
def _compute_sig(self):
        """
        Computes the significance value for the variable(s) tested.

        The empirical distribution of the test statistic is obtained through
        bootstrapping the sample.  The null hypothesis is rejected if the test
        statistic is larger than the 90, 95, 99 percentiles.
        """
        t_dist = np.empty(shape=(self.nboot, ))
        Y = self.endog
        X = copy.deepcopy(self.exog)
        n = np.shape(Y)[0]

        X[:, self.test_vars] = np.mean(X[:, self.test_vars], axis=0)
        # Calculate the restricted mean. See p. 372 in [8]
        M = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
                      defaults = EstimatorSettings(efficient=False)).fit()[0]
        M = np.reshape(M, (n, 1))
        e = Y - M
        e = e - np.mean(e)  # recenter residuals
        for i in range(self.nboot):
            ind = np.random.random_integers(0, n-1, size=(n,1))
            e_boot = e[ind, 0]
            Y_boot = M + e_boot
            t_dist[i] = self._compute_test_stat(Y_boot, self.exog)

        self.t_dist = t_dist
        sig = "Not Significant"
        if self.test_stat > mquantiles(t_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(t_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(t_dist, 0.99):
            sig = "***"

        return sig 
Example #14
Source File: partial_dependence.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles of ``X``.

    The grid is generated by placing ``grid_resolution`` equally
    spaced points between the ``percentiles`` of each column
    of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme
        values of the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed
        on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes 
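Stripped of the unique-value shortcut, the core of the grid construction above is a column-wise pair of empirical quantiles followed by linspace; a minimal sketch with made-up data:

import numpy as np
from scipy.stats.mstats import mquantiles

rng = np.random.default_rng(3)
X = rng.normal(size=(200, 2))
emp = mquantiles(X, prob=(0.05, 0.95), axis=0)   # shape (2, n_features)
axes = [np.linspace(emp[0, col], emp[1, col], num=5) for col in range(X.shape[1])]
print(axes)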
Example #15
Source File: partial_dependence.py    From Mastering-Elasticsearch-7.0 with MIT License
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles of ``X``.

    The grid is generated by placing ``grid_resolution`` equally
    spaced points between the ``percentiles`` of each column
    of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme
        values of the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed
        on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes 
Example #16
Source File: fixed_run.py    From opt-mmd with BSD 3-Clause "New" or "Revised" License
def get_estimates(gen, sigmas=None, n_reps=100, n_null_samps=1000,
                  cache_size=64, rep_states=False, name=None,
                  save_samps=False, thresh_levels=(.2, .1, .05, .01)):
    if sigmas is None:
        sigmas = np.logspace(-1.7, 1.7, num=30)
    sigmas = np.asarray(sigmas)

    mmd = sg.QuadraticTimeMMD()
    mmd.set_num_null_samples(n_null_samps)
    mmd_mk = mmd.multikernel()
    for s in sigmas:
        mmd_mk.add_kernel(sg.GaussianKernel(cache_size, 2 * s**2))

    info = OrderedDict()
    for k in 'sigma rep mmd_est var_est p'.split():
        info[k] = []
    thresh_names = []
    for l in thresh_levels:
        s = 'thresh_{}'.format(l)
        thresh_names.append(s)
        info[s] = []
    if save_samps:
        info['samps'] = []

    thresh_prob = 1 - np.asarray(thresh_levels)

    bar = pb.ProgressBar()
    if name is not None:
        bar.start()
        bar.widgets.insert(0, '{} '.format(name))
    for rep in bar(xrange(n_reps)):
        if rep_states:
            rep = np.random.randint(0, 2**32)
            X, Y = gen(rs=rep)
        else:
            X, Y = gen()
        n = X.shape[0]
        assert Y.shape[0] == n
        mmd.set_p(sg.RealFeatures(X.T))
        mmd.set_q(sg.RealFeatures(Y.T))

        info['sigma'].extend(sigmas)
        info['rep'].extend([rep] * len(sigmas))

        stat = mmd_mk.compute_statistic()
        info['mmd_est'].extend(stat / (n / 2))

        samps = mmd_mk.sample_null()
        info['p'].extend(np.mean(samps >= stat, axis=0))
        if save_samps:
            info['samps'].extend(samps.T)

        info['var_est'].extend(mmd_mk.compute_variance_h1())

        threshes = np.asarray(mquantiles(samps, prob=thresh_prob, axis=0))
        for s, t in zip(thresh_names, threshes):
            info[s].extend(t)

    info = pd.DataFrame(info)
    info.set_index(['sigma', 'rep'], inplace=True)
    return info 
Example #17
Source File: partial_dependence.py    From twitter-stock-recommendation with MIT License
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles of ``X``.

    The grid is generated by placing ``grid_resolution`` equally
    spaced points between the ``percentiles`` of each column
    of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme
        values of the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed
        on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes 
Example #18
Source File: fithic.py    From fithic with MIT License
def read_biases(infilename):
    global biasLowerBound
    global biasUpperBound
    startt = time.time()
    biasDic={}

    rawBiases=[]
    with gzip.open(infilename, 'rt') as infile:
        for line in infile:
            words=line.rstrip().split()
            chrom=words[0]; midPoint=int(words[1]); bias=float(words[2])
            if bias!=1.0:
               rawBiases.append(bias)
        botQ,med,topQ=mquantiles(rawBiases,prob=[0.05,0.5,0.95])
        with open(logfile, 'a') as log:
            log.write("5th quantile of biases: "+str(botQ)+"\n")
            log.write("50th quantile of biases: "+str(med)+"\n")
            log.write("95th quantile of biases: "+str(topQ)+"\n")
    totalC=0
    discardC=0
    with gzip.open(infilename, 'rt') as infile:
        for line in infile:
            words=line.rstrip().split()
            chrom=words[0]; midPoint=int(words[1]); bias=float(words[2]);
            if bias<biasLowerBound or math.isnan(bias):
                bias=-1 #botQ
                discardC+=1
            elif bias>biasUpperBound:
                bias=-1 #topQ
                discardC+=1
            totalC+=1
            if chrom not in biasDic:
                biasDic[chrom]={}
            if midPoint not in biasDic[chrom]:
                biasDic[chrom][midPoint]=bias
    with open(logfile, 'a') as log:
        log.write("Out of " + str(totalC) + " loci " +str(discardC) +" were discarded with biases not in range [0.5 2]\n\n" )
    endt = time.time()
    print("Bias file read. Time took %s" % (endt-startt))
    return biasDic # from read_biases
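The quantile call in read_biases just summarizes the non-unit biases at the 5th, 50th and 95th percentiles; in isolation (with made-up bias values) it looks like this:

from scipy.stats.mstats import mquantiles

raw_biases = [0.7, 0.9, 1.1, 1.3, 0.4, 2.5, 1.05]
botQ, med, topQ = mquantiles(raw_biases, prob=[0.05, 0.5, 0.95])
print(botQ, med, topQ)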

#==================================
# function to compute the contact probabilities
# applied for intra-chromosomal interactions
#==================================