Python scipy.stats.mstats.mquantiles() Examples

The following code examples show how to use scipy.stats.mstats.mquantiles(). They are taken from open-source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License 6 votes vote down vote up
def test_mquantiles_limit_keyword(self):
        # Regression test for Trac ticket #867
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example 2
Project: PyCausality   Author: ZacKeskin   File: Utils.py    GNU General Public License v3.0 6 votes vote down vote up
def equiprobable_bins(self,max_bins=15):
        """
        Compute per-dimension bin edges intended to hold equal numbers of samples.

        The edges are the ``i / max_bins`` quantiles (``i = 0 .. max_bins``) of
        each column of ``self.df``, obtained with SciPy's ``mquantiles()``.
        *** Because the quantiles are taken independently on each marginal, the
        joint partition is only approximately equiprobable, and the mismatch
        tends to become significant when many bins are requested ***

        Args:
            max_bins        -   (int)       The number of bins in each dimension
        Returns:
            bins            -   (dict)      The calculated bin-edges for pdf estimation
                                            using the histogram method, keyed by the
                                            entries of ``self.axes``
        """
        probabilities = np.array([step/max_bins for step in range(max_bins+1)])
        edges_by_column = mquantiles(a=self.df, prob=probabilities, axis=0).T.tolist()

        ## Duplicate edges are dropped; the remaining ones are kept in ascending order
        bins = {}
        for name, edges in zip(self.axes, edges_by_column):
            bins[name] = sorted(set(edges))

        if self.lag is None:
            return bins
        return self.__extend_bins__(bins)
Example 3
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0 6 votes vote down vote up
def test_mquantiles_limit_keyword(self):
        # Regression test for Trac ticket #867
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example 4
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0 6 votes vote down vote up
def FindQuantile(data,findme):
        """Print the quantiles of ``data`` and report where ``findme`` falls.

        Quantiles are computed with ``mquantiles`` at 10000 evenly spaced
        probabilities between 0 and 1. Every quantile is printed, then the
        quantiles are scanned in order until one is found that is not smaller
        than ``findme``.

        The ``print(...)`` calls take a single string argument, so the output
        is the same under Python 2 and Python 3.

        Args:
            data: values passed to ``mquantiles``.
            findme: value to locate among the quantiles.

        Returns:
            The zero-based index of the first quantile that is not smaller
            than ``findme``, or ``None`` when ``findme`` exceeds all of them.
        """
        print('entered FindQuantile')
        # 10000 evenly spaced probabilities; the labels below call them "permille"
        probset = list(numpy.linspace(0,1,10000))

        #http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mstats.mquantiles.html
        quantile_results = mquantiles(data,prob=probset)
        #see: http://stackoverflow.com/q/17330252/
        quantiles = []
        for i, value in enumerate(quantile_results):
                print(str(i) +  ' permille ' + str(value))
                quantiles.append(value)
        #goal is to figure out which quantile findme falls in:
        for i, quantile in enumerate(quantiles):
                if (findme > quantile):
                        print(str(quantile) + ' is too small for ' + str(findme))
                else:
                        print(str(quantile) + ' is the quantile value for the ' + str(i) + '-' + str(i + 1) + ' per mille quantile range. ' + str(findme) + ' falls within this range.')
                        return i
        return None
Example 5
Project: ble5-nrf52-mac   Author: tomasero   File: test_mstats_basic.py    MIT License 6 votes vote down vote up
def test_mquantiles_limit_keyword(self):
        # Regression test for Trac ticket #867
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example 6
Project: Computable   Author: ktraunmueller   File: test_mstats_basic.py    MIT License 6 votes vote down vote up
def test_mquantiles_limit_keyword(self):
        """Ticket #867"""
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example 7
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License 6 votes vote down vote up
def test_mquantiles_limit_keyword(self):
        # Regression test for Trac ticket #867
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example 8
Project: P3_image_processing   Author: latedude2   File: test_mstats_basic.py    MIT License 6 votes vote down vote up
def test_mquantiles_limit_keyword(self):
        # Regression test for Trac ticket #867
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example 9
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License 6 votes vote down vote up
def test_mquantiles_limit_keyword(self):
        # Regression test for Trac ticket #867
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example 10
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_mstats_basic.py    Apache License 2.0 6 votes vote down vote up
def test_mquantiles_limit_keyword(self):
        # Regression test for Trac ticket #867
        data = np.array([[6., 7., 1.],
                         [47., 15., 2.],
                         [49., 36., 3.],
                         [15., 39., 4.],
                         [42., 40., -999.],
                         [41., 41., -999.],
                         [7., -999., -999.],
                         [39., -999., -999.],
                         [43., -999., -999.],
                         [40., -999., -999.],
                         [36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example 11
Project: senior-design   Author: james-tate   File: test_mstats_basic.py    GNU General Public License v2.0 6 votes vote down vote up
def test_mquantiles_limit_keyword(self):
        """Ticket #867"""
        data = np.array([[   6.,    7.,    1.],
                         [  47.,   15.,    2.],
                         [  49.,   36.,    3.],
                         [  15.,   39.,    4.],
                         [  42.,   40., -999.],
                         [  41.,   41., -999.],
                         [   7., -999., -999.],
                         [  39., -999., -999.],
                         [  43., -999., -999.],
                         [  40., -999., -999.],
                         [  36., -999., -999.]])
        desired = [[19.2, 14.6, 1.45],
                   [40.0, 37.5, 2.5 ],
                   [42.8, 40.05, 3.55]]
        quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
        assert_almost_equal(quants, desired) 
Example 12
Project: MetaLex   Author: Levis0045   File: fgen.py    GNU Affero General Public License v3.0 6 votes vote down vote up
def gauss_degrade(image,margin=1.0,change=None,noise=0.02,minmargin=0.5,inner=1.0):
    """Binarize ``image``, add noise around its edges, blur it and re-threshold.

    The image is thresholded at the midpoint of its minimum and maximum, noise
    is added inside a band around the foreground boundary, the result is
    smoothed with ``gaussian_filter(..., margin)`` and finally thresholded at
    a quantile chosen so that roughly ``npixels`` pixels end up above it.

    NOTE(review): ``mean``, ``amin``, ``amax``, ``sum``, ``prod``, ``randn``,
    ``distance_transform_edt``, ``binary_dilation``, ``binary_erosion`` and
    ``gaussian_filter`` are not defined in this block; presumably they come
    from star-imports of NumPy/pylab and ``scipy.ndimage`` -- confirm against
    the module header. In particular ``sum`` must be the array-wide NumPy
    version for ``pixels``/``npixels`` to be scalars.

    Parameters
    ----------
    image : array with ``ndim`` 2 or 3
        A 3-D input is averaged over its last axis first.
    margin : float
        Used as the blur sigma, the distance bound for the default pixel
        count, and to derive the dilation/erosion iteration counts.
    change : float or None
        When given, the target pixel count is ``(1 + change)`` times the
        current foreground count instead of the distance-based count.
    noise : float
        Scale of the random noise, further multiplied by ``min(1, margin**2)``.
    minmargin : float
        If ``margin`` is below this value the binarized image is returned
        without any degradation.
    inner : float
        Subtracted from ``margin + 0.5`` to get the erosion iteration count.

    Returns
    -------
    Float array (``1.0 * boolean``) holding the degraded binary image.
    """
    # Collapse a 3-D image to 2-D by averaging over the last axis.
    if image.ndim==3: image = mean(image,axis=2)
    # Binarize at the midpoint between the darkest and brightest value (0/1 ints).
    m = mean([amin(image),amax(image)])
    image = 1*(image>m)
    # Margin too small to bother: return the plain binarization as floats.
    if margin<minmargin: return 1.0*image
    pixels = sum(image)
    # Target number of "on" pixels in the final result.
    if change is not None:
        npixels = int((1.0+change)*pixels)
    else:
        # Count pixels whose distance-transform value is within `margin`
        # (plus a small tolerance).
        edt = distance_transform_edt(image==0)
        npixels = sum(edt<=(margin+1e-4))
    # Iteration counts for the dilation (outer) and erosion (inner) of the band.
    r = int(max(1,2*margin+0.5))
    ri = int(margin+0.5-inner)
    # `mask` marks the band in which noise is injected.
    # NOTE(review): recent NumPy releases reject `-` between two boolean
    # arrays, which the `else` branch appears to do -- confirm on the
    # NumPy version in use.
    if ri<=0: mask = binary_dilation(image,iterations=r)-image
    else: mask = binary_dilation(image,iterations=r)-binary_erosion(image,iterations=ri)
    # NOTE(review): `image` is an integer array at this point, so this in-place
    # add of float noise relies on NumPy's casting rules -- confirm it neither
    # raises nor truncates the noise away on the NumPy version in use.
    image += mask*randn(*image.shape)*noise*min(1.0,margin**2)
    smoothed = gaussian_filter(1.0*image,margin)
    # Fraction of all pixels that should be "on", clipped to [0, 1].
    frac = max(0.0,min(1.0,npixels*1.0/prod(image.shape)))
    # Threshold at the (1 - frac) quantile so about `frac` of the pixels exceed it.
    threshold = mquantiles(smoothed,prob=[1.0-frac])[0]
    result = (smoothed>threshold)
    return 1.0*result
Example 13
Project: mHiC   Author: yezhengSTAT   File: s5_prior.py    MIT License 5 votes vote down vote up
def _parse_bias_line(line):
    """Split one bias-file line into (chromosome, mid-point, bias)."""
    words=line.rstrip().split()
    return words[0], int(words[1]), float(words[2])


def read_biases(infilename):
    """Read per-locus biases and mark those outside the accepted range.

    Each line of ``infilename`` holds three whitespace-separated fields:
    chromosome name, integer mid-point and float bias. The file is read
    twice: the first pass collects the biases different from 1.0 and logs
    their 5th/50th/95th quantiles; the second pass builds the result.

    Relies on the module-level names ``logfile``, ``biasLowerBound`` and
    ``biasUpperBound``. Both files are opened in ``with`` blocks so the
    handles are released even if a malformed line raises.

    Returns
    -------
    dict
        ``{chromosome: {midPoint: bias}}``. A bias below ``biasLowerBound``
        or above ``biasUpperBound`` is stored as ``-1``. When a
        (chromosome, mid-point) pair occurs more than once, the first
        occurrence is kept.
    """
    startt = time.time()
    biasDic={}

    # First pass: biases other than 1.0, used only for the quantile summary.
    rawBiases=[]
    with open(infilename, 'rt') as infile:
        for line in infile:
            _, _, bias = _parse_bias_line(line)
            if bias!=1.0:
                rawBiases.append(bias)
    botQ,med,topQ=mquantiles(rawBiases,prob=[0.05,0.5,0.95])
    with open(logfile, 'a') as log:
        log.write("5th quantile of biases: "+str(botQ)+"\n")
        log.write("50th quantile of biases: "+str(med)+"\n")
        log.write("95th quantile of biases: "+str(topQ)+"\n")

    # Second pass: build the lookup, flagging out-of-range biases with -1.
    totalC=0
    discardC=0
    with open(infilename, 'rt') as infile:
        for line in infile:
            chrom, midPoint, bias = _parse_bias_line(line)
            if bias<biasLowerBound or bias>biasUpperBound:
                bias=-1
                discardC+=1
            totalC+=1
            # setdefault keeps the first value seen for a repeated locus.
            biasDic.setdefault(chrom, {}).setdefault(midPoint, bias)
    with open(logfile, 'a') as log:
        # NOTE(review): the range in this message is hard-coded while the
        # actual bounds come from biasLowerBound/biasUpperBound -- confirm
        # they agree.
        log.write("Out of " + str(totalC) + " loci " +str(discardC) +" were discarded with biases not in range [0.5 2]\n\n" )
    endt = time.time()
    print("Bias file read. Time took %s" % (endt-startt))
    return biasDic # from read_biases
Example 14
Project: vnpy_crypto   Author: birforce   File: kernel_extras.py    MIT License 5 votes vote down vote up
def _compute_sig(self):
        Y = self.endog
        X = self.exog
        b = self.estimator(Y, X)
        m = self.fform(X, b)
        n = np.shape(X)[0]
        resid = Y - m
        resid = resid - np.mean(resid)  # center residuals
        self.test_stat = self._compute_test_stat(resid)
        sqrt5 = np.sqrt(5.)
        fct1 = (1 - sqrt5) / 2.
        fct2 = (1 + sqrt5) / 2.
        u1 = fct1 * resid
        u2 = fct2 * resid
        r = fct2 / sqrt5
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = u2.copy()

            prob = np.random.uniform(0,1, size = (n,))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            b_hat = self.estimator(Y_boot, X)
            m_hat = self.fform(X, b_hat)
            u_boot_hat = Y_boot - m_hat
            I_dist[j] = self._compute_test_stat(u_boot_hat)

        self.boots_results = I_dist
        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"
        return sig 
Example 15
Project: vnpy_crypto   Author: birforce   File: kernel_regression.py    MIT License 5 votes vote down vote up
def _compute_sig(self):
        """
        Computes the significance value for the variable(s) tested.

        The empirical distribution of the test statistic is obtained through
        bootstrapping the sample.  The null hypothesis is rejected if the test
        statistic is larger than the 90, 95, 99 percentiles.

        The bootstrap statistics are stored in ``self.t_dist``.

        Returns
        -------
        sig : str
            "Not Significant", "*", "**" or "***", for a test statistic
            exceeding none, the 0.9, the 0.95 or the 0.99 quantile of the
            bootstrap distribution respectively.
        """
        t_dist = np.empty(shape=(self.nboot, ))
        Y = self.endog
        X = copy.deepcopy(self.exog)
        n = np.shape(Y)[0]

        # Replace the tested columns by their column means (on a copy of exog).
        X[:, self.test_vars] = np.mean(X[:, self.test_vars], axis=0)
        # Calculate the restricted mean. See p. 372 in [8]
        M = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
                      defaults = EstimatorSettings(efficient=False)).fit()[0]
        M = np.reshape(M, (n, 1))
        e = Y - M
        e = e - np.mean(e)  # recenter residuals
        for i in range(self.nboot):
            # randint excludes its upper bound, so (0, n) draws the same
            # indices 0..n-1 as the deprecated random_integers(0, n-1).
            ind = np.random.randint(0, n, size=(n,1))
            e_boot = e[ind, 0]
            Y_boot = M + e_boot
            t_dist[i] = self._compute_test_stat(Y_boot, self.exog)

        self.t_dist = t_dist
        sig = "Not Significant"
        # Stricter levels overwrite the label set by looser ones.
        for level, stars in ((0.9, "*"), (0.95, "**"), (0.99, "***")):
            if self.test_stat > mquantiles(t_dist, level):
                sig = stars

        return sig
Example 16
Project: vnpy_crypto   Author: birforce   File: kernel_regression.py    MIT License 5 votes vote down vote up
def _compute_sig(self):
        """Calculates the significance level of the variable tested"""

        m = self._est_cond_mean()
        Y = self.endog
        X = self.exog
        n = np.shape(X)[0]
        u = Y - m
        u = u - np.mean(u)  # center
        fct1 = (1 - 5**0.5) / 2.
        fct2 = (1 + 5**0.5) / 2.
        u1 = fct1 * u
        u2 = fct2 * u
        r = fct2 / (5 ** 0.5)
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = copy.deepcopy(u2)

            prob = np.random.uniform(0,1, size = (n,1))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            I_dist[j] = self._compute_test_stat(Y_boot, X)

        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"

        return sig 
Example 17
Project: vnpy_crypto   Author: birforce   File: _kernel_base.py    MIT License 5 votes vote down vote up
def _compute_min_std_IQR(data):
    """Compute minimum of std and IQR for each variable."""
    s1 = np.std(data, axis=0)
    q75 = mquantiles(data, 0.75, axis=0).data[0]
    q25 = mquantiles(data, 0.25, axis=0).data[0]
    s2 = (q75 - q25) / 1.349  # IQR
    dispersion = np.minimum(s1, s2)
    return dispersion 
Example 18
Project: scanomatic   Author: Scan-o-Matic   File: maths.py    GNU General Public License v3.0 5 votes vote down vote up
def iqr_mean(data, *args, **kwargs):
    """Mean of the values of ``data`` lying between its first and third quartile.

    The quartiles are computed over the whole of ``data``; values outside
    them are masked and the mean of the rest is taken, with ``*args`` and
    ``**kwargs`` forwarded to ``mean``. A masked-array result has its masked
    entries filled with NaN.

    Returns ``None`` when ``quantiles.any()`` is false, i.e. when neither
    quartile is a truthy value (for instance both are zero or masked).
    """
    quartiles = mquantiles(data, prob=(0.25, 0.75))
    if not quartiles.any():
        return None

    lower, upper = quartiles
    trimmed = np.ma.masked_outside(data, lower, upper)
    val = trimmed.mean(*args, **kwargs)
    return val.filled(np.nan) if isinstance(val, np.ma.MaskedArray) else val
Example 19
Project: arviz   Author: arviz-devs   File: stats_utils.py    Apache License 2.0 5 votes vote down vote up
def quantile(ary, q, axis=None, limit=None):
    """Use same quantile function as R (Type 7).

    Thin wrapper around ``mquantiles`` with ``alphap=1`` and ``betap=1``;
    a ``limit`` of ``None`` is passed on as an empty tuple.
    """
    effective_limit = () if limit is None else limit
    return mquantiles(ary, q, axis=axis, limit=effective_limit, alphap=1, betap=1)
Example 20
Project: phystricks   Author: LaurentClaessens   File: BoxDiagramGraph.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self,values,h,delta_y=0):
        """Store summary statistics of ``values`` plus the ``h``/``delta_y`` settings.

        Sets ``average``, ``q1``, ``median``, ``q3`` (the three default
        ``mquantiles`` results), ``minimum`` and ``maximum``; ``h`` and
        ``delta_y`` are stored unchanged.
        """
        ObjectGraph.__init__(self,self)

        import numpy
        from scipy.stats.mstats import mquantiles

        quartiles=mquantiles(values)
        self.average=numpy.mean(values)
        self.q1,self.median,self.q3=quartiles[0],quartiles[1],quartiles[2]
        self.minimum,self.maximum=min(values),max(values)
        self.h,self.delta_y=h,delta_y
Example 21
Project: plotnine   Author: has2k1   File: stat_qq_line.py    GNU General Public License v2.0 5 votes vote down vote up
def compute_group(cls, data, scales, **params):
        """Compute the two end points of the line through the Q-Q quantiles.

        The line passes through the points whose x values are the
        distribution's ``ppf`` at ``params['line_p']`` and whose y values are
        the sample quantiles at the same probabilities. It is evaluated
        either over ``scales.x.dimension()`` (when ``params['fullrange']``
        and ``scales.x`` are truthy) or over the range of the theoretical
        values. Returns a DataFrame with columns ``x`` and ``y``.
        """
        probs = params['line_p']
        dist_args = params['dparams']

        # Sample and theoretical values come from the plain Q-Q stat
        qq = stat_qq.compute_group(data, scales, **params)
        observed = qq['sample'].values
        expected = qq['theoretical'].values

        # Reference points on both axes, then the line through them
        dist = get_continuous_distribution(params['distribution'])
        x_ref = dist.ppf(probs, *dist_args)
        y_ref = mquantiles(observed, probs)
        slope = (np.diff(y_ref)/np.diff(x_ref))[0]
        intercept = y_ref[0] - slope*x_ref[0]

        # x extent over which the line is drawn
        if params['fullrange'] and scales.x:
            x = scales.x.dimension()
        else:
            x = expected.min(), expected.max()
        x = np.asarray(x)

        return pd.DataFrame({'x': x, 'y': slope * x + intercept})
Example 22
Project: Splunking-Crime   Author: nccgroup   File: kernel_extras.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def _compute_sig(self):
        Y = self.endog
        X = self.exog
        b = self.estimator(Y, X)
        m = self.fform(X, b)
        n = np.shape(X)[0]
        resid = Y - m
        resid = resid - np.mean(resid)  # center residuals
        self.test_stat = self._compute_test_stat(resid)
        sqrt5 = np.sqrt(5.)
        fct1 = (1 - sqrt5) / 2.
        fct2 = (1 + sqrt5) / 2.
        u1 = fct1 * resid
        u2 = fct2 * resid
        r = fct2 / sqrt5
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = u2.copy()

            prob = np.random.uniform(0,1, size = (n,))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            b_hat = self.estimator(Y_boot, X)
            m_hat = self.fform(X, b_hat)
            u_boot_hat = Y_boot - m_hat
            I_dist[j] = self._compute_test_stat(u_boot_hat)

        self.boots_results = I_dist
        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"
        return sig 
Example 23
Project: Splunking-Crime   Author: nccgroup   File: kernel_regression.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def _compute_sig(self):
        """
        Computes the significance value for the variable(s) tested.

        The empirical distribution of the test statistic is obtained through
        bootstrapping the sample.  The null hypothesis is rejected if the test
        statistic is larger than the 90, 95, 99 percentiles.

        The bootstrap statistics are stored in ``self.t_dist``.

        Returns
        -------
        sig : str
            "Not Significant", "*", "**" or "***", for a test statistic
            exceeding none, the 0.9, the 0.95 or the 0.99 quantile of the
            bootstrap distribution respectively.
        """
        t_dist = np.empty(shape=(self.nboot, ))
        Y = self.endog
        X = copy.deepcopy(self.exog)
        n = np.shape(Y)[0]

        # Replace the tested columns by their column means (on a copy of exog).
        X[:, self.test_vars] = np.mean(X[:, self.test_vars], axis=0)
        # Calculate the restricted mean. See p. 372 in [8]
        M = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
                      defaults = EstimatorSettings(efficient=False)).fit()[0]
        M = np.reshape(M, (n, 1))
        e = Y - M
        e = e - np.mean(e)  # recenter residuals
        for i in range(self.nboot):
            # randint excludes its upper bound, so (0, n) draws the same
            # indices 0..n-1 as the deprecated random_integers(0, n-1).
            ind = np.random.randint(0, n, size=(n,1))
            e_boot = e[ind, 0]
            Y_boot = M + e_boot
            t_dist[i] = self._compute_test_stat(Y_boot, self.exog)

        self.t_dist = t_dist
        sig = "Not Significant"
        # Stricter levels overwrite the label set by looser ones.
        for level, stars in ((0.9, "*"), (0.95, "**"), (0.99, "***")):
            if self.test_stat > mquantiles(t_dist, level):
                sig = stars

        return sig
Example 24
Project: Splunking-Crime   Author: nccgroup   File: kernel_regression.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def _compute_sig(self):
        """Calculates the significance level of the variable tested"""

        m = self._est_cond_mean()
        Y = self.endog
        X = self.exog
        n = np.shape(X)[0]
        u = Y - m
        u = u - np.mean(u)  # center
        fct1 = (1 - 5**0.5) / 2.
        fct2 = (1 + 5**0.5) / 2.
        u1 = fct1 * u
        u2 = fct2 * u
        r = fct2 / (5 ** 0.5)
        I_dist = np.empty((self.nboot,1))
        for j in range(self.nboot):
            u_boot = copy.deepcopy(u2)

            prob = np.random.uniform(0,1, size = (n,1))
            ind = prob < r
            u_boot[ind] = u1[ind]
            Y_boot = m + u_boot
            I_dist[j] = self._compute_test_stat(Y_boot, X)

        sig = "Not Significant"
        if self.test_stat > mquantiles(I_dist, 0.9):
            sig = "*"
        if self.test_stat > mquantiles(I_dist, 0.95):
            sig = "**"
        if self.test_stat > mquantiles(I_dist, 0.99):
            sig = "***"

        return sig 
Example 25
Project: Splunking-Crime   Author: nccgroup   File: _kernel_base.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def _compute_min_std_IQR(data):
    """Compute minimum of std and IQR for each variable."""
    s1 = np.std(data, axis=0)
    q75 = mquantiles(data, 0.75, axis=0).data[0]
    q25 = mquantiles(data, 0.25, axis=0).data[0]
    s2 = (q75 - q25) / 1.349  # IQR
    dispersion = np.minimum(s1, s2)
    return dispersion 
Example 26
Project: DREAM_invivo_tf_binding_prediction_challenge_baseline   Author: nboley   File: baseline.py    GNU General Public License v3.0 5 votes vote down vote up
def aggregate_region_scores(
        scores, quantile_probs=(0.99, 0.95, 0.90, 0.75, 0.50)):
    """Summarize ``scores`` as a flat list of aggregate features.

    The default for ``quantile_probs`` is an immutable tuple so that it
    cannot be altered between calls; any sequence of probabilities is
    accepted.

    Parameters
    ----------
    scores : array-like with ``mean``, ``max`` and ``len`` support
    quantile_probs : sequence of floats
        Probabilities forwarded to ``mquantiles``.

    Returns
    -------
    list
        ``[mean / len(scores), max, *quantiles]`` with the quantiles in the
        order given by ``quantile_probs``.
    """
    # NOTE(review): the mean is additionally divided by the number of
    # scores -- confirm this scaling is intended.
    rv = [scores.mean()/len(scores), scores.max()]
    rv.extend(mquantiles(scores, prob=quantile_probs))
    return rv
Example 27
Project: yanntricks   Author: LaurentClaessens   File: BoxDiagramGraph.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self,values,h,delta_y=0):
        """Store summary statistics of ``values`` plus the ``h``/``delta_y`` settings.

        Sets ``average``, ``q1``, ``median``, ``q3`` (the three default
        ``mquantiles`` results), ``minimum`` and ``maximum``; ``h`` and
        ``delta_y`` are stored unchanged.
        """
        ObjectGraph.__init__(self,self)

        import numpy
        from scipy.stats.mstats import mquantiles

        quartiles=mquantiles(values)
        self.average=numpy.mean(values)
        self.q1,self.median,self.q3=quartiles[0],quartiles[1],quartiles[2]
        self.minimum,self.maximum=min(values),max(values)
        self.h,self.delta_y=h,delta_y
Example 28
Project: linear_neuron   Author: uglyboxer   File: partial_dependence.py    MIT License 4 votes vote down vote up
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles`` of ``X``.

    The grid is generated by placing ``grid_resolution`` equally
    spaced points between the ``percentiles`` of each column
    of ``X``. A column with fewer than ``grid_resolution`` unique
    values contributes those unique values instead.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme
        values of the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed
        on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid, as returned by ``cartesian(axes)``;
        ``grid.shape[1] == X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.

    Raises
    ------
    ValueError
        If ``percentiles`` does not have exactly two entries or an
        entry lies outside [0, 1].
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    # The percentiles of all columns do not depend on the loop variable,
    # so compute them once instead of once per high-resolution column.
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes
Example 29
Project: Weiss   Author: WangWenjun559   File: partial_dependence.py    Apache License 2.0 4 votes vote down vote up
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles`` of ``X``.

    The grid is generated by placing ``grid_resolution`` equally
    spaced points between the ``percentiles`` of each column
    of ``X``. A column with fewer than ``grid_resolution`` unique
    values contributes those unique values instead.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme
        values of the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed
        on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid, as returned by ``cartesian(axes)``;
        ``grid.shape[1] == X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.

    Raises
    ------
    ValueError
        If ``percentiles`` does not have exactly two entries or an
        entry lies outside [0, 1].
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    # The percentiles of all columns do not depend on the loop variable,
    # so compute them once instead of once per high-resolution column.
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes
Example 30
Project: wine-ml-on-aws-lambda   Author: pierreant   File: partial_dependence.py    Apache License 2.0 4 votes vote down vote up
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Build a grid of points spanning the ``percentiles`` of ``X``.

    Each column of ``X`` contributes one axis: its unique values when there
    are fewer than ``grid_resolution`` of them, otherwise
    ``grid_resolution`` equally spaced points between the column's two
    empirical percentiles.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        Lower and upper percentile, each in [0, 1], bounding the grid axes.
    grid_resolution : int
        Number of equally spaced points placed on each axis.

    Returns
    -------
    grid : ndarray
        All data points on the grid, as returned by ``cartesian(axes)``;
        ``grid.shape[1] == X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if any(not (0. <= x <= 1.) for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    # Row 0 holds the lower, row 1 the upper percentile of every column.
    bounds = mquantiles(X, prob=percentiles, axis=0)
    axes = []
    for col in range(X.shape[1]):
        distinct = np.unique(X[:, col])
        if distinct.shape[0] >= grid_resolution:
            # enough distinct values: spread points evenly between the bounds
            axes.append(np.linspace(bounds[0, col], bounds[1, col],
                                    num=grid_resolution, endpoint=True))
        else:
            # low-resolution feature: the distinct values are the axis
            axes.append(distinct)

    return cartesian(axes), axes
Example 31
Project: Splunking-Crime   Author: nccgroup   File: partial_dependence.py    GNU Affero General Public License v3.0 4 votes vote down vote up
def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Build a grid of points spanning the ``percentiles`` of ``X``.

    Each column of ``X`` contributes one axis: its unique values when there
    are fewer than ``grid_resolution`` of them, otherwise
    ``grid_resolution`` equally spaced points between the column's two
    empirical percentiles.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        Lower and upper percentile, each in [0, 1], bounding the grid axes.
    grid_resolution : int
        Number of equally spaced points placed on each axis.

    Returns
    -------
    grid : ndarray
        All data points on the grid, as returned by ``cartesian(axes)``;
        ``grid.shape[1] == X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if any(not (0. <= x <= 1.) for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    # Row 0 holds the lower, row 1 the upper percentile of every column.
    bounds = mquantiles(X, prob=percentiles, axis=0)
    axes = []
    for col in range(X.shape[1]):
        distinct = np.unique(X[:, col])
        if distinct.shape[0] >= grid_resolution:
            # enough distinct values: spread points evenly between the bounds
            axes.append(np.linspace(bounds[0, col], bounds[1, col],
                                    num=grid_resolution, endpoint=True))
        else:
            # low-resolution feature: the distinct values are the axis
            axes.append(distinct)

    return cartesian(axes), axes