Python autograd.numpy.mean() Examples

The following code examples show how to use autograd.numpy.mean(). They are drawn from open source Python projects.
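Before the project examples, here is a minimal stand-alone sketch (not taken from any of the projects below) of why autograd.numpy.mean shows up so often: it is a drop-in replacement for numpy.mean that autograd can differentiate through.

import autograd.numpy as np
from autograd import grad

def mse(w, X, y):
    # mean squared error of a linear model; autograd traces through np.mean
    return np.mean((np.dot(X, w) - y) ** 2)

grad_mse = grad(mse)                 # gradient with respect to the first argument, w

X = np.random.randn(20, 3)
y = np.random.randn(20)
print(grad_mse(np.zeros(3), X, y))   # length-3 gradient vector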

Example 1
Project: lightML   Author: jfzhang95   File: LinearClassification.py    MIT License
def transform(self, X, y):
        """transform function"""
        XMat = np.array(X)
        yMat = np.array(y)

        if XMat.shape[0] != yMat.shape[0]:
            yMat = yMat.T
        assert XMat.shape[0] == yMat.shape[0]

        XMat -= XMat.mean(axis=0)
        Sw, Sb = calc_Sw_Sb(XMat, yMat)
        evals, evecs = eig(Sw, Sb)

        np.ascontiguousarray(evals)
        np.ascontiguousarray(evecs)

        idx = np.argsort(evals)
        idx = idx[::-1]
        evecs = evecs[:, idx]

        self.W = evecs[:, :self.n_components]
        X_transformed = np.dot(XMat, self.W)

        return X_transformed 
Example 2
Project: lightML   Author: jfzhang95   File: LinearClassification.py    MIT License
def another_Sw_Sb(X, y):
    XMat = np.array(X)
    yMat = np.array(y)
    n_samples, n_features = XMat.shape

    Sw = np.zeros((n_features, n_features))
    Sb = np.zeros((n_features, n_features))

    labels = np.unique(yMat)
    for c in range(len(labels)):
        idx = np.squeeze(np.where(yMat == labels[c]))
        X_c = np.squeeze(XMat[idx[0], :]).T
        Sw += X_c.shape[0] * np.cov(X_c)

    total_mean = np.mean(XMat, axis=0)
    for c in range(len(labels)):
        idx = np.squeeze(np.where(yMat == labels[c]))
        X_c = np.squeeze(XMat[idx[0], :])
        mean_c = (np.mean(X_c, axis=0) - total_mean)
        Sb += X_c.shape[0] * mean_c.T * mean_c

    return Sw, Sb 
Example 3
Project: autograd-forward   Author: BB-UCL   File: numpy_grads.py    MIT License
def forward_grad_np_std(g, ans, gvs, vs, x, axis=None, ddof=0, keepdims=False):
    if axis is None:
        if gvs.iscomplex:
            num_reps = gvs.size / 2
        else:
            num_reps = gvs.size
    elif isinstance(axis, int):
        num_reps = gvs.shape[axis]
    elif isinstance(axis, tuple):
        num_reps = anp.prod(anp.array(gvs.shape)[list(axis)])

    if num_reps <= 1:
        return vs.zeros()
    x_minus_mean = anp.conj(x - anp.mean(x, axis=axis, keepdims=True))
    return (anp.sum(anp.real(g * x_minus_mean), axis=axis, keepdims=keepdims) /
            ((num_reps - ddof) * ans)) 
Example 4
Project: DistCal   Author: Srceh   File: GP_Beta_cal.py    Apache License 2.0
def prior_error(mu_shift, w, n_u):

    a = -numpy.abs(w[:, numpy.arange(0, n_u)*3] + mu_shift[1])

    b = numpy.abs(w[:, numpy.arange(0, n_u)*3+1] + mu_shift[3])

    c = w[:, numpy.arange(0, n_u)*3+2] + mu_shift[5]

    q = numpy.linspace(1e-8, 1 - 1e-8, 128)

    # q = q.ravel()

    q_hat = numpy.mean(1 / (1 + numpy.exp(numpy.log(q)[None, None, :] * a[:, :, None] +
                                          numpy.log(1 - q)[None, None, :] * b[:, :, None] +
                                          c[:, :, None])), axis=0)

    return numpy.mean((q - q_hat) ** 2) 
Example 5
Project: ReducedVarianceReparamGradients   Author: andymiller   File: bbvi_mvn_diag.py    MIT License
def __init__(self, lnpdf, D, glnpdf=None, lnpdf_is_vectorized=False):
        """
        Implements MCVI --- exposes elbo gradient and sampling methods.
        This class breaks the gradient down into parts

        dg/dz = dlnpdf(z)/dz * dz/dlam - dlnq(z)/dz * dz/dlam - dlnq(z)/dlam

        Parameterizes with mean and log-std! (not variance!)
            lam = [mean, log-std]
        """
        # base class sets up the gradient function organization
        super(DiagMvnBBVI, self).__init__(lnpdf, D, glnpdf, lnpdf_is_vectorized)

        # we note that the second two terms, with probability one, 
        # create the vector [0, 0, 0, ..., 0, 1., 1., ..., 1.]
        self.mask = np.concatenate([np.zeros(D), np.ones(D)])
        self.num_variational_params = 2*D
        self.D = D

    #####################################################################
    # Methods for various types of gradients of the ELBO                #
    #    -- that can be plugged into FilteredOptimization routines      #
    ##################################################################### 
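The docstring above describes breaking the reparameterization gradient into pieces. As a rough sketch of the sampling step it relies on (the names sample_z and neg_elbo_term here are illustrative, not the project's API):

import autograd.numpy as np
from autograd import grad

def sample_z(lam, eps):
    # lam = [mean, log-std]; eps ~ N(0, I) with shape (n_samps, D)
    D = eps.shape[1]
    mean, log_std = lam[:D], lam[D:]
    return mean + np.exp(log_std) * eps       # z = mean + sigma * eps

def neg_elbo_term(lam, eps, lnpdf):
    # Monte Carlo estimate of -E_q[ln p(z)]; the entropy term is omitted here
    return -np.mean(lnpdf(sample_z(lam, eps)))

g = grad(neg_elbo_term)   # differentiates through the sampling path w.r.t. lam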
Example 6
Project: ReducedVarianceReparamGradients   Author: andymiller   File: nn.py    MIT License
def make_standardize_funs(Xtrain, Ytrain):
    """ functions to scale/unscale data """
    # first create scale functions
    std_Xtrain = np.std(Xtrain, 0)
    std_Xtrain[ std_Xtrain == 0 ] = 1.
    mean_Xtrain = np.mean(Xtrain, 0)

    std_Ytrain  = np.std(Ytrain, 0)
    mean_Ytrain = np.mean(Ytrain, 0)

    std_X   = lambda X: (X - mean_Xtrain) / std_Xtrain
    ustd_X  = lambda X: X*std_Xtrain + mean_Xtrain

    std_Y   = lambda Y: (Y - mean_Ytrain) / std_Ytrain
    ustd_Y  = lambda Y: Y*std_Ytrain + mean_Ytrain
    return (std_X, ustd_X), (std_Y, ustd_Y), \
           (mean_Xtrain, std_Xtrain), (mean_Ytrain, std_Ytrain) 
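A quick usage sketch with toy data (not from the project, with np as autograd.numpy as in the snippet above), showing the round trip through the returned closures:

Xtrain = np.random.randn(100, 4) * 3.0 + 1.0
Ytrain = np.random.randn(100, 1)
(std_X, ustd_X), (std_Y, ustd_Y), _, _ = make_standardize_funs(Xtrain, Ytrain)

Xs = std_X(Xtrain)                       # roughly zero mean, unit scale per column
assert np.allclose(ustd_X(Xs), Xtrain)   # unstandardizing inverts standardizing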
Example 7
Project: momi2   Author: popgenmethods   File: confidence_region.py    GNU General Public License v3.0
def _many_score_cov(params, data, demo_func, **kwargs):
    params = np.array(params)

    def f_vec(x):
        ret = _composite_log_likelihood(
            data, demo_func(*x), vector=True, **kwargs)
        # centralize
        return ret - np.mean(ret)

    # g_out = einsum('ij,ik', jacobian(f_vec)(params), jacobian(f_vec)(params))
    # but computed in a roundabout way because jacobian implementation is slow
    def _g_out_antihess(x):
        l = f_vec(x)
        lc = make_constant(l)
        return np.sum(0.5 * (l**2 - l * lc - lc * l))
    return autograd.hessian(_g_out_antihess)(params) 
Example 8
Project: SyntheticStatistics   Author: BlissChapman   File: util.py    MIT License
def fit_gaussian_draw(X, J, seed=28, reg=1e-7, eig_pow=1.0):
    """
    Fit a multivariate normal to the data X (n x d) and draw J points 
    from the fit. 
    - reg: regularizer to use with the covariance matrix
    - eig_pow: raise eigenvalues of the covariance matrix to this power to construct 
        a new covariance matrix before drawing samples. Useful to shrink the spread 
        of the variance.
    """
    with NumpySeedContext(seed=seed):
        d = X.shape[1]
        mean_x = np.mean(X, 0)
        cov_x = np.cov(X.T)
        if d==1:
            cov_x = np.array([[cov_x]])
        [evals, evecs] = np.linalg.eig(cov_x)
        evals = np.maximum(0, np.real(evals))
        assert np.all(np.isfinite(evals))
        evecs = np.real(evecs)
        shrunk_cov = evecs.dot(np.diag(evals**eig_pow)).dot(evecs.T) + reg*np.eye(d)
        V = np.random.multivariate_normal(mean_x, shrunk_cov, J)
    return V 
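Assuming the surrounding util module is on the path (NumpySeedContext comes from it), a call might look like this; the data are made up:

X = np.random.randn(500, 3)                        # toy data, n x d
V = fit_gaussian_draw(X, J=10, seed=0, reg=1e-6, eig_pow=0.5)
print(V.shape)                                     # (10, 3): J draws from the shrunk fit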
Example 9
Project: SyntheticStatistics   Author: BlissChapman   File: tst.py    MIT License
def perform_test(self, tst_data):
        """perform the two-sample test and return values computed in a dictionary:
        {alpha: 0.01, pvalue: 0.0002, test_stat: 2.3, h0_rejected: True, ...}
        tst_data: an instance of TSTData
        """
        d = tst_data.dim()
        alpha = self.alpha
        mmd2_stat = self.compute_stat(tst_data, use_1sample_U=self.use_1sample_U)

        X, Y = tst_data.xy()
        k = self.kernel
        repeats = self.n_permute
        list_mmd2 = QuadMMDTest.permutation_list_mmd2(X, Y, k, repeats)
        # approximate p-value with the permutations
        pvalue = np.mean(list_mmd2 > mmd2_stat)

        results = {'alpha': self.alpha, 'pvalue': pvalue, 'test_stat': mmd2_stat,
                'h0_rejected': pvalue < alpha, 'list_permuted_mmd2': list_mmd2}
        return results 
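The p-value line works because np.mean over a boolean array is just the fraction of True entries. A tiny stand-alone illustration:

list_mmd2 = np.array([0.1, 0.5, 0.9, 1.2])    # statistics from permuted samples
mmd2_stat = 0.8                               # observed statistic
pvalue = np.mean(list_mmd2 > mmd2_stat)       # fraction of permuted stats above it
print(pvalue)                                 # 0.5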
Example 10
Project: SyntheticStatistics   Author: BlissChapman   File: tst.py    MIT License
def init_locs_randn(tst_data, n_test_locs, seed=1):
        """Fit a Gaussian to the merged data of the two samples and draw
        n_test_locs points from the Gaussian"""
        # set the seed
        rand_state = np.random.get_state()
        np.random.seed(seed)

        X, Y = tst_data.xy()
        d = X.shape[1]
        # fit a Gaussian in the middle of X, Y and draw sample to initialize T
        xy = np.vstack((X, Y))
        mean_xy = np.mean(xy, 0)
        cov_xy = np.cov(xy.T)
        [Dxy, Vxy] = np.linalg.eig(cov_xy + 1e-3*np.eye(d))
        Dxy = np.real(Dxy)
        Vxy = np.real(Vxy)
        Dxy[Dxy<=0] = 1e-3
        eig_pow = 0.9 # 1.0 = not shrink
        reduced_cov_xy = Vxy.dot(np.diag(Dxy**eig_pow)).dot(Vxy.T) + 1e-3*np.eye(d)

        T0 = np.random.multivariate_normal(mean_xy, reduced_cov_xy, n_test_locs)
        # reset the seed back to the original
        np.random.set_state(rand_state)
        return T0 
Example 11
Project: kernel-gof   Author: wittawatj   File: util.py    MIT License
def fit_gaussian_draw(X, J, seed=28, reg=1e-7, eig_pow=1.0):
    """
    Fit a multivariate normal to the data X (n x d) and draw J points 
    from the fit. 
    - reg: regularizer to use with the covariance matrix
    - eig_pow: raise eigenvalues of the covariance matrix to this power to construct 
        a new covariance matrix before drawing samples. Useful to shrink the spread 
        of the variance.
    """
    with NumpySeedContext(seed=seed):
        d = X.shape[1]
        mean_x = np.mean(X, 0)
        cov_x = np.cov(X.T)
        if d==1:
            cov_x = np.array([[cov_x]])
        [evals, evecs] = np.linalg.eig(cov_x)
        evals = np.maximum(0, np.real(evals))
        assert np.all(np.isfinite(evals))
        evecs = np.real(evecs)
        shrunk_cov = evecs.dot(np.diag(evals**eig_pow)).dot(evecs.T) + reg*np.eye(d)
        V = np.random.multivariate_normal(mean_x, shrunk_cov, J)
    return V 
Example 12
Project: kernel-gof   Author: wittawatj   File: data.py    MIT License
def _blocked_gibbs_next(self, X, H):
        """
        Sample from the mutual conditional distributions.
        """
        dh = H.shape[1]
        n, dx = X.shape
        B = self.B
        b = self.b

        # Draw H.
        XB2C = np.dot(X, self.B) + 2.0*self.c
        # Ph: n x dh matrix
        Ph = DSGaussBernRBM.sigmoid(XB2C)
        # H: n x dh
        H = (np.random.rand(n, dh) <= Ph)*2 - 1.0
        assert np.all(np.abs(H) - 1 <= 1e-6 )
        # Draw X.
        # mean: n x dx
        mean = old_div(np.dot(H, B.T),2.0) + b
        X = np.random.randn(n, dx) + mean
        return X, H 
Example 13
Project: kernel-gof   Author: wittawatj   File: data.py    MIT License
def gmm_sample(self, mean=None, w=None, N=10000,n=10,d=2,seed=10):
        np.random.seed(seed)
        self.d = d
        if mean is None:
            mean = np.random.randn(n,d)*10
        if w is None:
            w = np.random.rand(n)
        w = old_div(w,sum(w))
        multi = np.random.multinomial(N,w)
        X = np.zeros((N,d))
        base = 0
        for i in range(n):
            X[base:base+multi[i],:] = np.random.multivariate_normal(mean[i,:], np.eye(self.d), multi[i])
            base += multi[i]
        
        llh = np.zeros(N)
        for i in range(n):
            llh += w[i] * stats.multivariate_normal.pdf(X, mean[i,:], np.eye(self.d))
        #llh = llh/sum(llh)
        return X, llh 
Example 14
Project: paragami   Author: rgiordan   File: test_functions.py    Apache License 2.0
def test_autograd(self):
        pattern = get_test_pattern()

        # The autodiff test produces non-symmetric matrices.
        pattern['mat'].default_validate = False
        param_val = pattern.random()

        def tf1(param_val):
            return \
                np.mean(param_val['array'] ** 2) + \
                np.mean(param_val['mat'] ** 2)

        for free in [True, False]:
            tf1_flat = paragami.FlattenFunctionInput(tf1, pattern, free)
            param_val_flat = pattern.flatten(param_val, free=free)
            check_grads(
                tf1_flat, modes=['rev', 'fwd'], order=2)(param_val_flat) 
Example 15
Project: Efficient_Augmentation   Author: mkuchnik   File: LogisticRegression.py    MIT License
def mean_log_loss(self, X=None, y=None, L2_alpha=None):
        return np.mean(self.log_losses(X=X,
                                       y=y,
                                       L2_alpha=L2_alpha)) 
Example 16
Project: Efficient_Augmentation   Author: mkuchnik   File: LogisticRegression.py    MIT License
def mean_pred_losses(self, X=None, y=None):
        return np.mean(self.pred_losses(X=X,
                                        y=y)) 
Example 17
Project: lightML   Author: jfzhang95   File: LinearClassification.py    MIT License
def transform(self, X, y):
        """transform function"""
        XMat = np.array(X)
        yMat = np.array(y)

        if XMat.shape[0] != yMat.shape[0]:
            yMat = yMat.T
        assert XMat.shape[0] == yMat.shape[0]

        XMat -= XMat.mean(axis=0)
        Sw, Sb = calc_Sw_Sb(XMat, yMat)

        if self.method == 'svd':
            U, S, V = np.linalg.svd(Sw)
            S = np.diag(S)
            Sw_inversed = V * np.linalg.pinv(S) * U.T
            A = Sw_inversed * Sb
        elif self.method == 'auto':
            A = np.linalg.pinv(Sw) * Sb

        eigval, eigvec = np.linalg.eig(A)
        eigval = eigval[0:self.n_components]
        eigvec = eigvec[:, 0:self.n_components]
        X_transformed = np.dot(XMat, eigvec)
        self.W = eigvec

        return X_transformed 
Example 18
Project: autograd-forward   Author: BB-UCL   File: numpy_grads.py    MIT License
def forward_grad_np_var(g, ans, gvs, vs, x, axis=None, ddof=0, keepdims=False):
    if axis is None:
        if gvs.iscomplex:
            num_reps = gvs.size / 2
        else:
            num_reps = gvs.size
    elif isinstance(axis, int):
        num_reps = gvs.shape[axis]
    elif isinstance(axis, tuple):
        num_reps = anp.prod(anp.array(gvs.shape)[list(axis)])

    x_minus_mean = anp.conj(x - anp.mean(x, axis=axis, keepdims=True))
    return (2.0 * anp.sum(anp.real(g * x_minus_mean), axis=axis, keepdims=keepdims) /
            (num_reps - ddof)) 
Example 19
Project: autograd-forward   Author: BB-UCL   File: test_numpy.py    MIT License
def test_mean(): stat_check(np.mean) 
Example 20
Project: DistCal   Author: Srceh   File: GP_Beta_cal.py    Apache License 2.0
def link_log_lik(w, q, ln_q, ln_1_q, ln_s):

    w = w.reshape(-1, 3)

    a = -numpy.exp(w[:, 0]).reshape(-1, 1)

    b = numpy.exp(w[:, 1]).reshape(-1, 1)

    c = w[:, 2].reshape(-1, 1)

    tmp_sum = a * ln_q + c + b * ln_1_q

    tmp_exp = numpy.exp(tmp_sum)

    tmp_de = numpy.where(tmp_exp.ravel() <= 1e-16,
                         2 * numpy.log(1 + tmp_exp.ravel()),
                         2 * (tmp_sum.ravel() + numpy.log(1 + 1 / tmp_exp.ravel()))).reshape(-1, 1)

    ln_s_hat = ln_s + tmp_sum + numpy.log((a + b) * q - a) - ln_q - ln_1_q - tmp_de

    # L = numpy.sum(ln_s_hat)

    # if numpy.isnan(L):
    #     import pdb; pdb.set_trace()
    #
    # if numpy.isinf(L):
    #     import pdb; pdb.set_trace()

    # print([numpy.mean(ln_s), numpy.mean(ln_s_hat)])

    return ln_s_hat 
Example 21
Project: DistCal   Author: Srceh   File: GP_Beta_cal.py    Apache License 2.0
def mc_link_lik(w, mu_shift, q, ln_q, ln_1_q, ln_s):

    n = numpy.shape(q)[0]

    w_a = w[:, numpy.arange(0, n)*3]

    w_b = w[:, numpy.arange(0, n)*3+1]

    a = -numpy.exp(w_a / mu_shift[0] + mu_shift[1])

    b = numpy.exp(w_b / mu_shift[2] + mu_shift[3])

    c = w[:, numpy.arange(0, n)*3+2] / mu_shift[4] + mu_shift[5]

    tmp_sum = a * ln_q.ravel() + b * ln_1_q.ravel() + c

    tmp_de = numpy.where(tmp_sum <= 0,
                         2 * numpy.log(1 + numpy.exp(tmp_sum)),
                         2 * (tmp_sum + numpy.log(1 + 1 / (numpy.exp(tmp_sum)))))

    ln_s_hat = (tmp_sum + numpy.log((a + b) * q.ravel() - a) - ln_q.ravel() - ln_1_q.ravel() - tmp_de) + ln_s.ravel()

    mean_exp = numpy.mean(numpy.exp(ln_s_hat), axis=0)

    ln_mean_s_hat = numpy.where(mean_exp > 0, numpy.log(mean_exp), numpy.log(1e-16))

    link_ll = numpy.sum(ln_mean_s_hat)

    return link_ll 
Example 22
Project: kernel-activation-functions   Author: ispamm   File: demo_kaf_regression.py    MIT License
def loss_fcn(params, inputs, targets):
    return np.mean(np.square(predict_fcn(params, inputs) - targets))

# Iterator over mini-batches 
Example 23
Project: ReducedVarianceReparamGradients   Author: andymiller   File: bbvi_mvn_diag.py    MIT License
def elbo_grad_mc(self, lam, t, n_samps=1, eps=None):
        """ monte carlo approximation of the _negative_ ELBO """
        if eps is None:
            eps = np.random.randn(n_samps, self.D)
        return -1.*np.mean(self.dlnp(lam, eps) + self.mask, axis=0) 
Example 24
Project: ReducedVarianceReparamGradients   Author: andymiller   File: bbvi_mvn_diag.py    MIT License
def elbo_mc(self, lam, n_samps=100, full_monte_carlo=False):
        """ approximate the ELBO with samples """
        D = len(lam) // 2
        zs  = self.sample_z(lam, n_samps=n_samps)
        if full_monte_carlo:
            elbo_vals = self.lnpdf(zs) - mvn_diag_logpdf(zs, lam[:D], lam[D:])
        else:
            elbo_vals = self.lnpdf(zs) + mvn_diag_entropy(lam[D:])
        return np.mean(elbo_vals) 
Example 25
Project: ReducedVarianceReparamGradients   Author: andymiller   File: nn.py    MIT License
def accuracy(params, inputs, targets):
    target_class    = np.argmax(targets, axis=1)
    predicted_class = np.argmax(neural_net_predict(params, inputs), axis=1)
    return np.mean(predicted_class == target_class) 
Example 26
Project: momi2   Author: popgenmethods   File: confidence_region.py    GNU General Public License v3.0
def godambe(self, inverse=False):
        """
        Returns Godambe Information.
        If the true params are in the interior of the parameter space,
        the composite MLE will be approximately Gaussian with mean 0,
        and covariance given by the Godambe information.
        """
        fisher_inv = inv_psd(self.fisher, tol=self.psd_rtol)
        ret = check_psd(np.dot(fisher_inv, np.dot(
            self.score_cov, fisher_inv)), tol=self.psd_rtol)
        if not inverse:
            ret = inv_psd(ret, tol=self.psd_rtol)
        return ret 
Example 27
Project: momi2   Author: popgenmethods   File: sfs_stats.py    GNU General Public License v3.0
def sd(self):
        """
        Standard deviation of the statistic, estimated via jackknife
        """
        resids = self.jackknifed_array - self.observed
        return np.sqrt(np.mean(resids**2) * (
            len(self.jackknifed_array) - 1)) 
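A stand-alone illustration of the same formula, applied to the jackknife of a toy sample mean (for the mean, the result agrees exactly with the classical s/sqrt(n) standard error):

x = np.array([2.0, 4.0, 6.0, 8.0])
observed = np.mean(x)                                        # full-sample estimate
jackknifed = np.array([np.mean(np.delete(x, i)) for i in range(len(x))])
resids = jackknifed - observed
sd = np.sqrt(np.mean(resids**2) * (len(jackknifed) - 1))     # about 1.29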
Example 28
Project: momi2   Author: popgenmethods   File: sfs_stats.py    GNU General Public License v3.0
def bias(self):
        return np.mean(self.jackknifed_array - self.observed) * (
            len(self.jackknifed_array) - 1) 
Example 29
Project: momi2   Author: popgenmethods   File: sfs_stats.py    GNU General Public License v3.0
def bias(self):
        return np.mean(self.resids) * (len(self.jackknife) - 1) 
Example 30
Project: momi2   Author: popgenmethods   File: likelihood.py    GNU General Public License v3.0
def _score_cov(self, params):
        params = np.array(params)

        def f_vec(x):
            ret = self._log_lik(x, vector=True)
            # centralize
            return ret - np.mean(ret)

        j = ag.jacobian(f_vec)(params)
        return np.einsum('ij, ik', j, j) 
Example 31
Project: momi2   Author: popgenmethods   File: likelihood.py    GNU General Public License v3.0
def __init__(self, full_surface, pieces, rgen):
        try:
            assert pieces > 0 and pieces == int(pieces)
        except (TypeError, AssertionError):
            raise ValueError("pieces should be a positive integer")

        self.pieces = full_surface._get_stochastic_pieces(pieces, rgen)
        self.total_snp_counts = full_surface.sfs._total_freqs
        logger.info("Created {n_batches} minibatches, with an average of {n_snps} SNPs and {n_sfs} unique SFS entries per batch".format(n_batches=len(
            self.pieces), n_snps=full_surface.sfs.n_snps() / float(len(self.pieces)), n_sfs=np.mean([len(piece.sfs.configs) for piece in self.pieces])))

        self.rgen = rgen
        self.full_surface = full_surface 
Example 32
Project: momi2   Author: popgenmethods   File: test_normalizing_constant.py    GNU General Public License v3.0
def check_num_snps(sampled_n_dict, demo, num_loci, mut_rate, ascertainment_pop=None, error_matrices=None):
    if error_matrices is not None or ascertainment_pop is not None:
        # TODO
        raise NotImplementedError

    #seg_sites = momi.simulate_ms(
    #    ms_path, demo, num_loci=num_loci, mut_rate=mut_rate)
    #sfs = seg_sites.sfs

    num_bases = 1000
    sfs = demo.simulate_data(
        sampled_n_dict=sampled_n_dict,
        muts_per_gen=mut_rate/num_bases,
        recoms_per_gen=0,
        length=num_bases,
        num_replicates=num_loci)._sfs

    n_sites = sfs.n_snps(vector=True)

    n_sites_mean = np.mean(n_sites)
    n_sites_sd = np.std(n_sites)

    # TODO this test isn't very useful because expected_branchlen is not used anywhere internally anymore
    n_sites_theoretical = demo.expected_branchlen(sampled_n_dict) * mut_rate
    #n_sites_theoretical = momi.expected_total_branch_len(
    #    demo, ascertainment_pop=ascertainment_pop, error_matrices=error_matrices) * mut_rate

    zscore = -np.abs(n_sites_mean - n_sites_theoretical) / n_sites_sd
    pval = scipy.stats.norm.cdf(zscore) * 2.0

    assert pval >= .05 
Example 33
Project: momi2   Author: popgenmethods   File: test_msprime.py    GNU General Public License v3.0
def my_t_test(labels, theoretical, observed, min_samples=25):

    assert theoretical.ndim == 1 and observed.ndim == 2
    assert len(theoretical) == observed.shape[
        0] and len(theoretical) == len(labels)

    n_observed = np.sum(observed > 0, axis=1)
    theoretical, observed = theoretical[
        n_observed > min_samples], observed[n_observed > min_samples, :]
    labels = np.array(list(map(str, labels)))[n_observed > min_samples]
    n_observed = n_observed[n_observed > min_samples]

    runs = observed.shape[1]
    observed_mean = np.mean(observed, axis=1)
    bias = observed_mean - theoretical
    variances = np.var(observed, axis=1)

    t_vals = bias / np.sqrt(variances) * np.sqrt(runs)

    # get the p-values
    abs_t_vals = np.abs(t_vals)
    p_vals = 2.0 * scipy.stats.t.sf(abs_t_vals, df=runs - 1)
    print("# labels, p-values, empirical-mean, theoretical-mean, nonzero-counts")
    toPrint = np.array([labels, p_vals, observed_mean,
                        theoretical, n_observed]).transpose()
    toPrint = toPrint[np.array(toPrint[:, 1], dtype='float').argsort()[
        ::-1]]  # reverse-sort by p-vals
    print(toPrint)

    print("Note p-values are for t-distribution, which may not be a good approximation to the true distribution")

    # p-values should be uniformly distributed
    # so then the min p-value should be beta distributed
    return scipy.stats.beta.cdf(np.min(p_vals), 1, len(p_vals)) 
Example 34
Project: SyntheticStatistics   Author: BlissChapman   File: util.py    MIT License
def meddistance(X, subsample=None, mean_on_fail=True):
    """
    Compute the median of pairwise distances (not distance squared) of points
    in the matrix.  Useful as a heuristic for setting Gaussian kernel's width.

    Parameters
    ----------
    X : n x d numpy array
    mean_on_fail: True/False. If True, use the mean when the median distance is 0.
        This can happen especially when the data are discrete, e.g. 0/1, and
        there are slightly more 0s than 1s. In this case, the median can be 0.

    Return
    ------
    median distance
    """
    if subsample is None:
        D = dist_matrix(X, X)
        Itri = np.tril_indices(D.shape[0], -1)
        Tri = D[Itri]
        med = np.median(Tri)
        if med <= 0:
            # use the mean
            return np.mean(Tri)
        return med

    else:
        assert subsample > 0
        rand_state = np.random.get_state()
        np.random.seed(9827)
        n = X.shape[0]
        ind = np.random.choice(n, min(subsample, n), replace=False)
        np.random.set_state(rand_state)
        # recursion just one
        return meddistance(X[ind, :], None, mean_on_fail) 
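A usage sketch (dist_matrix is the project's own pairwise-distance helper, so this assumes the surrounding util module is imported):

X = np.random.randn(200, 5)
sigma = meddistance(X, subsample=100)   # median pairwise distance
gwidth2 = sigma ** 2                    # squared width for a Gaussian kernel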
Example 35
Project: SyntheticStatistics   Author: BlissChapman   File: tst.py    MIT License
def compute_stat(self, tst_data):
        """Compute the test statistic"""
        X, Y = tst_data.xy()
        #if X.shape[0] != Y.shape[0]:
        #    raise ValueError('Require nx = ny for now. Will improve if needed.')
        nx = X.shape[0]
        ny = Y.shape[0]
        mx = np.mean(X, 0)
        my = np.mean(Y, 0)
        mdiff = mx-my
        sx = np.cov(X.T)
        sy = np.cov(Y.T)
        s = old_div(sx,nx) + old_div(sy,ny)
        chi2_stat = np.dot(np.linalg.solve(s, mdiff), mdiff)
        return chi2_stat 
Example 36
Project: SyntheticStatistics   Author: BlissChapman   File: tst.py    MIT License
def two_moments(X, Y, kernel):
        """Compute linear mmd estimator and a linear estimate of
        the uncentred 2nd moment of h(z, z'). Total cost: O(n).

        return: (linear mmd, linear 2nd moment)
        """
        if X.shape[0] != Y.shape[0]:
            raise ValueError('Require sample size of X = size of Y')
        n = X.shape[0]
        if n%2 == 1:
            # make it even by removing the last row
            X = np.delete(X, -1, axis=0)
            Y = np.delete(Y, -1, axis=0)

        Xodd = X[::2, :]
        Xeven = X[1::2, :]
        assert Xodd.shape[0] == Xeven.shape[0]
        Yodd = Y[::2, :]
        Yeven = Y[1::2, :]
        assert Yodd.shape[0] == Yeven.shape[0]
        # linear mmd. O(n)
        xx = kernel.pair_eval(Xodd, Xeven)
        yy = kernel.pair_eval(Yodd, Yeven)
        xo_ye = kernel.pair_eval(Xodd, Yeven)
        xe_yo = kernel.pair_eval(Xeven, Yodd)
        h = xx + yy - xo_ye - xe_yo
        lin_mmd = np.mean(h)
        """
        Compute a linear-time estimate of the 2nd moment of h = E_z,z' h(z, z')^2.
        Note that MMD = E_z,z' h(z, z').
        Require O(n). Same trick as used in linear MMD to get O(n).
        """
        lin_2nd = np.mean(h**2)
        return lin_mmd, lin_2nd 
Example 37
Project: SyntheticStatistics   Author: BlissChapman   File: tst.py    MIT License
def compute_mean_variance_stat(tst_data, gwidth2):
        """Compute the mean and variance of the MMD^2, and the test statistic
        :return: (mean, variance)
        """

        X, Y = tst_data.xy()
        if X.shape[0] != Y.shape[0]:
            raise ValueError('Require sample size of X = size of Y')

        ker = kernel.KGauss(gwidth2)
        K = ker.eval(X, X)
        L = ker.eval(Y, Y)
        KL = ker.eval(X, Y)

        n = X.shape[0]
        # computing meanMMD is only correct for Gaussian kernels.
        meanMMD = 2.0/n * (1.0 - 1.0/n*np.sum(np.diag(KL)))

        np.fill_diagonal(K, 0.0)
        np.fill_diagonal(L, 0.0)
        np.fill_diagonal(KL, 0.0)

        varMMD = 2.0/n/(n-1) * 1.0/n/(n-1) * np.sum((K + L - KL - KL.T)**2)
        # test statistic
        test_stat = 1.0/n * np.sum(K + L - KL - KL.T)
        return meanMMD, varMMD, test_stat 
Example 38
Project: SyntheticStatistics   Author: BlissChapman   File: tst.py    MIT License
def perform_test(self, tst_data, return_simulated_stats=False):
        with util.ContextTimer() as t:
            alpha = self.alpha
            X, Y = tst_data.xy()
            n = X.shape[0]
            V = self.test_locs
            J = V.shape[0]

            # stat = n*(UME stat)
            # Z = n x J feature matrix
            stat, Z = self.compute_stat(tst_data, return_feature_matrix=True)

            # Simulate from the asymptotic null distribution
            n_simulate = self.n_simulate

            # Uncentred covariance matrix
            cov = np.dot(Z.T, Z)/float(n)

            arr_nume, eigs = UMETest.list_simulate_spectral(cov, J, n_simulate,
                    seed=self.seed)

            # approximate p-value with the permutations
            pvalue = np.mean(arr_nume > stat)

        results = {'alpha': self.alpha, 'pvalue': pvalue, 'test_stat': stat,
                'h0_rejected': pvalue < alpha, 'n_simulate': n_simulate,
                'time_secs': t.secs,
                }
        if return_simulated_stats:
            results['sim_stats'] = arr_nume
        return results 
Example 39
Project: SyntheticStatistics   Author: BlissChapman   File: tst.py    MIT License
def power_criterion(tst_data, test_locs, k, reg=1e-2, use_unbiased=True):
        """
        Compute the mean and standard deviation of the statistic under H1.
        Return power criterion = mean_under_H1/sqrt(var_under_H1 + reg) .
        """
        ume = UMETest(test_locs, k)
        Z = ume.feature_matrix(tst_data)
        u_mean, u_variance = UMETest.ustat_h1_mean_variance(Z,
                return_variance=True, use_unbiased=use_unbiased)

        # mean/sd criterion
        sigma_h1 = np.sqrt(u_variance + reg)
        ratio = old_div(u_mean, sigma_h1)
        return ratio 
Example 40
Project: SyntheticStatistics   Author: BlissChapman   File: tst.py    MIT License
def ustat_h1_mean_variance(feature_matrix, return_variance=True,
            use_unbiased=True):
        """
        Compute the mean and variance of the asymptotic normal distribution
        under H1 of the test statistic. The mean converges to a constant as
        n->\infty.

        feature_matrix: n x J feature matrix
        return_variance: If false, avoid computing and returning the variance.
        use_unbiased: If True, use the unbiased version of the mean. Can be
            negative.

        Return the mean [and the variance]
        """
        Z = feature_matrix
        n, J = Z.shape
        assert n > 1, 'Need n > 1 to compute the mean of the statistic.'
        if use_unbiased:
            t1 = np.sum(np.mean(Z, axis=0)**2)*(n/float(n-1))
            t2 = np.mean(np.sum(Z**2, axis=1))/float(n-1)
            mean_h1 = t1 - t2
        else:
            mean_h1 = np.sum(np.mean(Z, axis=0)**2)

        if return_variance:
            # compute the variance
            mu = np.mean(Z, axis=0) # length-J vector
            variance = 4*np.mean(np.dot(Z, mu)**2) - 4*np.sum(mu**2)**2
            return mean_h1, variance
        else:
            return mean_h1

# end of class UMETest 
Example 41
Project: SyntheticStatistics   Author: BlissChapman   File: data.py    MIT License
def gen_blobs(stretch, angle, blob_distance, num_blobs, num_samples):
    """Generate 2d blobs dataset """

    # rotation matrix
    r = np.array( [[math.cos(angle), -math.sin(angle)], [math.sin(angle), math.cos(angle)]] )
    eigenvalues = np.diag(np.array([np.sqrt(stretch), 1]))
    mod_matix = np.dot(r, eigenvalues)
    mean = old_div(float(blob_distance * (num_blobs-1)), 2)
    mu = np.random.randint(0, num_blobs,(num_samples, 2))*blob_distance - mean
    return np.random.randn(num_samples,2).dot(mod_matix) + mu 
Example 42
Project: SyntheticStatistics   Author: BlissChapman   File: data.py    MIT License
def __str__(self):
        mean_x = np.mean(self.X, 0)
        std_x = np.std(self.X, 0)
        mean_y = np.mean(self.Y, 0)
        std_y = np.std(self.Y, 0)
        prec = 4
        desc = ''
        desc += 'E[x] = %s \n'%(np.array_str(mean_x, precision=prec ) )
        desc += 'E[y] = %s \n'%(np.array_str(mean_y, precision=prec ) )
        desc += 'Std[x] = %s \n' %(np.array_str(std_x, precision=prec))
        desc += 'Std[y] = %s \n' %(np.array_str(std_y, precision=prec))
        return desc 
Example 43
Project: SyntheticStatistics   Author: BlissChapman   File: data.py    MIT License
def toy_2d_gauss_mean_diff(n_each, shift_2d=[0, 0], seed=1):
    """Generate a 2d toy data X, Y. 2 unit-variance Gaussians with different means."""

    rand_state = np.random.get_state()
    np.random.seed(seed)
    d = 2
    mean = [0, 0]
    X = np.random.randn(n_each, d) + mean
    Y = np.random.randn(n_each, d) + mean + shift_2d
    tst_data = TSTData(X, Y, '2D-N. shift: %s'%(str(shift_2d)) )

    np.random.set_state(rand_state)
    return tst_data 
Example 44
Project: SyntheticStatistics   Author: BlissChapman   File: data.py    MIT License
def toy_2d_gauss_variance_diff(n_each, std_factor=2, seed=1):
    """Generate a 2d toy data X, Y. 2 zero-mean Gaussians with different variances."""

    rand_state = np.random.get_state()
    np.random.seed(seed)
    d = 2
    X = np.random.randn(n_each, d)
    Y = np.random.randn(n_each, d)*std_factor
    tst_data = TSTData(X, Y, '2D-N. std_factor: %.3g'%(std_factor) )

    np.random.set_state(rand_state)
    return tst_data 
Example 45
Project: SyntheticStatistics   Author: BlissChapman   File: plot.py    MIT License
def plot_runtime(ex, fname, func_xvalues, xlabel, func_title=None):
    results = glo.ex_load_result(ex, fname)
    value_accessor = lambda job_results: job_results['time_secs']
    vf_pval = np.vectorize(value_accessor)
    # results['test_results'] is a dictionary: 
    # {'test_result': (dict from running perform_test(te) '...':..., }
    times = vf_pval(results['test_results'])
    repeats, _, n_methods = results['test_results'].shape
    time_avg = np.mean(times, axis=0)
    time_std = np.std(times, axis=0)

    xvalues = func_xvalues(results)

    #ns = np.array(results[xkey])
    #te_proportion = 1.0 - results['tr_proportion']
    #test_sizes = ns*te_proportion
    line_styles = exglo.func_plot_fmt_map()
    method_labels = exglo.get_func2label_map()
    
    func_names = [f.__name__ for f in results['method_job_funcs'] ]
    for i in range(n_methods):    
        te_proportion = 1.0 - results['tr_proportion']
        fmt = line_styles[func_names[i]]
        #plt.errorbar(ns*te_proportion, mean_rejs[:, i], std_pvals[:, i])
        method_label = method_labels[func_names[i]]
        plt.errorbar(xvalues, time_avg[:, i], yerr=time_std[:,i], fmt=fmt,
                label=method_label)
            
    ylabel = 'Time (s)'
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.gca().set_yscale('log')
    plt.xlim([np.min(xvalues), np.max(xvalues)])
    plt.xticks( xvalues, xvalues)
    plt.legend(loc='best')
    title = '%s. %d trials. '%( results['prob_label'],
            repeats ) if func_title is None else func_title(results)
    plt.title(title)
    #plt.grid()
    return results 
Example 46
Project: ISM_supervised_DR   Author: chieh-neu   File: kernel_lib.py    MIT License
def center_scale_entire_kernel(K):
	return (K - np.mean(K))/np.std(K) 
Example 47
Project: networked-popularity   Author: avalanchesiqi   File: predictors.py    MIT License
def arnet_cost_function(params, model_input, model_output):
    yhat = arnet_predict(params, model_input)[0]
    # minimize SMAPE
    return np.mean(200 * np.nan_to_num(np.abs(model_output - yhat) / (np.abs(model_output) + np.abs(yhat)))) 
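The cost above is the symmetric mean absolute percentage error (SMAPE), which is bounded between 0 and 200. A tiny stand-alone check with made-up numbers:

y_true = np.array([10.0, 20.0, 5.0])
y_hat = np.array([12.0, 18.0, 5.0])
smape = np.mean(200 * np.abs(y_true - y_hat) / (np.abs(y_true) + np.abs(y_hat)))
print(smape)   # about 9.57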
Example 48
Project: networked-popularity   Author: avalanchesiqi   File: predictors.py    MIT License
def train_arnet(self, start_params):
        arnet_autograd_func = grad(arnet_cost_function)
        arnet_bounds = [(0, 1)] * len(start_params) + [(0, 1)] * self.num_src

        iter_cnt = 0
        pred_train_output_mat = np.empty((0, len(self.true_train_output)), np.float)
        pred_test_output_mat = np.empty((0, self.num_output), np.float)
        link_weights_list = np.empty((0, self.num_src), np.float)
        network_ratio_list = []
        while iter_cnt < self.num_ensemble:
            arnet_init_values = np.array(start_params + [np.random.random()] * self.num_src)
            arnet_optimizer = optimize.minimize(arnet_cost_function, arnet_init_values, jac=arnet_autograd_func,
                                                method='L-BFGS-B',
                                                args=(self.train_input, self.true_train_output),
                                                bounds=arnet_bounds,
                                                options={'maxiter': 100, 'disp': False})
            arnet_fitted_params = arnet_optimizer.x
            # arnet_ar_coef = arnet_fitted_params[: self.num_input]
            arnet_link_weights = arnet_fitted_params[self.num_input:]

            arnet_pred_train, arnet_latent_train = arnet_predict(arnet_fitted_params, self.train_input, mode='train')
            arnet_pred_test, arnet_latent_test = arnet_predict(arnet_fitted_params, self.test_input, mode='test')

            pred_train_output_mat = np.vstack((pred_train_output_mat, arnet_pred_train))
            pred_test_output_mat = np.vstack((pred_test_output_mat, arnet_pred_test))
            link_weights_list = np.vstack((link_weights_list, arnet_link_weights))
            network_ratio_list.append(1 - sum(arnet_latent_train) / sum(arnet_pred_train))
            iter_cnt += 1

        self.pred_train_output = np.nanmean(pred_train_output_mat, axis=0)
        self.pred_test_output = np.nanmean(pred_test_output_mat, axis=0)
        self.link_weights = np.nanmean(link_weights_list, axis=0)
        self.network_ratio = np.mean(network_ratio_list) 
Example 49
Project: networked-popularity   Author: avalanchesiqi   File: predictors.py    MIT License
def smape_loss(y_true, y_pred):
    return K.mean(200 * K.abs(y_pred - y_true) / (K.abs(y_pred) + K.abs(y_true))) 
Example 50
Project: Elephant   Author: 983   File: approximate_house.py    The Unlicense
def loss(parameters):
    difference = house - make_curve(parameters)
    return np.mean(np.square(difference))