Python scipy.special.digamma() Examples

The following are 30 code examples of scipy.special.digamma(), drawn from open-source projects; the source file and project are listed above each example. You may also want to check out the other available functions and classes of the scipy.special module.
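For orientation, here is a minimal self-contained sketch of calling scipy.special.digamma directly; it is a NumPy ufunc, so it accepts scalars and arrays, broadcasts, and satisfies the recurrence psi(x + 1) = psi(x) + 1/x:

import numpy as np
from scipy.special import digamma

# Scalar evaluation: psi(1) is minus the Euler-Mascheroni constant
print(digamma(1.0))                # ~ -0.5772156649

# Vectorized evaluation over an array
x = np.array([0.5, 1.0, 2.0, 10.0])
print(digamma(x))                  # ~ [-1.9635, -0.5772, 0.4228, 2.2518]

# Recurrence check: psi(x + 1) = psi(x) + 1/x
print(np.allclose(digamma(x + 1), digamma(x) + 1.0 / x))   # True
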
Example #1
Source File: entropy_estimators.py    From cgpm with Apache License 2.0
def cmi(x, y, z, k=3, base=2):
  """Mutual information of x and y, conditioned on z
  x,y,z should be a list of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
  if x is a one-dimensional scalar and we have four samples
  """
  assert len(x)==len(y), 'Lists should have same length.'
  assert k <= len(x) - 1, 'Set k smaller than num samples - 1.'
  intens = 1e-10 # Small noise to break degeneracy, see doc.
  x = [list(p + intens*nr.rand(len(x[0]))) for p in x]
  y = [list(p + intens*nr.rand(len(y[0]))) for p in y]
  z = [list(p + intens*nr.rand(len(z[0]))) for p in z]
  points = zip2(x,y,z)
  # Find nearest neighbors in joint space, p=inf means max-norm.
  tree = ss.cKDTree(points)
  dvec = [tree.query(point, k+1, p=float('inf'))[0][k] for point in points]
  a = avgdigamma(zip2(x,z), dvec)
  b = avgdigamma(zip2(y,z), dvec)
  c = avgdigamma(z,dvec)
  d = digamma(k)
  return (-a-b+c+d) / log(base) 
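Note that cmi relies on two module-level helpers that this snippet does not show: avgdigamma (see Examples #12 and #30 below) and zip2, which joins the i-th vectors of each argument list into a single joint-space point. A hypothetical reconstruction of zip2, offered as a hedged sketch rather than the cgpm source:

def zip2(*args):
    # Hypothetical helper: concatenate the i-th vector of each argument list
    # into one joint-space point, e.g. zip2([[1], [2]], [[3], [4]]) -> [[1, 3], [2, 4]].
    return [sum(vecs, []) for vecs in zip(*args)]
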
Example #2
Source File: test_mpmath.py    From GraphicDesignPatternByPython with MIT License
def test_rgamma_zeros():
    # Test around the zeros at z = 0, -1, -2, ...,  -169. (After -169 we
    # get values that are out of floating point range even when we're
    # within 0.1 of the zero.)

    # Can't use too many points here or the test takes forever.
    dx = np.r_[-np.logspace(-1, -13, 3), 0, np.logspace(-13, -1, 3)]
    dy = dx.copy()
    dx, dy = np.meshgrid(dx, dy)
    dz = dx + 1j*dy
    zeros = np.arange(0, -170, -1).reshape(1, 1, -1)
    z = (zeros + np.dstack((dz,)*zeros.size)).flatten()
    dataset = []
    with mpmath.workdps(100):
        for z0 in z:
            dataset.append((z0, complex(mpmath.rgamma(z0))))

    dataset = np.array(dataset)
    FuncData(sc.rgamma, dataset, 0, 1, rtol=1e-12).check()


# ------------------------------------------------------------------------------
# digamma
# ------------------------------------------------------------------------------ 
Example #3
Source File: entropy_estimators.py    From cgpm with Apache License 2.0
def mi(x, y, k=3, base=2):
  """Mutual information of x and y.
  x,y should be a list of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
  if x is a one-dimensional scalar and we have four samples.
  """
  assert len(x)==len(y), 'Lists should have same length.'
  assert k <= len(x) - 1, 'Set k smaller than num samples - 1.'
  intens = 1e-10 # Small noise to break degeneracy, see doc.
  x = [list(p + intens*nr.rand(len(x[0]))) for p in x]
  y = [list(p + intens*nr.rand(len(y[0]))) for p in y]
  points = zip2(x,y)
  # Find nearest neighbors in joint space, p=inf means max-norm.
  tree = ss.cKDTree(points)
  dvec = [tree.query(point, k+1, p=float('inf'))[0][k] for point in points]
  a = avgdigamma(x,dvec)
  b = avgdigamma(y,dvec)
  c = digamma(k)
  d = digamma(len(x))
  return (-a-b+c+d) / log(base) 
Example #4
Source File: test_mpmath.py    From GraphicDesignPatternByPython with MIT License
def test_digamma_roots():
    # Test the special-cased roots for digamma.
    root = mpmath.findroot(mpmath.digamma, 1.5)
    roots = [float(root)]
    root = mpmath.findroot(mpmath.digamma, -0.5)
    roots.append(float(root))
    roots = np.array(roots)

    # If we test beyond a radius of 0.24 mpmath will take forever.
    dx = np.r_[-0.24, -np.logspace(-1, -15, 10), 0, np.logspace(-15, -1, 10), 0.24]
    dy = dx.copy()
    dx, dy = np.meshgrid(dx, dy)
    dz = dx + 1j*dy
    z = (roots + np.dstack((dz,)*roots.size)).flatten()
    dataset = []
    with mpmath.workdps(30):
        for z0 in z:
            dataset.append((z0, complex(mpmath.digamma(z0))))

    dataset = np.array(dataset)
    FuncData(sc.digamma, dataset, 0, 1, rtol=1e-14).check() 
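Outside the mpmath-based harness, the two roots this test special-cases can also be located directly with SciPy; a small sketch (the positive root sits at the minimum of the gamma function, near 1.46163):

import numpy as np
from scipy.special import digamma
from scipy.optimize import brentq

# digamma changes sign on (1, 2) and on (-0.9, -0.1), so brentq brackets both roots.
pos_root = brentq(digamma, 1.0, 2.0)
neg_root = brentq(digamma, -0.9, -0.1)
print(pos_root, neg_root)          # ~ 1.4616321, ~ -0.5040830
print(np.isclose(digamma(pos_root), 0.0, atol=1e-10))   # True
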
Example #5
Source File: test_mpmath.py    From GraphicDesignPatternByPython with MIT License
def test_digamma_negreal():
    # Test digamma around the negative real axis. Don't do this in
    # TestSystematic because the points need some jiggering so that
    # mpmath doesn't take forever.

    digamma = exception_to_nan(mpmath.digamma)

    x = -np.logspace(300, -30, 100)
    y = np.r_[-np.logspace(0, -3, 5), 0, np.logspace(-3, 0, 5)]
    x, y = np.meshgrid(x, y)
    z = (x + 1j*y).flatten()

    dataset = []
    with mpmath.workdps(40):
        for z0 in z:
            res = digamma(z0)
            dataset.append((z0, complex(res)))
    dataset = np.asarray(dataset)

    FuncData(sc.digamma, dataset, 0, 1, rtol=1e-13).check() 
Example #6
Source File: test_mpmath.py    From GraphicDesignPatternByPython with MIT License
def test_digamma_boundary():
    # Check that there isn't a jump in accuracy when we switch from
    # using the asymptotic series to the reflection formula.

    x = -np.logspace(300, -30, 100)
    y = np.array([-6.1, -5.9, 5.9, 6.1])
    x, y = np.meshgrid(x, y)
    z = (x + 1j*y).flatten()

    dataset = []
    with mpmath.workdps(30):
        for z0 in z:
            res = mpmath.digamma(z0)
            dataset.append((z0, complex(res)))
    dataset = np.asarray(dataset)

    FuncData(sc.digamma, dataset, 0, 1, rtol=1e-13).check()


# ------------------------------------------------------------------------------
# gammainc
# ------------------------------------------------------------------------------ 
Example #7
Source File: negative_binomial.py    From DiscoSnp with GNU Affero General Public License v3.0
def r_derv(r_var, vec):
    ''' Function that represents the derivative of the negative binomial likelihood wrt r
    @param r_var: The value of r in the derivative of the likelihood wrt r
    @param vec: The data vector used in the likelihood
    '''
    if not r_var or not vec:
        raise ValueError("r parameter and data must be specified")

    if r_var <= 0:
        raise ValueError("r must be strictly greater than 0")

    total_sum = 0
    obs_mean = np.mean(vec)  # Save the mean of the data
    n_pop = float(len(vec))  # Save the length of the vector, n_pop

    for obs in vec:
        total_sum += digamma(obs + r_var)

    total_sum -= n_pop*digamma(r_var)
    total_sum += n_pop*math.log(r_var / (r_var + obs_mean))

    return total_sum 
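A hedged usage sketch of r_derv on a toy count vector; the counts and r values below are made up for illustration, and the sign of the returned derivative indicates which way to move r when solving the maximum-likelihood score equation:

import math
import numpy as np
from scipy.special import digamma

counts = [3, 7, 2, 9, 4, 6]          # toy overdispersed count data
for r in (0.5, 2.0, 10.0):
    # derivative of the negative binomial log-likelihood with respect to r
    print(r, r_derv(r, counts))
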
Example #8
Source File: TestMathForHDPMerges.py    From refinery with MIT License
def calc_mergeLP(self, LP, kA, kB):
    ''' Calculate and return the new local parameters for a merged configuration
          that combines topics kA and kB

        Returns
        ---------
        LP : dict of local params, with updated fields and only K-1 comps
    '''
    K = LP['DocTopicCount'].shape[1]
    assert kA < kB
    LP = copy.deepcopy(LP)
    for key in ['DocTopicCount', 'word_variational', 'alphaPi']:
      LP[key][:,kA] = LP[key][:,kA] + LP[key][:,kB]
      LP[key] = np.delete(LP[key], kB, axis=1)
    LP['E_logPi'] = digamma(LP['alphaPi']) \
                    - digamma(LP['alphaPi'].sum(axis=1))[:,np.newaxis]
    assert LP['word_variational'].shape[1] == K-1
    return LP


  ######################################################### Verify Z terms
  ######################################################### 
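The E_logPi update above (and in several of the refinery examples below) uses the standard Dirichlet identity E[log pi_k] = psi(alpha_k) - psi(sum_j alpha_j). A small self-contained sketch verifying that identity by Monte Carlo:

import numpy as np
from scipy.special import digamma

rng = np.random.default_rng(0)
alpha = np.array([0.5, 2.0, 3.5])

analytic = digamma(alpha) - digamma(alpha.sum())
samples = rng.dirichlet(alpha, size=200_000)
monte_carlo = np.log(samples).mean(axis=0)
print(np.round(analytic, 3))
print(np.round(monte_carlo, 3))     # should agree to ~2 decimals
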
Example #9
Source File: entropy_estimators.py    From scikit-feature with GNU General Public License v2.0
def mi(x, y, k=3, base=2):
    """
    Mutual information of x and y; x, y should be a list of vectors, e.g. x = [[1.3],[3.7],[5.1],[2.4]]
    if x is a one-dimensional scalar and we have four samples
    """

    assert len(x) == len(y), "Lists should have same length"
    assert k <= len(x) - 1, "Set k smaller than num. samples - 1"
    intens = 1e-10  # small noise to break degeneracy, see doc.
    x = [list(p + intens * nr.rand(len(x[0]))) for p in x]
    y = [list(p + intens * nr.rand(len(y[0]))) for p in y]
    points = zip2(x, y)
    # Find nearest neighbors in joint space, p=inf means max-norm
    tree = ss.cKDTree(points)
    dvec = [tree.query(point, k+1, p=float('inf'))[0][k] for point in points]
    a, b, c, d = avgdigamma(x, dvec), avgdigamma(y, dvec), digamma(k), digamma(len(x))
    return (-a-b+c+d)/log(base) 
Example #10
Source File: metrics.py    From reportgen with MIT License
def cond_mutual_info(x, y, z, k=3, base=2):
        """ Mutual information of x and y, conditioned on z
            x, y, z should be a list of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
            if x is a one-dimensional scalar and we have four samples
        """
        x=entropyc.__reshape(x)
        y=entropyc.__reshape(y)
        z=entropyc.__reshape(z)
        assert len(x) == len(y), "Lists should have same length"
        assert k <= len(x) - 1, "Set k smaller than num. samples - 1"
        intens = 1e-10  # small noise to break degeneracy, see doc.
        x = [list(p + intens * nr.rand(len(x[0]))) for p in x]
        y = [list(p + intens * nr.rand(len(y[0]))) for p in y]
        z = [list(p + intens * nr.rand(len(z[0]))) for p in z]
        points = zip2(x, y, z)
        # Find nearest neighbors in joint space, p=inf means max-norm
        tree = ss.cKDTree(points)
        dvec = [tree.query(point, k + 1, p=float('inf'))[0][k] for point in points]
        a, b, c, d = avgdigamma(zip2(x, z), dvec), avgdigamma(zip2(y, z), dvec), avgdigamma(z, dvec), digamma(k)
        return (-a - b + c + d) / log(base) 
Example #11
Source File: metrics.py    From reportgen with MIT License
def mutual_info(x, y, k=3, base=2):
        """ Mutual information of x and y
            x, y should be a list of vectors, e.g. x = [[1.3], [3.7], [5.1], [2.4]]
            if x is a one-dimensional scalar and we have four samples
        """
        x=entropyc.__reshape(x)
        y=entropyc.__reshape(y)
        assert len(x) == len(y), "Lists should have same length"
        assert k <= len(x) - 1, "Set k smaller than num. samples - 1"
        intens = 1e-10  # small noise to break degeneracy, see doc.
        x = [list(p + intens * nr.rand(len(x[0]))) for p in x]
        y = [list(p + intens * nr.rand(len(y[0]))) for p in y]
        points = zip2(x, y)
        # Find nearest neighbors in joint space, p=inf means max-norm
        tree = ss.cKDTree(points)
        dvec = [tree.query(point, k + 1, p=float('inf'))[0][k] for point in points]
        a, b, c, d = avgdigamma(x, dvec), avgdigamma(y, dvec), digamma(k), digamma(len(x))
        return (-a - b + c + d) / log(base) 
Example #12
Source File: utils.py    From mdentropy with MIT License
def avgdigamma(data, dvec, leaf_size=16):
    """Convenience function for finding expectation value of <psi(nx)> given
    some number of neighbors in some radius in a marginal space.

    Parameters
    ----------
    data : numpy.ndarray
    dvec : array_like (n_points,)

    Returns
    -------
    avgdigamma : float
        expectation value of <psi(nx)>
    """
    tree = BallTree(data, leaf_size=leaf_size, p=float('inf'))

    n_points = tree.query_radius(data, dvec - EPS, count_only=True)

    return digamma(n_points).mean() 
Example #13
Source File: entropy_estimators.py    From scikit-feature with GNU General Public License v2.0
def cmi(x, y, z, k=3, base=2):
    """
    Mutual information of x and y, conditioned on z; x, y, z should be a list of vectors, e.g. x = [[1.3],[3.7],[5.1],[2.4]]
    if x is a one-dimensional scalar and we have four samples
    """

    assert len(x) == len(y), "Lists should have same length"
    assert k <= len(x) - 1, "Set k smaller than num. samples - 1"
    intens = 1e-10  # small noise to break degeneracy, see doc.
    x = [list(p + intens * nr.rand(len(x[0]))) for p in x]
    y = [list(p + intens * nr.rand(len(y[0]))) for p in y]
    z = [list(p + intens * nr.rand(len(z[0]))) for p in z]
    points = zip2(x, y, z)
    # Find nearest neighbors in joint space, p=inf means max-norm
    tree = ss.cKDTree(points)
    dvec = [tree.query(point, k+1, p=float('inf'))[0][k] for point in points]
    a, b, c, d = avgdigamma(zip2(x, z), dvec), avgdigamma(zip2(y, z), dvec), avgdigamma(z, dvec), digamma(k)
    return (-a-b+c+d)/log(base) 
Example #14
Source File: BernRelObsModel.py    From refinery with MIT License
def ElogphiOLD(self):
    return digamma(self.Lam) - digamma(self.LamSum)[:,np.newaxis]

  ######################################################### Local Params
  #########################################################   E-step 
Example #15
Source File: mfm.py    From yass with Apache License 2.0
def mult_psi(x, d):
    """
        Calculates the multivariate digamma function. Returns an M x 1 array
        of the multivariate digamma values.

        parameters:
        -----------
        x: np.array
            M x 1 array containing the points at which to evaluate
        d: int
            dimension of the multivariate digamma function
    """
    v = x - np.asarray([range(d)]) / 2.0
    u = np.sum(specsci.digamma(v), axis=1)
    return u[:, np.newaxis] 
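Assuming the mult_psi above is in scope, the following hedged sketch cross-checks it against the derivative of scipy.special.multigammaln (the log multivariate gamma function) using central differences:

import numpy as np
from scipy.special import multigammaln

d = 3
x = np.array([[2.5], [4.0], [7.5]])       # M x 1 array of evaluation points

# mult_psi(x, d) should match d/dx multigammaln(x, d), estimated here
# by central finite differences.
h = 1e-6
numeric = np.array([[(multigammaln(xi + h, d) - multigammaln(xi - h, d)) / (2 * h)]
                    for xi in x.ravel()])
print(np.allclose(mult_psi(x, d), numeric, atol=1e-5))   # True (up to FD error)
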
Example #16
Source File: bayesian_mixture.py    From twitter-stock-recommendation with MIT License
def _estimate_log_prob(self, X):
        _, n_features = X.shape
        # We remove `n_features * np.log(self.degrees_of_freedom_)` because
        # the precision matrix is normalized
        log_gauss = (_estimate_log_gaussian_prob(
            X, self.means_, self.precisions_cholesky_, self.covariance_type) -
            .5 * n_features * np.log(self.degrees_of_freedom_))

        log_lambda = n_features * np.log(2.) + np.sum(digamma(
            .5 * (self.degrees_of_freedom_ -
                  np.arange(0, n_features)[:, np.newaxis])), 0)

        return log_gauss + .5 * (log_lambda -
                                 n_features / self.mean_precision_) 
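The log_lambda term above is the expected log determinant of a Wishart-distributed precision matrix (minus the log|W| contribution, which is dropped here because the precision is kept in normalized form). A self-contained sketch verifying the underlying identity by Monte Carlo; the parameter values are illustrative only:

import numpy as np
from scipy import stats
from scipy.special import digamma

d, dof = 3, 7.5
W = 0.5 * np.eye(d)                       # Wishart scale matrix

# E[log|X|] for X ~ Wishart_d(dof, W):
#   sum_i psi((dof - i)/2) + d*log(2) + log|W|,  for i = 0..d-1
analytic = (np.sum(digamma(0.5 * (dof - np.arange(d))))
            + d * np.log(2.0) + np.linalg.slogdet(W)[1])

samples = stats.wishart(df=dof, scale=W).rvs(size=100_000, random_state=0)
monte_carlo = np.mean([np.linalg.slogdet(S)[1] for S in samples])
print(round(analytic, 2), round(monte_carlo, 2))   # should agree to ~2 decimals
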
Example #17
Source File: dmr.py    From dmr with MIT License
def _dll(self, x):
        alpha = self.get_alpha(x)
        result = np.sum(self.vecs[:,np.newaxis,:] * alpha[:,:,np.newaxis]\
            * (special.digamma(np.sum(alpha, axis=1))[:,np.newaxis,np.newaxis]\
            - special.digamma(np.sum(self.n_m_z+alpha, axis=1))[:,np.newaxis,np.newaxis]\
            + special.digamma(self.n_m_z+alpha)[:,:,np.newaxis]\
            - special.digamma(alpha)[:,:,np.newaxis]), axis=0)\
            - x / (self.sigma ** 2)
        result = -result
        return result 
Example #18
Source File: utilities.py    From Quantum_Edward with MIT License
def grad_log_beta_prob(x, conc0, conc1):
    """
    This function takes in a probability 'x' and returns the GRADIENT of the
    log probability of 'x', according to the Beta distribution with
    concentrations 'conc0' and 'conc1'. The Wikipedia article cited below
    refers to conc0 as alpha and to conc1 as beta.

    x, conc0, conc1, g0, g1 are all floats, or they are all numpy arrays of
    the same shape. This method works elementwise for arrays.

    References
    ----------
    https://en.wikipedia.org/wiki/Beta_distribution


    Parameters
    ----------
    x : float | np.array
        x in interval [0, 1]
    conc0 : float | np.array
        Concentration 0, must be >= 0
    conc1 : float | np.array
        Concentration 1, must be >= 0

    Returns
    -------
    [g0, g1]: list[float | np.array, float | np.array]
        g0 and g1 are the gradients with respect to the concentrations conc0
        and conc1.

    """
    def fun(z):
        a = np.clip(np.real(z), 1e-5, 15)
        b = np.clip(np.imag(z), 1e-5, 15)
        return sp.digamma(a+b) - sp.digamma(a)
    vfun = np.vectorize(fun)
    x = np.clip(x, .00001, .99999)
    g0 = np.log(x) + vfun(conc0 + 1j*conc1)
    g1 = np.log(1-x) + vfun(conc1 + 1j*conc0)
    return [g0, g1] 
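Assuming grad_log_beta_prob is in scope, a quick central-difference check of the returned gradients against scipy.stats.beta.logpdf; the inputs are illustrative and stay inside the function's clipping range:

import numpy as np
from scipy.stats import beta

x, a, b = 0.3, 2.0, 5.0
g0, g1 = grad_log_beta_prob(x, a, b)

# Central-difference estimate of d/da and d/db of log Beta(x; a, b)
h = 1e-6
num_g0 = (beta.logpdf(x, a + h, b) - beta.logpdf(x, a - h, b)) / (2 * h)
num_g1 = (beta.logpdf(x, a, b + h) - beta.logpdf(x, a, b - h)) / (2 * h)
print(np.allclose([g0, g1], [num_g0, num_g1], atol=1e-5))   # True
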
Example #19
Source File: scHPF_.py    From scHPF with BSD 2-Clause "Simplified" License
def entropy(self):
        """Entropy of variational Gammas"""
        return  self.vi_shape - np.log(self.vi_rate) \
                + gammaln(self.vi_shape) \
                + (1 - self.vi_shape) * digamma(self.vi_shape) 
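The expression above is the differential entropy of a Gamma distribution with shape vi_shape and rate vi_rate. A small hedged check of the same identity against scipy.stats.gamma, which is parameterized by scale = 1/rate:

import numpy as np
from scipy.special import digamma, gammaln
from scipy.stats import gamma

a, b = 2.5, 4.0                     # shape and rate of a Gamma distribution
entropy_expr = a - np.log(b) + gammaln(a) + (1 - a) * digamma(a)
print(np.isclose(entropy_expr, gamma.entropy(a, scale=1.0 / b)))   # True
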
Example #20
Source File: scHPF_.py    From scHPF with BSD 2-Clause "Simplified" License
def e_logx(self):
        """Expectation of the log of random variable given variational
        distribution(s)"""
        return digamma(self.vi_shape) - np.log(self.vi_rate) 
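e_logx implements the standard identity E[log X] = psi(shape) - log(rate) for a Gamma-distributed X; a quick Monte Carlo sketch of the same identity with plain NumPy:

import numpy as np
from scipy.special import digamma

rng = np.random.default_rng(1)
a, b = 3.0, 2.0                                   # shape and rate
analytic = digamma(a) - np.log(b)                 # E[log X], X ~ Gamma(a, rate=b)
samples = rng.gamma(shape=a, scale=1.0 / b, size=500_000)
print(round(analytic, 3), round(np.log(samples).mean(), 3))   # should agree to ~2 decimals
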
Example #21
Source File: mdmr.py    From dmr with MIT License
def _dll(self, x):
        alpha = self.get_alpha(x)
        return -(np.sum(self._dll_common(x)\
            * (special.digamma(np.sum(alpha, axis=1))[:,np.newaxis,np.newaxis]\
            - special.digamma(np.sum(self.n_m_z+alpha, axis=1))[:,np.newaxis,np.newaxis]\
            + special.digamma(self.n_m_z+alpha)[:,:,np.newaxis]\
            - special.digamma(alpha)[:,:,np.newaxis]), axis=0)\
            - x / (self.sigma ** 2)) 
Example #22
Source File: OptimizerForHDPPE.py    From refinery with MIT License
def objGrad_v(v, sumLogPi, nDoc, gamma, alpha0):
  ''' Returns K-vector gradient of the constrained objective
      Args
      -------
      v := K-vector of real numbers, subject to 0 < v < 1

      Returns
      -------
      g := K-vector of real numbers, 
            g[k] = gradient of -1 * L(v) with respect to v[k]
  '''
  K = v.size
  beta = v2beta(v)
  dv_logpV = (1 - alpha0) / (1-v)

  dv_logpPi_const = np.zeros(K)
  psibeta = digamma(gamma*beta) * beta
  for k in xrange(K):
    Sk = -1.0*psibeta[k]/v[k] + np.sum( psibeta[k+1:]/(1-v[k]) )
    dv_logpPi_const[k] = nDoc * gamma * Sk

  dv_logpPi = np.zeros(K)
  sbeta = sumLogPi * beta
  for k in xrange(K):
    Sk = sbeta[k]/v[k] - np.sum( sbeta[k+1:]/(1-v[k]) )
    dv_logpPi[k] = gamma * Sk

  return -1.0* ( dv_logpV + dv_logpPi_const + dv_logpPi ) 
Example #23
Source File: FromScratchMult.py    From refinery with MIT License
def getLPfromResp(Resp, Data, smoothMass=0.01):
  ''' Returns local parameters (LP) for an HDP model
        given word-level responsibility matrix and Data (which indicates num docs)
      Returns
      --------
      LP : dict with fields 
              word_variational
              alphaPi
              DocTopicCount
              E_logPi
  '''
  D = Data.nDoc
  K = Resp.shape[1]
  DocTopicC = np.zeros((D, K))
  for dd in range(D):
    start,stop = Data.doc_range[dd,:]
    DocTopicC[dd,:] = np.dot(Data.word_count[start:stop],        
                               Resp[start:stop,:]
                             )
  # Alpha and ElogPi : D x K+1 matrices
  padCol = smoothMass * np.ones((D,1))
  alph = np.hstack( [DocTopicC + smoothMass, padCol])    
  ElogPi = digamma(alph) - digamma(alph.sum(axis=1))[:,np.newaxis]
  assert ElogPi.shape == (D,K+1)
  return dict(word_variational =Resp, 
              E_logPi=ElogPi, alphaPi=alph,
              DocTopicCount=DocTopicC) 
Example #24
Source File: BagOfWordsObsModel.py    From refinery with MIT License
def ElogphiOLD(self):
    return digamma(self.Lam) - digamma(self.LamSum)[:,np.newaxis]

  ######################################################### Local Params
  #########################################################   E-step 
Example #25
Source File: BernRelObsModel.py    From refinery with MIT License
def Elogphi(self):
    digLam = np.empty(self.Lam.shape)
    digamma(self.Lam, out=digLam)
    digLam -= digamma(self.LamSum)[:,np.newaxis]
    return digLam 
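The call above relies on scipy.special.digamma being a NumPy ufunc, so it accepts an out= argument and writes into a preallocated buffer instead of allocating a new array; a minimal sketch of the same pattern:

import numpy as np
from scipy.special import digamma

Lam = np.random.default_rng(2).gamma(2.0, size=(4, 6))
buf = np.empty_like(Lam)
digamma(Lam, out=buf)                      # ufunc writes into the preallocated buffer
buf -= digamma(Lam.sum(axis=1))[:, np.newaxis]
print(np.allclose(buf, digamma(Lam) - digamma(Lam.sum(axis=1))[:, np.newaxis]))  # True
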
Example #26
Source File: HDPBetaOptimizer.py    From refinery with MIT License
def objectiveGradient(Cvec, alpha, gamma, nDoc, sumLogPi):
  ''' Calculate gradient of objectiveFunc, objective for HDP variational 
      Returns
      -------
        gvec : 2*K length vector,
              where each entry gives partial derivative with respect to
                  the corresponding entry of Cvec
  '''
  # UNPACK unconstrained input Cvec into intended params U

  Uvec = np.exp(Cvec)
  assert np.all(Uvec > 0)
  assert np.all(Uvec < 1)

  beta = v2beta(Uvec)
  dBdv = d_beta(Uvec, beta)

  gvecU = -1 * (gamma - 1.0) / (1.0 - Uvec)
  gvecU -= nDoc * alpha * np.dot(dBdv, digamma(alpha*beta))
  gvecU += alpha * np.dot(dBdv, sumLogPi)
  gvecU = -1 * gvecU

  # Apply chain rule!
  assert np.all(np.logical_not(np.isnan(gvecU)))
  gvecC = gvecU * Uvec

  return -1.0*gvecC 
Example #27
Source File: metrics.py    From reportgen with MIT License
def entropy(x, k=3, base=2):
        """
        The classic K-L k-nearest neighbor continuous entropy estimator

        if x is a one-dimensional scalar and we have:
        H(X)=-\sum p_i log2(p_i)
        if we only have a random sample (x1 ... xN) of N realizations of X,
        we can estimate H(X):

        H(X) = -ψ(k) + ψ(N) + \log c_d + d/N \sum_{i=1}^{N} \log eps(i)

        where ψ(x) is the digamma function, d is the dimension of x,
        c_d is the volume of the d-dimensional unit ball, and
        eps(i) is twice the distance from x_i to its k-th nearest neighbour

        parameters
        ----------
        x: samples drawn from some distribution; multi-dimensional input is supported
        k: index of the k-th nearest neighbour to use
        base: logarithm base for the entropy (default 2)

        return
        -------
        entropy
        """
        x=entropyc.__reshape(x)
        assert k <= len(x) - 1, "Set k smaller than num. samples - 1"
        d = len(x[0])
        N = len(x)
        intens = 1e-10  # small noise to break degeneracy, see doc.
        x = [list(p + intens * nr.rand(len(x[0]))) for p in x]
        tree = ss.cKDTree(x)
        nn = [tree.query(point, k + 1, p=float('inf'))[0][k] for point in x]
        const = digamma(N) - digamma(k) + d * log(base)
        return (const + d * np.mean(list(map(log, nn)))) / log(base) 
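A self-contained sketch of the same Kozachenko-Leonenko estimator applied to a 1-D Gaussian sample, compared against the analytic differential entropy 0.5*log2(2*pi*e*sigma^2); the sample size and k below are illustrative:

import numpy as np
from scipy.spatial import cKDTree
from scipy.special import digamma

rng = np.random.default_rng(3)
sigma, N, k = 2.0, 5000, 3
x = rng.normal(0.0, sigma, size=(N, 1))
x += 1e-10 * rng.random(x.shape)               # tiny noise to break degeneracy

d = x.shape[1]
tree = cKDTree(x)
r_k = tree.query(x, k + 1, p=np.inf)[0][:, k]  # distance to the k-th neighbour
# Kozachenko-Leonenko estimate in nats; with the max-norm, log c_d vanishes
# once eps(i) is taken as twice the neighbour distance.
h_nats = digamma(N) - digamma(k) + d * np.mean(np.log(2.0 * r_k))
h_bits = h_nats / np.log(2)

h_true = 0.5 * np.log2(2 * np.pi * np.e * sigma ** 2)
print(round(h_bits, 2), round(h_true, 2))      # should be close
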
Example #28
Source File: TestHDPBetaOptimizer.py    From refinery with MIT License
def calcExpPI(self, Pi):
    ELogPi = np.zeros((self.G, self.K+1))
    for i in xrange(self.G):
       ELogPi[i] = digamma(Pi[i,:]) - digamma(np.sum(Pi[i,:]))
    return ELogPi 
Example #29
Source File: AbstractBaseTestForHDP.py    From refinery with MIT License
def getLPfromResp(self, Resp, smoothMass=0.001):
    ''' Create full local parameter (LP) dictionary for HDPModel,
          given responsibility matrix Resp

        Returns
        --------
        LP : dict with fields word_variational, alphaPi, E_logPi, DocTopicCount
    '''
    Data = self.Data
    D = Data.nDoc
    K = Resp.shape[1]
    # DocTopicCount matrix : D x K matrix
    DocTopicC = np.zeros((D, K))
    for dd in range(D):
      start,stop = Data.doc_range[dd,:]
      DocTopicC[dd,:] = np.dot(Data.word_count[start:stop],        
                               Resp[start:stop,:]
                               )
    assert np.allclose(DocTopicC.sum(), Data.word_count.sum())
    # Alpha and ElogPi : D x K+1 matrices
    padCol = smoothMass * np.ones((D,1))
    alph = np.hstack( [DocTopicC + smoothMass, padCol])    
    ElogPi = digamma(alph) - digamma(alph.sum(axis=1))[:,np.newaxis]
    assert ElogPi.shape == (D,K+1)
    return dict(word_variational =Resp, 
              E_logPi=ElogPi, alphaPi=alph,
              DocTopicCount=DocTopicC) 
Example #30
Source File: metrics.py    From reportgen with MIT License
def avgdigamma(points, dvec):
    # This part finds number of neighbors in some radius in the marginal space
    # returns expectation value of <psi(nx)>
    N = len(points)
    tree = ss.cKDTree(points)
    avg = 0.
    for i in range(N):
        dist = dvec[i]
        # Subtlety: we don't include the boundary point, but we implicitly
        # add 1 relative to the Kraskov definition because the center point is included.
        num_points = len(tree.query_ball_point(points[i], dist - 1e-15, p=float('inf')))
        avg += digamma(num_points) / N
    return avg