Python scipy.stats.entropy() Examples

The following code examples show how to use scipy.stats.entropy(). They are taken from open source Python projects.
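As a quick orientation before the project examples, here is a minimal usage sketch (the distributions are made up for illustration): entropy(pk) returns the Shannon entropy of pk, entropy(pk, qk) returns the Kullback-Leibler divergence of pk from qk, and the optional base argument selects the logarithm base.

from scipy.stats import entropy

p = [0.5, 0.25, 0.25]
q = [0.4, 0.4, 0.2]

h_nats = entropy(p)          # Shannon entropy in nats (natural log)
h_bits = entropy(p, base=2)  # Shannon entropy in bits
kl = entropy(p, q)           # Kullback-Leibler divergence D(p || q)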

Example 1
Project: PyCausality   Author: ZacKeskin   File: unit_tests.py    GNU General Public License v3.0 7 votes
def test_joint_entropyND():
    """
        Test that our implemented function to return the entropy corresponds 
        to Scipy's entropy method in multiple dimensions.
    """
    gridpoints = 10 # for KDE estimation

    ## Test 2D joint entropy:
    X = skewnorm.rvs(size=1000, a=-3, loc=0, scale=2)
    Y = skewnorm.rvs(size=1000, a=-3, loc=0, scale=2) 
    
    data = pd.DataFrame({'X':X, 'Y':Y})
    
    ## So this is valid if test_get_pdf passes   
    pdf = get_pdf(data,gridpoints=gridpoints) 

    ## The estimated entropy should correspond to scipy's value (to 5 d.p.) 
    assert_almost_equal(get_entropy(data,gridpoints=gridpoints), entropy(pdf.flatten(),base=2), 5) 
Example 2
Project: stableGAN   Author: shahsohil   File: mogdata.py    MIT License 6 votes
def loglikelihood(data, num_mode=100, radius=24, center=(0, 0)):
    t = np.linspace(0, 2*np.pi, num_mode+1)
    t = t[:-1]
    x = np.cos(t)*radius + center[0]
    y = np.sin(t)*radius + center[1]

    modes = np.vstack([x, y]).T
    q = np.ones(num_mode) / num_mode

    mat = distance.cdist(data, modes)
    prob = np.bincount(np.argmin(mat, axis=1), minlength=num_mode) / len(data)

    # find the entropy
    try:
        toReturn =  entropy(q,prob,base=2)
    except:
        print('Got some Error, return toReturn=-0.1')
        toReturn = -0.1
    return toReturn 
Example 3
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 6 votes
def test_entropy(self):
        # Basic tests of entropy.
        pvals = np.array([0.25, 0.45, 0.3])
        p = stats.rv_discrete(values=([0, 1, 2], pvals))
        expected_h = -sum(xlogy(pvals, pvals))
        h = p.entropy()
        assert_allclose(h, expected_h)

        p = stats.rv_discrete(values=([0, 1, 2], [1.0, 0, 0]))
        h = p.entropy()
        assert_equal(h, 0.0) 
Example 4
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 6 votes
def test_entropy_zero(self):
        # Test for PR-479
        assert_almost_equal(stats.entropy([0, 1, 2]), 0.63651416829481278,
                            decimal=12) 
Example 5
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 6 votes
def test_genextreme_entropy():
    # regression test for gh-5181
    euler_gamma = 0.5772156649015329

    h = stats.genextreme.entropy(-1.0)
    assert_allclose(h, 2*euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(0)
    assert_allclose(h, euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(1.0)
    assert_equal(h, 1)

    h = stats.genextreme.entropy(-2.0, scale=10)
    assert_allclose(h, euler_gamma*3 + np.log(10) + 1, rtol=1e-14)

    h = stats.genextreme.entropy(10)
    assert_allclose(h, -9*euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(-10)
    assert_allclose(h, 11*euler_gamma + 1, rtol=1e-14) 
Example 6
Project: PyCausality   Author: ZacKeskin   File: unit_tests.py    GNU General Public License v3.0 6 votes
def test_joint_entropy():
    """
        Test that our implemented function to return the entropy corresponds 
        to Scipy's entropy method
    """
    gridpoints = 10 # for KDE estimation

    S = np.random.normal(0,0.1,10000)

    data = pd.DataFrame(S)

    ## pdf required for scipy.stats.entropy. Uses KDE to match with scipy. 
    pdf = get_pdf(data,gridpoints=gridpoints) # Valid providing test_get_pdf() passes   

    ## The estimated entropy should correspond to scipy's value (to 5 d.p.) 
    assert_almost_equal(get_entropy(data,gridpoints=gridpoints), entropy(pdf,base=2), 5) 
Example 7
Project: trVAE   Author: theislab   File: metrics.py    MIT License 6 votes
def entropy_batch_mixing(adata, label_key='batch',
                         n_neighbors=50, n_pools=50, n_samples_per_pool=100, subsample_frac=1.0):
    adata = remove_sparsity(adata)

    n_samples = adata.shape[0]
    keep_idx = np.random.choice(np.arange(n_samples), size=min(n_samples, int(subsample_frac * n_samples)),
                                replace=False)
    adata = adata[keep_idx, :]

    neighbors = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(adata.X)
    indices = neighbors.kneighbors(adata.X, return_distance=False)[:, 1:]
    batch_indices = np.vectorize(lambda i: adata.obs[label_key].values[i])(indices)

    entropies = np.apply_along_axis(__entropy_from_indices, axis=1, arr=batch_indices)

    # average n_pools entropy results where each result is an average of n_samples_per_pool random samples.
    if n_pools == 1:
        score = np.mean(entropies)
    else:
        score = np.mean([
            np.mean(entropies[np.random.choice(len(entropies), size=n_samples_per_pool)])
            for _ in range(n_pools)
        ])

    return score 
Example 8
Project: csslab   Author: Sci-Robot   File: entropy.py    MIT License 6 votes
def entropy(self):
        ''' Use scipy's entropy calculation formula '''

        if(not self._is_data_processed):
            self.process_data()

        # Computed in a vectorized (universal function) way; per SciPy's source, the data must be transposed
        for ky in self.e_datas.keys():
            #  ret_ent = []
            #  for i in self.e_datas[ky].index:
                #  one_line_data = self.e_datas[ky].ix[i,self.class_columns].values.tolist()[:]
                #  ret_ent.append(_sci_entropy(one_line_data))

            self.e_datas[ky]["Ent"] = _sci_entropy(self.e_datas[ky][self.class_columns].T)

        self._calculated["Ent"] = True
            #  print("max and min: ",self.e_datas[ky]["RawEnt"].max(),self.e_datas[ky]["RawEnt"].min()) 
Example 9
Project: abstention   Author: kundajelab   File: abstention.py    MIT License 6 votes
def __call__(self, valid_labels=None, valid_posterior=None,
                       valid_uncert=None, train_embeddings=None,
                       train_labels=None):
        #softmax_valid_labels =\
        #    map_to_softmax_format_if_appropriate(values=valid_labels)
        #mean_class_freqs = np.mean(softmax_valid_labels, axis=0)
        def abstaining_func(posterior_probs,
                            uncertainties=None,
                            embeddings=None):
            softmax_posterior_probs =\
                map_to_softmax_format_if_appropriate(values=posterior_probs) 
            mean_class_freqs = np.mean(softmax_posterior_probs, axis=0)
            assert len(softmax_posterior_probs.shape)==2
            M = 0.5*(mean_class_freqs[None,:] + softmax_posterior_probs)  
            jsd = (np.array([(0.5*entropy(pk=pk, qk=m)
                              + 0.5*entropy(pk=mean_class_freqs, qk=m))
                             for (m,pk) in zip(M, softmax_posterior_probs)]))
            return 1-jsd
        return abstaining_func 
Example 10
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 6 votes
def test_genextreme_entropy():
    # regression test for gh-5181
    euler_gamma = 0.5772156649015329

    h = stats.genextreme.entropy(-1.0)
    assert_allclose(h, 2*euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(0)
    assert_allclose(h, euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(1.0)
    assert_equal(h, 1)

    h = stats.genextreme.entropy(-2.0, scale=10)
    assert_allclose(h, euler_gamma*3 + np.log(10) + 1, rtol=1e-14)

    h = stats.genextreme.entropy(10)
    assert_allclose(h, -9*euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(-10)
    assert_allclose(h, 11*euler_gamma + 1, rtol=1e-14) 
Example 11
Project: rl-musician   Author: Nikolay-Lysenko   File: evaluation.py    MIT License 6 votes
def evaluate_entropy(piece: Piece) -> float:
    """
    Evaluate non-triviality of a piece based on entropy of pitch distribution.

    :param piece:
        `Piece` instance
    :return:
        normalized average over all lines of the entropy of the pitch distribution
    """
    scores = []
    for line, elements in zip(piece.lines, piece.line_elements):
        positions = [element.relative_position for element in line]
        counter = Counter(positions)
        distribution = [
            counter[element.relative_position] / piece.n_measures
            for element in elements
        ]
        raw_entropy = entropy(distribution)
        max_entropy_distribution = [1 / len(elements) for _ in elements]
        denominator = entropy(max_entropy_distribution)
        normalized_entropy = raw_entropy / denominator
        scores.append(normalized_entropy)
    score = sum(scores) / len(scores)
    return score 
Example 12
Project: rl-musician   Author: Nikolay-Lysenko   File: evaluation.py    MIT License 6 votes
def get_scoring_functions_registry() -> Dict[str, Callable]:
    """
    Get mapping from names of scoring functions to scoring functions.

    :return:
        registry of scoring functions
    """
    registry = {
        'autocorrelation': evaluate_autocorrelation,
        'entropy': evaluate_entropy,
        'absence_of_pitch_class_clashes': evaluate_absence_of_pitch_class_clashes,
        'independence_of_motion': evaluate_independence_of_motion,
        'lines_correlation': evaluate_lines_correlation,
        'climax_explicity': evaluate_climax_explicity,
    }
    return registry 
Example 13
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: quantization.py    Apache License 2.0 5 votes
def _get_optimal_thresholds(nd_dict, num_bins=8001, num_quantized_bins=255, logger=None):
    """Given a ndarray dict, find the optimal threshold for quantizing each value of the key."""
    if stats is None:
        raise ImportError('scipy.stats is required for running entropy mode of calculating'
                          ' the optimal thresholds for quantizing FP32 ndarrays into int8.'
                          ' Please check if the scipy python bindings are installed.')
    assert isinstance(nd_dict, dict)
    if logger is not None:
        logger.info('Calculating optimal thresholds for quantization using KL divergence'
                    ' with num_bins=%d and num_quantized_bins=%d' % (num_bins, num_quantized_bins))
    th_dict = {}
    # copy nd_dict keys since the keys() only returns a view in python3
    layer_names = list(nd_dict.keys())
    for name in layer_names:
        assert name in nd_dict
        min_val, max_val, min_divergence, opt_th = \
            _get_optimal_threshold(nd_dict[name], num_bins=num_bins,
                                   num_quantized_bins=num_quantized_bins)
        del nd_dict[name]  # release the memory of ndarray
        if min_val < 0:
            th_dict[name] = (-opt_th, opt_th)
        else:
            th_dict[name] = (0, opt_th)
        if logger is not None:
            logger.info('layer=%s, min_val=%f, max_val=%f, min_divergence=%f, optimal_threshold=%f'
                        % (name, min_val, max_val, min_divergence, opt_th))
    return th_dict 
Example 14
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 5 votes
def test_entropy_2d(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [0.1933259, 0.18609809]) 
Example 15
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 5 votes
def test_entropy_2d_zero(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [np.inf, 0.18609809])

        pk[0][0] = 0.0
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [0.17403988, 0.18609809]) 
Example 16
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        assert_allclose(self.norm_template.entropy(),
                        stats.norm.entropy(loc=1.0, scale=2.5), rtol=0.05) 
Example 17
Project: edge2vec   Author: RoyZhengGao   File: transition.py    BSD 3-Clause "New" or "Revised" License 5 votes
def entroy_test(v1,v2):  # original metric: the smaller, the more similar
    # Note: the KL divergence from stats.entropy is immediately overwritten
    # by the Wilcoxon statistic on the next line.
    result = stats.entropy(v1,v2)
    result = stats.wilcoxon(v1, v2).statistic
    if result != result:  # NaN check
        result = 0
    return result 
Example 18
Project: IDEA   Author: armor-ai   File: main.py    MIT License 5 votes
def JSD(P, Q):
    """
    Jensen-Shannon divergence
    :param P:
    :param Q:
    :return:
    """
    _M = 0.5 * (P + Q)
    return 0.5 * (entropy(P, _M) + entropy(Q, _M)) 
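A hedged usage sketch for the JSD helper above, assuming JSD and scipy.stats.entropy are in scope as defined in this example (the array values are illustrative): both inputs should be probability arrays of the same shape, and the result is symmetric in P and Q.

import numpy as np

P = np.array([0.1, 0.4, 0.5])
Q = np.array([0.3, 0.3, 0.4])

assert np.isclose(JSD(P, Q), JSD(Q, P))  # the Jensen-Shannon divergence is symmetric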
Example 19
Project: PyCausality   Author: ZacKeskin   File: unit_tests.py    GNU General Public License v3.0 5 votes
def test_NDHistogram():
    """
        Test function to ensure that the custom NDHistogram class correctly captures all
        data from 1 to 4 dimensions. Entropy must correspond to get_entropy() function.

    """
    data = pd.read_csv(os.path.join(os.getcwd(),'PyCausality','Testing',
                                    'Test_Utils','test_data.csv'))

    # 1D Data
    Hist1D =  NDHistogram(df=data[['S1']])
    assert len(data) == np.sum(Hist1D.Hist)

    # 2D Data
    Hist2D =  NDHistogram(df=data[['S1','S2']])
    assert len(data) == np.sum(Hist2D.Hist)

    # 3D Data
    Hist3D =  NDHistogram(df=data[['S1','S2','S3']])
    assert len(data) == np.sum(Hist3D.Hist)

    # 4D Data
    Hist4D =  NDHistogram(df=data[['S1','S2','S3','S4']])
    assert len(data) == np.sum(Hist4D.Hist)

    # Check sigma bins are correctly calculated when bins parameter is None
    #   (regression check against AutoBins.sigma_bins() )
    AB1 = AutoBins(df=data[['S1']])
    assert sorted(Hist1D.Dedges) == sorted(AB1.sigma_bins()['S1'])

    # Check entropy values correspond to test_get_entropy() 
Example 20
Project: trVAE   Author: theislab   File: test_trVAE.py    MIT License 5 votes
def score(adata, n_deg=10, n_genes=1000, condition_key="condition", cell_type_key="cell_type",
          conditions={"stim": "stimulated", "ctrl": "control"},
          sortby="median_score"):
    import scanpy as sc
    import numpy as np
    from scipy.stats import entropy
    import pandas as pd
    sc.tl.rank_genes_groups(adata, groupby=condition_key, method="wilcoxon", n_genes=n_genes)
    gene_names = adata.uns["rank_genes_groups"]['names'][conditions['stim']]
    gene_lfcs = adata.uns["rank_genes_groups"]['logfoldchanges'][conditions['stim']]
    diff_genes_df = pd.DataFrame({"names": gene_names, "lfc": gene_lfcs})
    diff_genes = diff_genes_df["names"].tolist()[:n_genes]
    print(len(diff_genes))

    adata_deg = adata[:, diff_genes].copy()
    cell_types = adata_deg.obs[cell_type_key].cat.categories.tolist()
    lfc_temp = np.zeros((len(cell_types), n_genes))
    for j, ct in enumerate(cell_types):
        if cell_type_key == "cell_type":  # if data is pbmc
            stim = adata_deg[(adata_deg.obs[cell_type_key] == ct) &
                             (adata_deg.obs[condition_key] == conditions["stim"])].X.mean(0).A1
            ctrl = adata_deg[(adata_deg.obs[cell_type_key] == ct) &
                             (adata_deg.obs[condition_key] == conditions["ctrl"])].X.mean(0).A1
        else:
            stim = adata_deg[(adata_deg.obs[cell_type_key] == ct) &
                             (adata_deg.obs[condition_key] == conditions["stim"])].X.mean(0)
            ctrl = adata_deg[(adata_deg.obs[cell_type_key] == ct) &
                             (adata_deg.obs[condition_key] == conditions["ctrl"])].X.mean(0)
        lfc_temp[j] = np.abs((stim - ctrl)[None, :])
    norm_lfc = lfc_temp / lfc_temp.sum(0).reshape((1, n_genes))
    ent_scores = entropy(norm_lfc)
    median = np.median(lfc_temp, axis=0)
    med_scores = np.max(np.abs((lfc_temp - median)), axis=0)
    df_score = pd.DataFrame({"genes": adata_deg.var_names.tolist(), "median_score": med_scores,
                             "entropy_score": ent_scores})
    if sortby == "median_score":
        return df_score.sort_values(by=['median_score'], ascending=False).iloc[:n_deg, :]
    else:
        return df_score.sort_values(by=['entropy_score'], ascending=False).iloc[:n_deg, :] 
Example 21
Project: trVAE   Author: theislab   File: metrics.py    MIT License 5 votes
def __entropy_from_indices(indices):
    # Entropy of the per-neighbourhood frequency counts returned by itemfreq
    # (scipy.stats.itemfreq is deprecated in newer SciPy; np.unique(..., return_counts=True) is the replacement).
    return entropy(np.array(itemfreq(indices)[:, 1].astype(np.int32))) 
Example 22
Project: HINPy   Author: pedroramaciotti   File: diversities_measures.py    GNU General Public License v3.0 5 votes
def ShannonEntropy(P,base=2,renormalize=False):
    # Convert, check, trim, and renormalize
    P=np.array(P)
    CheckDistribution(P)
    P=TrimDistribution(P)
    if renormalize:
        P=RenormalizeDistribution(P)
    # Computing entropy
    return entropy(P,base=base); 
Example 23
Project: HINPy   Author: pedroramaciotti   File: diversities_measures.py    GNU General Public License v3.0 5 votes
def ShannonEvenness(P,base=2,renormalize=False):
    # Convert, check, trim, and renormalize
    P=np.array(P)
    CheckDistribution(P)
    P=TrimDistribution(P)
    if renormalize:
        P=RenormalizeDistribution(P)
    # Computing entropy
    return ShannonEntropy(P,base=base)/P.size; 
Example 24
Project: HINPy   Author: pedroramaciotti   File: diversities_measures.py    GNU General Public License v3.0 5 votes
def HHI(P,renormalize=False):# Herfindahl-Hirschman Index
    # Convert, check, trim, and renormalize
    P=np.array(P)
    CheckDistribution(P)
    P=TrimDistribution(P)
    if renormalize:
        P=RenormalizeDistribution(P)
    # Computing the Herfindahl-Hirschman index
    return np.power(P,2).sum(); 
Example 25
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 26
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes
def test_entropy(self):
        # Simple tests of entropy.
        b = stats.bernoulli(0.25)
        expected_h = -0.25*np.log(0.25) - 0.75*np.log(0.75)
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.bernoulli(0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.bernoulli(1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 27
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes
def test_entropy(self):
        # Simple tests of entropy.
        hg = stats.hypergeom(4, 1, 1)
        h = hg.entropy()
        expected_p = np.array([0.75, 0.25])
        expected_h = -np.sum(xlogy(expected_p, expected_p))
        assert_allclose(h, expected_h)

        hg = stats.hypergeom(1, 1, 1)
        h = hg.entropy()
        assert_equal(h, 0.0) 
Example 28
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes
def test_entropy(self):
        # Basic tests of entropy.
        pvals = np.array([0.25, 0.45, 0.3])
        p = stats.rv_discrete(values=([0, 1, 2], pvals))
        expected_h = -sum(xlogy(pvals, pvals))
        h = p.entropy()
        assert_allclose(h, expected_h)

        p = stats.rv_discrete(values=([0, 1, 2], [1.0, 0, 0]))
        h = p.entropy()
        assert_equal(h, 0.0) 
Example 29
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes
def test_entropy_positive(self):
        # See ticket #497
        pk = [0.5,0.2,0.3]
        qk = [0.1,0.25,0.65]
        eself = stats.entropy(pk,pk)
        edouble = stats.entropy(pk,qk)
        assert_(0.0 == eself)
        assert_(edouble >= 0.0) 
Example 30
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes
def test_entropy_zero(self):
        # Test for PR-479
        assert_almost_equal(stats.entropy([0, 1, 2]), 0.63651416829481278,
                            decimal=12) 
Example 31
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes
def test_entropy_2d(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk),
                [0.1933259, 0.18609809]) 
Example 32
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes
def test_entropy_2d_zero(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk),
                [np.inf, 0.18609809])

        pk[0][0] = 0.0
        assert_array_almost_equal(stats.entropy(pk, qk),
                [0.17403988, 0.18609809]) 
Example 33
Project: SITE   Author: Osier-Yi   File: util.py    MIT License 5 votes
def similarity_error_cal(x, h_rep_norm):
    distance_matrix_x = cdist(x, x, propensity_dist)
    distance_matrix_h = cdist(h_rep_norm, h_rep_norm, "sqeuclidean")
    dim = distance_matrix_h.shape[0]
    il2 = np.tril_indices(dim, -1)
    p_x = distance_matrix_x[il2]
    p_x = p_x/sum(p_x)
    p_h = distance_matrix_h[il2]
    p_h = p_h / sum(p_h)
    print(p_x)
    print(p_h)
    k_l = entropy(p_x, p_h)

    return k_l 
Example 34
Project: featuretools   Author: FeatureLabs   File: aggregation_primitives.py    BSD 3-Clause "New" or "Revised" License 5 votes
def get_function(self):
        def pd_entropy(s):
            distribution = s.value_counts(normalize=True, dropna=self.dropna)
            return stats.entropy(distribution, base=self.base)

        return pd_entropy 
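The same pattern can be reproduced directly with pandas and SciPy; a small standalone sketch (the series values are made up):

import pandas as pd
from scipy import stats

s = pd.Series(["a", "a", "b", "c"])
dist = s.value_counts(normalize=True)  # empirical probability of each value
h = stats.entropy(dist, base=2)        # Shannon entropy of the series, in bits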
Example 35
Project: vnpy_crypto   Author: birforce   File: infotheo.py    MIT License 5 votes
def logbasechange(a,b):
    """
    There is a one-to-one transformation of the entropy value from
    a log base b to a log base a :

    H_{b}(X)=log_{b}(a)[H_{a}(X)]

    Returns
    -------
    log_{b}(a)
    """
    return np.log(b)/np.log(a) 
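Written out with concrete numbers, the identity in the docstring reads H_b(X) = log_b(a) * H_a(X); for example, an entropy of 2 bits (base a = 2) expressed in nats (base b = e) is H_e = log_e(2) * 2 = 0.6931 * 2 ≈ 1.386 nats.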
Example 36
Project: vnpy_crypto   Author: birforce   File: infotheo.py    MIT License 5 votes
def bitstonats(X):
    """
    Converts from bits to nats
    """
    return logbasechange(2, np.e) * X

#TODO: make this entropy, and then have different measures as
#a method 
Example 37
Project: vnpy_crypto   Author: birforce   File: infotheo.py    MIT License 5 votes
def shannonentropy(px, logbase=2):
    """
    This is Shannon's entropy

    Parameters
    ----------
    px : 1d or 2d array_like
        Can be a discrete probability distribution, a 2d joint distribution,
        or a sequence of probabilities.
    logbase : int or np.e
        The base of the log

    Returns
    -------
    For log base 2 (bits) given a discrete distribution
        H(p) = sum(px * log2(1/px)) = -sum(px * log2(px)) = E[log2(1/p(X))]

    For log base 2 (bits) given a joint distribution
        H(px,py) = -sum_{k,j} w_{kj} log2(w_{kj})

    Notes
    -----
    shannonentropy(0) is defined as 0
    """
#TODO: haven't defined the px,py case?
    px = np.asarray(px)
    if not np.all(px <= 1) or not np.all(px >= 0):
        raise ValueError("px does not define proper distribution")
    entropy = -np.sum(np.nan_to_num(px*np.log2(px)))
    if logbase != 2:
        return logbasechange(2,logbase) * entropy
    else:
        return entropy

# Shannon's information content 
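A usage sketch, assuming the shannonentropy definition above is importable; with the default logbase=2 the values follow from the base-2 formula in the docstring and agree with scipy.stats.entropy(..., base=2).

from scipy.stats import entropy

print(shannonentropy([0.5, 0.5]))                # 1.0 bit for a fair coin
print(entropy([0.5, 0.5], base=2))               # same value from scipy.stats.entropy
print(shannonentropy([0.25, 0.25, 0.25, 0.25]))  # 2.0 bits for a uniform 4-outcome distribution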
Example 38
Project: vnpy_crypto   Author: birforce   File: infotheo.py    MIT License 5 votes
def condentropy(px, py, pxpy=None, logbase=2):
    """
    Return the conditional entropy of X given Y.

    Parameters
    ----------
    px : array-like
    py : array-like
    pxpy : array-like, optional
        If pxpy is None, the distributions are assumed to be independent
        and condentropy(px,py) = shannonentropy(px)
    logbase : int or np.e

    Returns
    -------
    sum_{kj} w_{kj} log(q_{j}/w_{kj})

    where q_{j} = Y[j]
    and w_kj = X[k,j]
    """
    if not _isproperdist(px) or not _isproperdist(py):
        raise ValueError("px or py is not a proper probability distribution")
    if pxpy is not None and not _isproperdist(pxpy):
        raise ValueError("pxpy is not a proper joint distribution")
    if pxpy is None:
        pxpy = np.outer(py,px)
    condent = np.sum(pxpy * np.nan_to_num(np.log2(py/pxpy)))
    if logbase == 2:
        return condent
    else:
        return logbasechange(2, logbase) * condent 
Example 39
Project: BuildingMachineLearning   Author: ademyanchuk   File: demo_mi.py    MIT License 5 votes
def mutual_info(x, y, bins=10):
    counts_xy, bins_x, bins_y = np.histogram2d(x, y, bins=(bins, bins))
    counts_x, bins = np.histogram(x, bins=bins)
    counts_y, bins = np.histogram(y, bins=bins)

    counts_xy += 1
    counts_x += 1
    counts_y += 1
    P_xy = counts_xy / np.sum(counts_xy, dtype=float)
    P_x = counts_x / np.sum(counts_x, dtype=float)
    P_y = counts_y / np.sum(counts_y, dtype=float)

    I_xy = np.sum(P_xy * np.log2(P_xy / (P_x.reshape(-1, 1) * P_y)))

    return I_xy / (entropy(counts_x) + entropy(counts_y)) 
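A usage sketch, assuming the mutual_info definition above is in scope (the data are synthetic); larger return values indicate stronger statistical dependence between x and y.

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=1000)
y = x + 0.5 * rng.normal(size=1000)  # y depends strongly on x
print(mutual_info(x, y, bins=10))    # typically larger than for independent samples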
Example 40
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 41
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Simple tests of entropy.
        b = stats.bernoulli(0.25)
        expected_h = -0.25*np.log(0.25) - 0.75*np.log(0.75)
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.bernoulli(0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.bernoulli(1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 42
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Simple tests of entropy.
        hg = stats.hypergeom(4, 1, 1)
        h = hg.entropy()
        expected_p = np.array([0.75, 0.25])
        expected_h = -np.sum(xlogy(expected_p, expected_p))
        assert_allclose(h, expected_h)

        hg = stats.hypergeom(1, 1, 1)
        h = hg.entropy()
        assert_equal(h, 0.0) 
Example 43
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Basic tests of entropy.
        pvals = np.array([0.25, 0.45, 0.3])
        p = stats.rv_discrete(values=([0, 1, 2], pvals))
        expected_h = -sum(xlogy(pvals, pvals))
        h = p.entropy()
        assert_allclose(h, expected_h)

        p = stats.rv_discrete(values=([0, 1, 2], [1.0, 0, 0]))
        h = p.entropy()
        assert_equal(h, 0.0) 
Example 44
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes
def test_entropy_positive(self):
        # See ticket #497
        pk = [0.5, 0.2, 0.3]
        qk = [0.1, 0.25, 0.65]
        eself = stats.entropy(pk, pk)
        edouble = stats.entropy(pk, qk)
        assert_(0.0 == eself)
        assert_(edouble >= 0.0) 
Example 45
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes
def test_entropy_zero(self):
        # Test for PR-479
        assert_almost_equal(stats.entropy([0, 1, 2]), 0.63651416829481278,
                            decimal=12) 
Example 46
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes
def test_entropy_2d(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [0.1933259, 0.18609809]) 
Example 47
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes
def test_entropy_2d_zero(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [np.inf, 0.18609809])

        pk[0][0] = 0.0
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [0.17403988, 0.18609809]) 
Example 48
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        assert_allclose(self.norm_template.entropy(),
                        stats.norm.entropy(loc=1.0, scale=2.5), rtol=0.05) 
Example 49
Project: fakespikes   Author: voytekresearch   File: util.py    MIT License 5 votes
def kl_divergence(a, b):
    """Calculate the K-L divergence between a and b
    
    Note: a and b must be two sequences of integers
    """
    a = np.asarray(a)
    b = np.asarray(b)

    # Find the total set of symbols
    a_set = set(a)
    b_set = set(b)
    ab_set = a_set.union(b_set)

    # Create a lookup table for each symbol in p_a/p_b
    lookup = {}
    for i, x in enumerate(ab_set):
        lookup[x] = i

    # Calculate event probabilities for a and then b
    # To prevent nan/division errors every event
    # gets at least a 1 count.
    p_a = np.ones(len(ab_set))
    for x in a:
        p_a[lookup[x]] += 1

    p_b = np.ones(len(ab_set))
    for x in b:
        p_b[lookup[x]] += 1

    # Norm counts into probabilities
    p_a /= a.size
    p_b /= b.size

    return scientropy(p_a, p_b, base=2) 
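A usage sketch, assuming the kl_divergence definition above is in scope (the sequences are made up): both arguments are integer symbol sequences, and the add-one smoothing keeps the divergence finite even when a symbol appears in only one of them.

a = [1, 1, 2, 3, 3, 3]
b = [1, 2, 2, 2, 3, 3]
print(kl_divergence(a, b))  # non-negative; 0 only if the smoothed distributions coincide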
Example 50
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 51
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Simple tests of entropy.
        b = stats.bernoulli(0.25)
        expected_h = -0.25*np.log(0.25) - 0.75*np.log(0.75)
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.bernoulli(0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.bernoulli(1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 52
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Simple tests of entropy.
        hg = stats.hypergeom(4, 1, 1)
        h = hg.entropy()
        expected_p = np.array([0.75, 0.25])
        expected_h = -np.sum(xlogy(expected_p, expected_p))
        assert_allclose(h, expected_h)

        hg = stats.hypergeom(1, 1, 1)
        h = hg.entropy()
        assert_equal(h, 0.0) 
Example 53
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Basic tests of entropy.
        pvals = np.array([0.25, 0.45, 0.3])
        p = stats.rv_discrete(values=([0, 1, 2], pvals))
        expected_h = -sum(xlogy(pvals, pvals))
        h = p.entropy()
        assert_allclose(h, expected_h)

        p = stats.rv_discrete(values=([0, 1, 2], [1.0, 0, 0]))
        h = p.entropy()
        assert_equal(h, 0.0) 
Example 54
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 5 votes
def test_entropy_positive(self):
        # See ticket #497
        pk = [0.5,0.2,0.3]
        qk = [0.1,0.25,0.65]
        eself = stats.entropy(pk,pk)
        edouble = stats.entropy(pk,qk)
        assert_(0.0 == eself)
        assert_(edouble >= 0.0) 
Example 55
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 5 votes
def test_entropy_zero(self):
        # Test for PR-479
        assert_almost_equal(stats.entropy([0, 1, 2]), 0.63651416829481278,
                            decimal=12) 
Example 56
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 5 votes
def test_distribution_too_many_args():
    # Check that a TypeError is raised when too many args are given to a method
    # Regression test for ticket 1815.
    x = np.linspace(0.1, 0.7, num=5)
    assert_raises(TypeError, stats.gamma.pdf, x, 2, 3, loc=1.0)
    assert_raises(TypeError, stats.gamma.pdf, x, 2, 3, 4, loc=1.0)
    assert_raises(TypeError, stats.gamma.pdf, x, 2, 3, 4, 5)
    assert_raises(TypeError, stats.gamma.pdf, x, 2, 3, loc=1.0, scale=0.5)
    assert_raises(TypeError, stats.gamma.rvs, 2., 3, loc=1.0, scale=0.5)
    assert_raises(TypeError, stats.gamma.cdf, x, 2., 3, loc=1.0, scale=0.5)
    assert_raises(TypeError, stats.gamma.ppf, x, 2., 3, loc=1.0, scale=0.5)
    assert_raises(TypeError, stats.gamma.stats, 2., 3, loc=1.0, scale=0.5)
    assert_raises(TypeError, stats.gamma.entropy, 2., 3, loc=1.0, scale=0.5)
    assert_raises(TypeError, stats.gamma.fit, x, 2., 3, loc=1.0, scale=0.5)

    # These should not give errors
    stats.gamma.pdf(x, 2, 3)  # loc=3
    stats.gamma.pdf(x, 2, 3, 4)  # loc=3, scale=4
    stats.gamma.stats(2., 3)
    stats.gamma.stats(2., 3, 4)
    stats.gamma.stats(2., 3, 4, 'mv')
    stats.gamma.rvs(2., 3, 4, 5)
    stats.gamma.fit(stats.gamma.rvs(2., size=7), 2.)

    # Also for a discrete distribution
    stats.geom.pmf(x, 2, loc=3)  # no error, loc=3
    assert_raises(TypeError, stats.geom.pmf, x, 2, 3, 4)
    assert_raises(TypeError, stats.geom.pmf, x, 2, 3, loc=4)

    # And for distributions with 0, 2 and 3 args respectively
    assert_raises(TypeError, stats.expon.pdf, x, 3, loc=1.0)
    assert_raises(TypeError, stats.exponweib.pdf, x, 3, 4, 5, loc=1.0)
    assert_raises(TypeError, stats.exponweib.pdf, x, 3, 4, 5, 0.1, 0.1)
    assert_raises(TypeError, stats.ncf.pdf, x, 3, 4, 5, 6, loc=1.0)
    assert_raises(TypeError, stats.ncf.pdf, x, 3, 4, 5, 6, 1.0, scale=0.5)
    stats.ncf.pdf(x, 3, 4, 5, 6, 1.0)  # 3 args, plus loc/scale 
Example 57
Project: Building-Machine-Learning-Systems-With-Python-Second-Edition   Author: PacktPublishing   File: demo_mi.py    MIT License 5 votes
def mutual_info(x, y, bins=10):
    counts_xy, bins_x, bins_y = np.histogram2d(x, y, bins=(bins, bins))
    counts_x, bins = np.histogram(x, bins=bins)
    counts_y, bins = np.histogram(y, bins=bins)

    counts_xy += 1
    counts_x += 1
    counts_y += 1
    P_xy = counts_xy / np.sum(counts_xy, dtype=float)
    P_x = counts_x / np.sum(counts_x, dtype=float)
    P_y = counts_y / np.sum(counts_y, dtype=float)

    I_xy = np.sum(P_xy * np.log2(P_xy / (P_x.reshape(-1, 1) * P_y)))

    return I_xy / (entropy(counts_x) + entropy(counts_y)) 
Example 58
Project: poker   Author: surgebiswas   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 59
Project: poker   Author: surgebiswas   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Simple tests of entropy.
        b = stats.bernoulli(0.25)
        expected_h = -0.25*np.log(0.25) - 0.75*np.log(0.75)
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.bernoulli(0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.bernoulli(1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 60
Project: poker   Author: surgebiswas   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Simple tests of entropy.
        hg = stats.hypergeom(4, 1, 1)
        h = hg.entropy()
        expected_p = np.array([0.75, 0.25])
        expected_h = -np.sum(xlogy(expected_p, expected_p))
        assert_allclose(h, expected_h)

        hg = stats.hypergeom(1, 1, 1)
        h = hg.entropy()
        assert_equal(h, 0.0) 
Example 61
Project: poker   Author: surgebiswas   File: test_distributions.py    MIT License 5 votes
def test_entropy(self):
        # Basic tests of entropy.
        pvals = np.array([0.25, 0.45, 0.3])
        p = stats.rv_discrete(values=([0, 1, 2], pvals))
        expected_h = -sum(xlogy(pvals, pvals))
        h = p.entropy()
        assert_allclose(h, expected_h)

        p = stats.rv_discrete(values=([0, 1, 2], [1.0, 0, 0]))
        h = p.entropy()
        assert_equal(h, 0.0) 
Example 62
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 4 votes
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 63
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 4 votes
def test_entropy(self):
        # Simple tests of entropy.
        b = stats.bernoulli(0.25)
        expected_h = -0.25*np.log(0.25) - 0.75*np.log(0.75)
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.bernoulli(0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.bernoulli(1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 64
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 4 votes
def test_entropy(self):
        # Simple tests of entropy.
        hg = stats.hypergeom(4, 1, 1)
        h = hg.entropy()
        expected_p = np.array([0.75, 0.25])
        expected_h = -np.sum(xlogy(expected_p, expected_p))
        assert_allclose(h, expected_h)

        hg = stats.hypergeom(1, 1, 1)
        h = hg.entropy()
        assert_equal(h, 0.0) 
Example 65
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 4 votes
def test_entropy_positive(self):
        # See ticket #497
        pk = [0.5, 0.2, 0.3]
        qk = [0.1, 0.25, 0.65]
        eself = stats.entropy(pk, pk)
        edouble = stats.entropy(pk, qk)
        assert_(0.0 == eself)
        assert_(edouble >= 0.0) 
Example 66
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 4 votes
def test_norm(self):
        dist = stats.norm
        frozen = stats.norm(loc=10.0, scale=3.0)

        result_f = frozen.pdf(20.0)
        result = dist.pdf(20.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.cdf(20.0)
        result = dist.cdf(20.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.ppf(0.25)
        result = dist.ppf(0.25, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.isf(0.25)
        result = dist.isf(0.25, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.sf(10.0)
        result = dist.sf(10.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.median()
        result = dist.median(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.mean()
        result = dist.mean(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.var()
        result = dist.var(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.std()
        result = dist.std(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.entropy()
        result = dist.entropy(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.moment(2)
        result = dist.moment(2, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        assert_equal(frozen.a, dist.a)
        assert_equal(frozen.b, dist.b) 
Example 67
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 4 votes
def test_gamma(self):
        a = 2.0
        dist = stats.gamma
        frozen = stats.gamma(a)

        result_f = frozen.pdf(20.0)
        result = dist.pdf(20.0, a)
        assert_equal(result_f, result)

        result_f = frozen.cdf(20.0)
        result = dist.cdf(20.0, a)
        assert_equal(result_f, result)

        result_f = frozen.ppf(0.25)
        result = dist.ppf(0.25, a)
        assert_equal(result_f, result)

        result_f = frozen.isf(0.25)
        result = dist.isf(0.25, a)
        assert_equal(result_f, result)

        result_f = frozen.sf(10.0)
        result = dist.sf(10.0, a)
        assert_equal(result_f, result)

        result_f = frozen.median()
        result = dist.median(a)
        assert_equal(result_f, result)

        result_f = frozen.mean()
        result = dist.mean(a)
        assert_equal(result_f, result)

        result_f = frozen.var()
        result = dist.var(a)
        assert_equal(result_f, result)

        result_f = frozen.std()
        result = dist.std(a)
        assert_equal(result_f, result)

        result_f = frozen.entropy()
        result = dist.entropy(a)
        assert_equal(result_f, result)

        result_f = frozen.moment(2)
        result = dist.moment(2, a)
        assert_equal(result_f, result)

        assert_equal(frozen.a, frozen.dist.a)
        assert_equal(frozen.b, frozen.dist.b) 
Example 68
Project: chowmein   Author: xiaohan2012   File: label_ranker.py    MIT License 4 votes
def label_mmr_score(self,
                        which_topic,
                        chosen_labels,
                        label_scores,
                        label_models):
        """
        Maximal Marginal Relevance score for labels.
        It's computed only when `apply_intra_topic_coverage` is True

        Parameters:
        --------------
        which_topic: int
            the index of the topic
        
        chosen_labels: list<int>
           indices of labels that are already chosen
        
        label_scores: numpy.ndarray<#topic, #label>
           label scores for each topic

        label_models: numpy.ndarray<#label, #words>
            the language models for labels

        Returns:
        --------------
        numpy.ndarray: 1D of length #label - #chosen_labels
            the scored label indices

        numpy.ndarray: same length as above
            the scores
        """
        chosen_len = len(chosen_labels)
        if chosen_len == 0:
            # no label is chosen
            # return the raw scores
            return (np.arange(label_models.shape[0]),
                    label_scores[which_topic, :])
        else:
            kl_m = np.zeros((label_models.shape[0]-chosen_len,
                             chosen_len))
            
            # the unchosen label indices
            candidate_labels = list(set(range(label_models.shape[0])) -
                                    set(chosen_labels))
            candidate_labels = np.sort(np.asarray(candidate_labels))
            for i, l_p in enumerate(candidate_labels):
                for j, l in enumerate(chosen_labels):
                    kl_m[i, j] = kl_divergence(label_models[l_p],
                                               label_models[l])
            sim_scores = kl_m.max(axis=1)
            mml_scores = (self._alpha *
                          label_scores[which_topic, candidate_labels]
                          - (1 - self._alpha) * sim_scores)
            return (candidate_labels, mml_scores) 
Example 69
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 4 votes
def test_norm(self):
        dist = stats.norm
        frozen = stats.norm(loc=10.0, scale=3.0)

        result_f = frozen.pdf(20.0)
        result = dist.pdf(20.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.cdf(20.0)
        result = dist.cdf(20.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.ppf(0.25)
        result = dist.ppf(0.25, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.isf(0.25)
        result = dist.isf(0.25, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.sf(10.0)
        result = dist.sf(10.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.median()
        result = dist.median(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.mean()
        result = dist.mean(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.var()
        result = dist.var(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.std()
        result = dist.std(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.entropy()
        result = dist.entropy(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.moment(2)
        result = dist.moment(2,loc=10.0, scale=3.0)
        assert_equal(result_f, result) 
Example 70
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 4 votes
def test_gamma(self):
        a = 2.0
        dist = stats.gamma
        frozen = stats.gamma(a)

        result_f = frozen.pdf(20.0)
        result = dist.pdf(20.0, a)
        assert_equal(result_f, result)

        result_f = frozen.cdf(20.0)
        result = dist.cdf(20.0, a)
        assert_equal(result_f, result)

        result_f = frozen.ppf(0.25)
        result = dist.ppf(0.25, a)
        assert_equal(result_f, result)

        result_f = frozen.isf(0.25)
        result = dist.isf(0.25, a)
        assert_equal(result_f, result)

        result_f = frozen.sf(10.0)
        result = dist.sf(10.0, a)
        assert_equal(result_f, result)

        result_f = frozen.median()
        result = dist.median(a)
        assert_equal(result_f, result)

        result_f = frozen.mean()
        result = dist.mean(a)
        assert_equal(result_f, result)

        result_f = frozen.var()
        result = dist.var(a)
        assert_equal(result_f, result)

        result_f = frozen.std()
        result = dist.std(a)
        assert_equal(result_f, result)

        result_f = frozen.entropy()
        result = dist.entropy(a)
        assert_equal(result_f, result)

        result_f = frozen.moment(2)
        result = dist.moment(2, a)
        assert_equal(result_f, result) 
Example 71
Project: pydsge   Author: gboehl   File: plots.py    MIT License 4 votes
def fast_kde(x, bw=4.5):
    """
    A fft-based Gaussian kernel density estimate (KDE)
    The code was adapted from https://github.com/mfouesneau/faststats

    Parameters
    ----------
    x : Numpy array or list
    bw : float
        Bandwidth scaling factor for the KDE. Should be larger than 0. The higher this number the
        smoother the KDE will be. Defaults to 4.5 which is essentially the same as the Scott's rule
        of thumb (the default rule used by SciPy).

    Returns
    -------
    density: A gridded 1D KDE of the input points (x)
    xmin: minimum value of x
    xmax: maximum value of x
    """

    from scipy.signal import gaussian, convolve
    from scipy.stats import entropy

    x = np.asarray(x, dtype=float)
    x = x[np.isfinite(x)]
    n = len(x)
    nx = 200

    xmin, xmax = np.min(x), np.max(x)

    dx = (xmax - xmin) / (nx - 1)
    std_x = entropy((x - xmin) / dx) * bw
    if ~np.isfinite(std_x):
        std_x = 0.
    grid, _ = np.histogram(x, bins=nx)

    scotts_factor = n ** (-0.2)
    kern_nx = int(scotts_factor * 2 * np.pi * std_x)
    kernel = gaussian(kern_nx, scotts_factor * std_x)

    npad = min(nx, 2 * kern_nx)
    grid = np.concatenate([grid[npad: 0: -1], grid, grid[nx: nx - npad: -1]])
    density = convolve(grid, kernel, mode='same')[npad: npad + nx]

    norm_factor = n * dx * (2 * np.pi * std_x ** 2 * scotts_factor ** 2) ** 0.5

    density = density / norm_factor

    return density, xmin, xmax 
Example 72
Project: torchbench   Author: paperswithcode   File: utils.py    Apache License 2.0 4 votes
def calculate_inception_score(
    sample_dataloader,
    test_dataloader,
    device="cuda",
    num_images=50000,
    splits=10,
):
    """Calculate the inception score for a model's samples.

    Args:
        sample_dataloader: Dataloader for the generated image samples from the
            model.
        test_dataloader: Dataloader for the real images from the dataset to
            compare to.
        device: device on which to perform the evaluation (e.g. 'cuda' for GPU).
        num_images: number of images to evaluate.
        splits: number of splits to perform for the evaluation.

    Returns:
        dict: Dictionary with key being the metric name, and values being the
            metric scores.
    """
    inception_model = InceptionScore(device=device)
    inception_model.eval()

    preds = np.zeros((num_images, 1000))

    for i, batch in enumerate(sample_dataloader, 0):
        batch = batch.to(device=device)
        batchv = Variable(batch)
        batch_size_i = batch.size()[0]
        predictions = inception_model(batchv)
        start = i * test_dataloader.batch_size
        n_predictions = len(preds[start : start + batch_size_i])
        preds[start : start + batch_size_i] = predictions[:n_predictions]

    split_scores = []

    for k in range(splits):
        part = preds[
            k * (num_images // splits) : (k + 1) * (num_images // splits), :
        ]
        py = np.mean(part, axis=0)
        scores = []
        for i in range(part.shape[0]):
            pyx = part[i, :]
            scores.append(entropy(pyx, py))
        split_scores.append(np.exp(np.mean(scores)))

    return {"Inception Score": np.mean(split_scores)} 
Example 73
Project: openbadge-analysis   Author: HumanDynamics   File: audio.py    MIT License 4 votes
def get_kldistance(dt_nys, bandwidth=2, prox=0.001, step=0.1, num_samples=200, plot=False, figsize=(12,8)):
    """
    only for 4-user situations
    calculate kl-distance of two distributions (D_t and D_s)
    """
    klds, seps = [], []
    if plot is True:
        fig, axs = plt.subplots(2,2,figsize=figsize,) 
        plt.tight_layout(h_pad=4)
    for idx, user in enumerate(dt_nys):
        ns, ys = dt_nys[user]
        cond_nonezero = len(ns) == 0 or len(ys) ==0
        kden, pns, nss = get_kde_pdf(ns, step=step, num_samples=num_samples, bandwidth=bandwidth)
        kdey, pys, yss = get_kde_pdf(ys, step=step, num_samples=num_samples, bandwidth=bandwidth)
        
        kldistance = stats.entropy(pns, pys) if not cond_nonezero else np.nan
        if not np.isinf(kldistance) and not np.isnan(kldistance):
            klds.append(kldistance)

        pys[pys<=prox] = 0
        pns[pns<=prox] = 0

        sep = -1        
        if not cond_nonezero:
            for i in np.arange(int(100/step)-1, 0, -1):
                if pys[i-1] < pns[i-1]  and pys[i] >= pns[i]:
                    sep = i * step
                    break
        seps.append(sep)
        
        if plot is True:
            ax = axs.flatten()[idx]
            sns.distplot(nss, label='Silent',  kde=False, norm_hist=True, ax=ax)
            sns.distplot(yss, label='Talking', kde=False, norm_hist=True, ax=ax)
            ax.set_title('%s kl-dist:%.2f' % (user, kldistance) )    
            ax.set_xlabel('')
            if not cond_nonezero:
                ax.axvline(x=sep)
                ax.annotate('best sep val: %.1f' % sep, xy=(sep, 0.1), xytext=(sep+5, 0.1), 
                        arrowprops= dict(facecolor='black', shrink=0.0001))
            ax.legend()
            
    seps = np.array(seps)
    seps[seps == -1] = seps[seps != -1].mean()
    return klds, seps 
Example 74
Project: vnpy_crypto   Author: birforce   File: infotheo.py    MIT License 4 votes
def renyientropy(px,alpha=1,logbase=2,measure='R'):
    """
    Renyi's generalized entropy

    Parameters
    ----------
    px : array-like
        Discrete probability distribution of random variable X.  Note that
        px is assumed to be a proper probability distribution.
    logbase : int or np.e, optional
        Default is 2 (bits)
    alpha : float or inf
        The order of the entropy.  The default is 1, which in the limit
        is just Shannon's entropy.  2 is Renyi (Collision) entropy.  If
        the string "inf" or numpy.inf is specified the min-entropy is returned.
    measure : str, optional
        The type of entropy measure desired.  'R' returns Renyi entropy
        measure.  'T' returns the Tsallis entropy measure.

    Returns
    -------
    1/(1-alpha)*log(sum(px**alpha))

    In the limit as alpha -> 1, Shannon's entropy is returned.

    In the limit as alpha -> inf, min-entropy is returned.
    """
#TODO:finish returns
#TODO:add checks for measure
    if not _isproperdist(px):
        raise ValueError("px is not a proper probability distribution")
    alpha = float(alpha)
    if alpha == 1:
        genent = shannonentropy(px)
        if logbase != 2:
            return logbasechange(2, logbase) * genent
        return genent
    elif 'inf' in str(alpha).lower() or alpha == np.inf:
        return -np.log(np.max(px))

    # gets here if alpha != (1 or inf)
    px = px**alpha
    genent = np.log(px.sum())
    if logbase == 2:
        return 1/(1-alpha) * genent
    else:
        return 1/(1-alpha) * logbasechange(2, logbase) * genent

#TODO: before completing this, need to rethink the organization of
# (relative) entropy measures, ie., all put into one function
# and have kwdargs, etc.? 
Example 75
Project: ijcai2019-relis   Author: UKPLab   File: evaluator.py    MIT License 4 votes
def evaluateReward(learnt_values, ref_values,short=False):
    metrics_dic = OrderedDict()

    metrics_dic['mse'] = mean_squared_error(ref_values,learnt_values)

    if not short:
        ### compute the absolute errors
        mse = mean_squared_error(ref_values,learnt_values)
        metrics_dic['mse'] = mse

        ### compute KL divergence
        #js = jsd(learnt_values,ref_values)
        #metrics_dic['jsd-original'] = js
        prob_optimal = getSoftmaxList(ref_values, 1.0)
        prob_learnt = getSoftmaxList(learnt_values, 1.0)
        js = jsd(prob_optimal,prob_learnt)
        metrics_dic['jsd-softmax'] = js
        #kld = stats.entropy(prob_optimal, prob_learnt)
        #metrics_dic['kld'] = kld

    ### compute Kendall's tau, Spearman's rho and Pearson correlation coefficient
    sorted_list = sorted(learnt_values)
    new_reward_ranking = [sorted_list.index(i) for i in learnt_values]
    sorted_list = sorted(ref_values)
    true_reward_ranking = [sorted_list.index(i) for i in ref_values]
    tau, _ = stats.kendalltau(new_reward_ranking, true_reward_ranking)
    rho, _ = stats.pearsonr(new_reward_ranking, true_reward_ranking)
    pcc, _ = stats.pearsonr(learnt_values, ref_values)
    metrics_dic['tau'] = tau
    metrics_dic['rho'] = rho
    metrics_dic['pcc'] = pcc

    ### compute nDCG
    sorted_list = sorted(learnt_values,reverse=True)
    ll = [ref_values[learnt_values.index(ele)] for ele in sorted_list]

    ndcg = ndcg_at_k(ll,int(0.01*len(ll)))
    metrics_dic['ndcg_at_1%'] = ndcg
    ndcg = ndcg_at_k(ll,int(0.05*len(ll)))
    metrics_dic['ndcg_at_5%'] = ndcg
    ndcg = ndcg_at_k(ll,int(0.1*len(ll)))
    metrics_dic['ndcg_at_10%'] = ndcg
    ndcg = ndcg_at_k(ll,int(0.2*len(ll)))
    metrics_dic['ndcg_at_20%'] = ndcg
    ndcg = ndcg_at_k(ll,int(0.5*len(ll)))
    metrics_dic['ndcg_at_50%'] = ndcg
    ndcg = ndcg_at_k(ll,len(ll))
    metrics_dic['ndcg_at_all'] = ndcg

    return metrics_dic 
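The jsd() and getSoftmaxList() helpers used above are not shown in this snippet. A hedged sketch of what such helpers typically look like, with the Jensen-Shannon divergence built from scipy.stats.entropy as the KL building block, is:

import numpy as np
from scipy.stats import entropy

def softmax(values, temperature=1.0):
    v = np.asarray(values, dtype=float) / temperature
    e = np.exp(v - v.max())            # subtract the max for numerical stability
    return e / e.sum()

def jensen_shannon(p, q, base=2):
    # JSD(p, q) = 0.5*KL(p || m) + 0.5*KL(q || m), with m the mixture of p and q
    p, q = np.asarray(p, dtype=float), np.asarray(q, dtype=float)
    m = 0.5 * (p + q)
    return 0.5 * entropy(p, m, base=base) + 0.5 * entropy(q, m, base=base)

prob_optimal = softmax([0.9, 0.1, 0.4])
prob_learnt = softmax([0.7, 0.2, 0.5])
print(jensen_shannon(prob_optimal, prob_learnt))   # 0 for identical inputs, at most 1 in base 2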
Example 76
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 4 votes vote down vote up
def test_norm(self):
        dist = stats.norm
        frozen = stats.norm(loc=10.0, scale=3.0)

        result_f = frozen.pdf(20.0)
        result = dist.pdf(20.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.cdf(20.0)
        result = dist.cdf(20.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.ppf(0.25)
        result = dist.ppf(0.25, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.isf(0.25)
        result = dist.isf(0.25, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.sf(10.0)
        result = dist.sf(10.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.median()
        result = dist.median(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.mean()
        result = dist.mean(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.var()
        result = dist.var(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.std()
        result = dist.std(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.entropy()
        result = dist.entropy(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.moment(2)
        result = dist.moment(2, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        assert_equal(frozen.a, dist.a)
        assert_equal(frozen.b, dist.b) 
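As an illustrative aside (not part of the test file), the frozen normal's entropy() matches the closed-form differential entropy 0.5*ln(2*pi*e*sigma**2) in nats:

import numpy as np
from scipy import stats

sigma = 3.0
frozen = stats.norm(loc=10.0, scale=sigma)
analytic = 0.5 * np.log(2 * np.pi * np.e * sigma ** 2)
print(np.isclose(frozen.entropy(), analytic))   # True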
Example 77
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 4 votes vote down vote up
def test_gamma(self):
        a = 2.0
        dist = stats.gamma
        frozen = stats.gamma(a)

        result_f = frozen.pdf(20.0)
        result = dist.pdf(20.0, a)
        assert_equal(result_f, result)

        result_f = frozen.cdf(20.0)
        result = dist.cdf(20.0, a)
        assert_equal(result_f, result)

        result_f = frozen.ppf(0.25)
        result = dist.ppf(0.25, a)
        assert_equal(result_f, result)

        result_f = frozen.isf(0.25)
        result = dist.isf(0.25, a)
        assert_equal(result_f, result)

        result_f = frozen.sf(10.0)
        result = dist.sf(10.0, a)
        assert_equal(result_f, result)

        result_f = frozen.median()
        result = dist.median(a)
        assert_equal(result_f, result)

        result_f = frozen.mean()
        result = dist.mean(a)
        assert_equal(result_f, result)

        result_f = frozen.var()
        result = dist.var(a)
        assert_equal(result_f, result)

        result_f = frozen.std()
        result = dist.std(a)
        assert_equal(result_f, result)

        result_f = frozen.entropy()
        result = dist.entropy(a)
        assert_equal(result_f, result)

        result_f = frozen.moment(2)
        result = dist.moment(2, a)
        assert_equal(result_f, result)

        assert_equal(frozen.a, frozen.dist.a)
        assert_equal(frozen.b, frozen.dist.b) 
Example 78
Project: lit-code   Author: stanford-futuredata   File: inception_score.py    Apache License 2.0 4 votes vote down vote up
def inception_score(imgs, cuda=True, batch_size=32, resize=True, splits=10):
    """Computes the inception score of the generated images imgs
    imgs -- Torch dataset of (3xHxW) numpy images normalized in the range [-1, 1]
    cuda -- whether or not to run on GPU
    batch_size -- batch size for feeding into Inception v3
    splits -- number of splits
    """
    N = len(imgs)

    assert batch_size > 0
    assert N > batch_size

    # Set up dtype
    if cuda:
        dtype = torch.cuda.FloatTensor
    else:
        if torch.cuda.is_available():
            print("WARNING: You have a CUDA device, so you should probably set cuda=True")
        dtype = torch.FloatTensor

    # Set up dataloader
    dataloader = torch.utils.data.DataLoader(imgs, batch_size=batch_size)

    # Load inception model and cast it to the dtype selected above (GPU or CPU)
    inception_model = inception_v3(pretrained=True, transform_input=False).type(dtype)
    inception_model.eval()
    up = nn.Upsample(size=(299, 299), mode='bilinear').type(dtype)
    def get_pred(x):
        if resize:
            x = up(x)
        x = inception_model(x)
        return F.softmax(x, dim=1).data.cpu().numpy()

    # Get predictions
    preds = np.zeros((N, 1000))

    for i, batch in enumerate(dataloader, 0):
        batch = torch.cat(batch)
        batch = batch.type(dtype)
        batchv = Variable(batch)
        batch_size_i = batch.size()[0]

        preds[i*batch_size:i*batch_size + batch_size_i] = get_pred(batchv)

    # Now compute the mean kl-div
    split_scores = []

    for k in range(splits):
        part = preds[k * (N // splits): (k+1) * (N // splits), :]
        py = np.mean(part, axis=0)
        scores = []
        for i in range(part.shape[0]):
            pyx = part[i, :]
            scores.append(entropy(pyx, py))
        split_scores.append(np.exp(np.mean(scores)))

    return np.mean(split_scores), np.std(split_scores) 
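The scoring loop above boils down to exp(mean KL(p(y|x) || p(y))) over each split, with the KL term supplied by scipy.stats.entropy. A minimal, model-free sketch of just that step (the toy prediction rows below are invented for illustration) is:

import numpy as np
from scipy.stats import entropy

preds = np.array([[0.90, 0.05, 0.05],    # toy per-image class probabilities p(y|x)
                  [0.10, 0.80, 0.10],
                  [0.05, 0.05, 0.90]])

py = preds.mean(axis=0)                          # marginal class distribution p(y)
kl = [entropy(pyx, py) for pyx in preds]         # KL(p(y|x) || p(y)) for each image
print(np.exp(np.mean(kl)))                       # higher = sharper and more diverse predictions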
Example 79
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 4 votes vote down vote up
def test_norm(self):
        dist = stats.norm
        frozen = stats.norm(loc=10.0, scale=3.0)

        result_f = frozen.pdf(20.0)
        result = dist.pdf(20.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.cdf(20.0)
        result = dist.cdf(20.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.ppf(0.25)
        result = dist.ppf(0.25, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.isf(0.25)
        result = dist.isf(0.25, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.sf(10.0)
        result = dist.sf(10.0, loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.median()
        result = dist.median(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.mean()
        result = dist.mean(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.var()
        result = dist.var(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.std()
        result = dist.std(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.entropy()
        result = dist.entropy(loc=10.0, scale=3.0)
        assert_equal(result_f, result)

        result_f = frozen.moment(2)
        result = dist.moment(2, loc=10.0, scale=3.0)
        assert_equal(result_f, result) 
Example 80
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 4 votes vote down vote up
def test_gamma(self):
        a = 2.0
        dist = stats.gamma
        frozen = stats.gamma(a)

        result_f = frozen.pdf(20.0)
        result = dist.pdf(20.0, a)
        assert_equal(result_f, result)

        result_f = frozen.cdf(20.0)
        result = dist.cdf(20.0, a)
        assert_equal(result_f, result)

        result_f = frozen.ppf(0.25)
        result = dist.ppf(0.25, a)
        assert_equal(result_f, result)

        result_f = frozen.isf(0.25)
        result = dist.isf(0.25, a)
        assert_equal(result_f, result)

        result_f = frozen.sf(10.0)
        result = dist.sf(10.0, a)
        assert_equal(result_f, result)

        result_f = frozen.median()
        result = dist.median(a)
        assert_equal(result_f, result)

        result_f = frozen.mean()
        result = dist.mean(a)
        assert_equal(result_f, result)

        result_f = frozen.var()
        result = dist.var(a)
        assert_equal(result_f, result)

        result_f = frozen.std()
        result = dist.std(a)
        assert_equal(result_f, result)

        result_f = frozen.entropy()
        result = dist.entropy(a)
        assert_equal(result_f, result)

        result_f = frozen.moment(2)
        result = dist.moment(2, a)
        assert_equal(result_f, result)