Python scipy.stats.entropy() Examples

The following are 30 code examples of scipy.stats.entropy(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
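Before the project examples, here is a minimal orientation sketch (not taken from any of the projects below) showing the two ways entropy() is typically called: with a single distribution pk it returns the Shannon entropy, and with a second distribution qk it returns the Kullback-Leibler divergence. The arrays are made up for illustration.

import numpy as np
from scipy.stats import entropy

pk = np.array([0.5, 0.25, 0.25])   # a discrete distribution (entropy() also renormalizes raw counts)
qk = np.array([0.4, 0.4, 0.2])     # a second distribution, for the KL-divergence form

print(entropy(pk))             # Shannon entropy in nats
print(entropy(pk, base=2))     # Shannon entropy in bits -> 1.5
print(entropy(pk, qk))         # Kullback-Leibler divergence D(pk || qk)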
Example #1
Source File: extraction.py    From git2net with GNU Affero General Public License v3.0
def text_entropy(text):
    """ Computes entropy for a given text based on UTF8 alphabet.

    Args:
        text: string to compute the text entropy for

    Returns:
        text_entropy: text entropy of the given string
    """
    # we only consider UTF8 characters to compute the text entropy
    pk = [text.count(chr(i)) for i in range(256)]
    if sum(pk) == 0:
        text_entropy = None
    else:
        text_entropy = entropy(pk, base=2)
    return text_entropy 
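A quick, hypothetical usage sketch for text_entropy (the strings are invented; note the function only counts the first 256 Unicode code points):

print(text_entropy("aaaa"))   # 0.0 bits: a single repeated character
print(text_entropy("abab"))   # 1.0 bit: two equally frequent characters
print(text_entropy(""))       # None: no countable characters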
Example #2
Source File: evaluation_metrics.py    From PointFlow with MIT License
def jensen_shannon_divergence(P, Q):
    if np.any(P < 0) or np.any(Q < 0):
        raise ValueError('Negative values.')
    if len(P) != len(Q):
        raise ValueError('Non equal size.')

    P_ = P / np.sum(P)  # Ensure probabilities.
    Q_ = Q / np.sum(Q)

    e1 = entropy(P_, base=2)
    e2 = entropy(Q_, base=2)
    e_sum = entropy((P_ + Q_) / 2.0, base=2)
    res = e_sum - ((e1 + e2) / 2.0)

    res2 = _jsdiv(P_, Q_)

    if not np.allclose(res, res2, atol=10e-5, rtol=0):
        warnings.warn('Numerical values of two JSD methods don\'t agree.')

    return res 
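As a standalone illustration of what the function above computes: the Jensen-Shannon divergence is the entropy of the mixture minus the mean of the individual entropies, it is symmetric in P and Q, and with base=2 it lies in [0, 1]. This sketch recomputes it directly with scipy on made-up arrays rather than calling the project's function (whose _jsdiv cross-check is internal to PointFlow).

import numpy as np
from scipy.stats import entropy

P = np.array([0.1, 0.4, 0.5])
Q = np.array([0.3, 0.3, 0.4])
M = 0.5 * (P + Q)
jsd = entropy(M, base=2) - 0.5 * (entropy(P, base=2) + entropy(Q, base=2))
# equivalent "average KL divergence to the mixture" form, as used in Example #4 below
jsd_alt = 0.5 * (entropy(P, M, base=2) + entropy(Q, M, base=2))
assert np.isclose(jsd, jsd_alt)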
Example #3
Source File: actions.py    From reinvent-randomized with MIT License
def _nll_stats(self, sampled_nlls, validation_nlls, training_nlls):
        self._add_histogram("nll_plot/sampled", sampled_nlls)
        self._add_histogram("nll_plot/validation", validation_nlls)
        self._add_histogram("nll_plot/training", training_nlls)

        self._add_scalars("nll/avg", {
            "sampled": sampled_nlls.mean(),
            "validation": validation_nlls.mean(),
            "training": training_nlls.mean()
        })

        self._add_scalars("nll/var", {
            "sampled": sampled_nlls.var(),
            "validation": validation_nlls.var(),
            "training": training_nlls.var()
        })

        def jsd(dists):
            min_size = min(len(dist) for dist in dists)
            dists = [dist[:min_size] for dist in dists]
            num_dists = len(dists)
            avg_dist = np.sum(dists, axis=0) / num_dists
            return np.sum([sps.entropy(dist, avg_dist) for dist in dists]) / num_dists

        self._add_scalar("nll_plot/jsd_joined", jsd([sampled_nlls, training_nlls, validation_nlls])) 
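The jsd helper above generalizes the pairwise case: it truncates the inputs to a common length, averages them, and returns the mean KL divergence from each (implicitly normalized) array to that average. A rough standalone sketch of the same computation on invented arrays (sps refers to scipy.stats in the original file):

import numpy as np
import scipy.stats as sps

dists = [np.array([1.0, 2.0, 3.0]), np.array([2.0, 2.0, 2.0]), np.array([3.0, 2.0, 1.0])]
avg_dist = np.sum(dists, axis=0) / len(dists)
jsd = np.sum([sps.entropy(dist, avg_dist) for dist in dists]) / len(dists)   # entropy() normalizes each input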
Example #4
Source File: entropy.py    From netrd with MIT License
def js_divergence(P, Q):
    """Jensen-Shannon divergence between `P` and `Q`.

    Parameters
    ----------

    P, Q (np.ndarray)
        Two discrete distributions represented as 1D arrays. They are
        assumed to have the same support

    Returns
    -------

    float
        The Jensen-Shannon divergence between `P` and `Q`.

    """
    M = 0.5 * (P + Q)
    return 0.5 * (sp_entropy(P, M, base=2) + sp_entropy(Q, M, base=2)) 
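A short hypothetical call to the function above, assuming the original file's imports (numpy and scipy.stats.entropy aliased as sp_entropy); with base=2 the result lies in [0, 1] and is 0 only for identical distributions:

import numpy as np

print(js_divergence(np.array([0.2, 0.5, 0.3]), np.array([0.2, 0.5, 0.3])))   # 0.0 for identical inputs
print(js_divergence(np.array([1.0, 0.0]), np.array([0.0, 1.0])))             # 1.0 for disjoint support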
Example #5
Source File: _paga.py    From scanpy with BSD 3-Clause "New" or "Revised" License
def paga_expression_entropies(adata) -> List[float]:
    """Compute the median expression entropy for each node-group.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix.

    Returns
    -------
    Entropies of median expressions for each node.
    """
    from scipy.stats import entropy
    groups_order, groups_masks = _utils.select_groups(
        adata, key=adata.uns['paga']['groups']
    )
    entropies = []
    for mask in groups_masks:
        X_mask = adata.X[mask].todense()
        x_median = np.nanmedian(X_mask, axis=1, overwrite_input=True)
        x_probs = (x_median - np.nanmin(x_median)) / (np.nanmax(x_median) - np.nanmin(x_median))
        entropies.append(entropy(x_probs))
    return entropies 
Example #6
Source File: test_distributions.py    From GraphicDesignPatternByPython with MIT License
def test_genextreme_entropy():
    # regression test for gh-5181
    euler_gamma = 0.5772156649015329

    h = stats.genextreme.entropy(-1.0)
    assert_allclose(h, 2*euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(0)
    assert_allclose(h, euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(1.0)
    assert_equal(h, 1)

    h = stats.genextreme.entropy(-2.0, scale=10)
    assert_allclose(h, euler_gamma*3 + np.log(10) + 1, rtol=1e-14)

    h = stats.genextreme.entropy(10)
    assert_allclose(h, -9*euler_gamma + 1, rtol=1e-14)

    h = stats.genextreme.entropy(-10)
    assert_allclose(h, 11*euler_gamma + 1, rtol=1e-14) 
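For reference, the values checked above follow from the closed-form entropy of the generalized extreme value distribution in scipy's parameterization (shape c, the negative of the textbook shape parameter): h = log(scale) + euler_gamma*(1 - c) + 1. A small verification sketch, assuming only numpy and scipy:

import numpy as np
from scipy import stats

euler_gamma = 0.5772156649015329
for c, scale in [(-1.0, 1), (0.0, 1), (1.0, 1), (-2.0, 10), (10.0, 1), (-10.0, 1)]:
    closed_form = np.log(scale) + euler_gamma * (1 - c) + 1
    assert np.isclose(stats.genextreme.entropy(c, scale=scale), closed_form)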
Example #7
Source File: selection.py    From StageDP with MIT License
def select(self, features, freq_table):
        """ Select features via some criteria

        :type features: dict
        :param features: features vocab

        :type freq_table: 2-D numpy.array
        :param freq_table: frequency table with rows as features,
                          columns as frequency values
        """
        if self.method == 'frequency':
            feat_vals = self.frequency(features, freq_table)
        elif self.method == 'entropy':
            feat_vals = self.entropy(features, freq_table)
        elif self.method == 'freq-entropy':
            feat_vals = self.freq_entropy(features, freq_table)
        else:
            raise KeyError("Unrecognized method")
        new_features = self.rank(feat_vals)
        return new_features 
Example #8
Source File: qgan.py    From qiskit-aqua with Apache License 2.0
def _run(self):
        """
        Run qGAN training

        Returns:
            dict: with generator(discriminator) parameters & loss, relative entropy
        Raises:
            AquaError: invalid backend
        """
        if self._quantum_instance.backend_name in ('unitary_simulator', 'clifford_simulator'):
            raise AquaError(
                'Chosen backend not supported - '
                'Set backend either to statevector_simulator, qasm_simulator'
                ' or actual quantum hardware')
        self.train()

        return self._ret 
Example #9
Source File: disagreement.py    From modAL with MIT License
def consensus_entropy(committee: BaseCommittee, X: modALinput, **predict_proba_kwargs) -> np.ndarray:
    """
    Calculates the consensus entropy for the Committee. First it computes the class probabilities of X for each learner
    in the Committee, then calculates the consensus probability distribution by averaging the individual class
    probabilities across learners. The entropy of this consensus probability distribution is returned.

    Args:
        committee: The :class:`modAL.models.BaseCommittee` instance for which the consensus entropy is to be calculated.
        X: The data for which the consensus entropy is to be calculated.
        **predict_proba_kwargs: Keyword arguments for the :meth:`predict_proba` of the Committee.

    Returns:
        Consensus entropy of the Committee for the samples in X.
    """
    try:
        proba = committee.predict_proba(X, **predict_proba_kwargs)
    except NotFittedError:
        return np.zeros(shape=(X.shape[0],))

    entr = np.transpose(entropy(np.transpose(proba)))
    return entr 
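The steps in the docstring can be sketched without modAL: average the per-learner class probabilities, then take the entropy of each row. scipy's entropy() reduces along axis 0 of a 2-D array, which is why the code above transposes before and after. A rough illustration on made-up probabilities:

import numpy as np
from scipy.stats import entropy

# predicted class probabilities from two hypothetical learners: 3 samples x 2 classes
p1 = np.array([[0.9, 0.1], [0.5, 0.5], [0.7, 0.3]])
p2 = np.array([[0.8, 0.2], [0.5, 0.5], [0.1, 0.9]])
consensus = (p1 + p2) / 2                                # consensus distribution per sample
entr = np.transpose(entropy(np.transpose(consensus)))    # one entropy value per sample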
Example #10
Source File: uncertainty.py    From modAL with MIT License
def classifier_entropy(classifier: BaseEstimator, X: modALinput, **predict_proba_kwargs) -> np.ndarray:
    """
    Entropy of the classifier's predictions for the provided samples.

    Args:
        classifier: The classifier for which the prediction entropy is to be measured.
        X: The samples for which the prediction entropy is to be measured.
        **predict_proba_kwargs: Keyword arguments to be passed for the :meth:`predict_proba` of the classifier.

    Returns:
        Entropy of the class probabilities.
    """
    try:
        classwise_uncertainty = classifier.predict_proba(X, **predict_proba_kwargs)
    except NotFittedError:
        return np.zeros(shape=(X.shape[0], ))

    return np.transpose(entropy(np.transpose(classwise_uncertainty))) 
Example #11
Source File: core_tests.py    From modAL with MIT License
def test_vote_entropy(self):
        for n_samples in range(1, 10):
            for n_classes in range(1, 10):
                for true_query_idx in range(n_samples):
                    # 1. fitted committee
                    vote_return = np.zeros(shape=(n_samples, n_classes), dtype=np.int16)
                    vote_return[true_query_idx] = np.asarray(range(n_classes), dtype=np.int16)
                    committee = mock.MockCommittee(classes_=np.asarray(range(n_classes)), vote_return=vote_return)
                    vote_entr = modAL.disagreement.vote_entropy(
                        committee, np.random.rand(n_samples, n_classes)
                    )
                    true_entropy = np.zeros(shape=(n_samples, ))
                    true_entropy[true_query_idx] = entropy(np.ones(n_classes)/n_classes)
                    np.testing.assert_array_almost_equal(vote_entr, true_entropy)

                    # 2. unfitted committee
                    committee = mock.MockCommittee(fitted=False)
                    true_entropy = np.zeros(shape=(n_samples,))
                    vote_entr = modAL.disagreement.vote_entropy(
                        committee, np.random.rand(n_samples, n_classes)
                    )
                    np.testing.assert_almost_equal(vote_entr, true_entropy) 
Example #12
Source File: core_tests.py    From modAL with MIT License
def test_consensus_entropy(self):
        for n_samples in range(1, 10):
            for n_classes in range(2, 10):
                for true_query_idx in range(n_samples):
                    # 1. fitted committee
                    proba = np.zeros(shape=(n_samples, n_classes))
                    proba[:, 0] = 1.0
                    proba[true_query_idx] = np.ones(n_classes)/n_classes
                    committee = mock.MockCommittee(predict_proba_return=proba)
                    consensus_entropy = modAL.disagreement.consensus_entropy(
                        committee, np.random.rand(n_samples, n_classes)
                    )
                    true_entropy = np.zeros(shape=(n_samples,))
                    true_entropy[true_query_idx] = entropy(np.ones(n_classes) / n_classes)
                    np.testing.assert_array_almost_equal(consensus_entropy, true_entropy)

                    # 2. unfitted committee
                    committee = mock.MockCommittee(fitted=False)
                    true_entropy = np.zeros(shape=(n_samples,))
                    consensus_entropy = modAL.disagreement.consensus_entropy(
                        committee, np.random.rand(n_samples, n_classes)
                    )
                    np.testing.assert_almost_equal(consensus_entropy, true_entropy) 
Example #13
Source File: evaluation_metrics.py    From PointFlow with MIT License
def entropy_of_occupancy_grid(pclouds, grid_resolution, in_sphere=False, verbose=False):
    """Given a collection of point-clouds, estimate the entropy of the random variables
    corresponding to occupancy-grid activation patterns.
    Inputs:
        pclouds: (numpy array) #point-clouds x points per point-cloud x 3
        grid_resolution (int) size of occupancy grid that will be used.
    """
    epsilon = 10e-4
    bound = 0.5 + epsilon
    if abs(np.max(pclouds)) > bound or abs(np.min(pclouds)) > bound:
        if verbose:
            warnings.warn('Point-clouds are not in unit cube.')

    if in_sphere and np.max(np.sqrt(np.sum(pclouds ** 2, axis=2))) > bound:
        if verbose:
            warnings.warn('Point-clouds are not in unit sphere.')

    grid_coordinates, _ = unit_cube_grid_point_cloud(grid_resolution, in_sphere)
    grid_coordinates = grid_coordinates.reshape(-1, 3)
    grid_counters = np.zeros(len(grid_coordinates))
    grid_bernoulli_rvars = np.zeros(len(grid_coordinates))
    nn = NearestNeighbors(n_neighbors=1).fit(grid_coordinates)

    for pc in pclouds:
        _, indices = nn.kneighbors(pc)
        indices = np.squeeze(indices)
        for i in indices:
            grid_counters[i] += 1
        indices = np.unique(indices)
        for i in indices:
            grid_bernoulli_rvars[i] += 1

    acc_entropy = 0.0
    n = float(len(pclouds))
    for g in grid_bernoulli_rvars:
        if g > 0:
            p = float(g) / n
            acc_entropy += entropy([p, 1.0 - p])

    return acc_entropy / len(grid_counters), grid_counters 
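The core of the function above is the per-cell Bernoulli entropy: each grid cell is a binary variable (touched or not by a given point cloud), its activation probability is estimated as the fraction of clouds that occupy it, and the cell entropies are averaged. A stripped-down sketch of just that step, on an invented count vector:

import numpy as np
from scipy.stats import entropy

n_clouds = 10.0
cell_hits = np.array([0, 1, 5, 10])        # hypothetical: how many clouds occupied each cell
acc_entropy = 0.0
for g in cell_hits:
    if g > 0:
        p = g / n_clouds
        acc_entropy += entropy([p, 1.0 - p])   # Bernoulli entropy of this cell, in nats
print(acc_entropy / len(cell_hits))            # average over all cells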
Example #14
Source File: entropy.py    From gwin with GNU General Public License v3.0
def kl(samples1, samples2, pdf1=False, pdf2=False,
       bins=30, hist_min=None, hist_max=None):
    """ Computes the Kullback-Leibler divergence for a single parameter
    from two distributions.

    Parameters
    ----------
    samples1 : numpy.array
        Samples or probability density function (must also set `pdf1=True`).
    samples2 : numpy.array
        Samples or probability density function (must also set `pdf2=True`).
    pdf1 : bool
        Set to `True` if `samples1` is a probability density function already.
    pdf2 : bool
        Set to `True` if `samples2` is a probability density function already.
    bins : int
        Number of bins to use when calculating probability density function
        from a set of samples of the distribution.
    hist_min : numpy.float64
        Minimum of the distributions' values to use.
    hist_max : numpy.float64
        Maximum of the distributions' values to use.

    Returns
    -------
    numpy.float64
        The Kullback-Leibler divergence value.
    """
    hist_range = (hist_min, hist_max)
    if not pdf1:
        samples1, _ = numpy.histogram(samples1, bins=bins,
                                      range=hist_range, density=True)
    if not pdf2:
        samples2, _ = numpy.histogram(samples2, bins=bins,
                                      range=hist_range, density=True)
    return stats.entropy(samples1, qk=samples2) 
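A hypothetical call to the function above (assuming the original file's numpy and scipy.stats imports), estimating D(P || Q) from two sets of samples by histogramming them onto a shared range; for these two unit-variance normals the true divergence is 0.125 nats:

import numpy

samples_a = numpy.random.normal(0.0, 1.0, size=100000)
samples_b = numpy.random.normal(0.5, 1.0, size=100000)
print(kl(samples_a, samples_b, bins=30, hist_min=-3.0, hist_max=3.0))   # close to 0.125, up to binning and sampling error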
Example #15
Source File: feature_squeezing.py    From EvadeML-Zoo with MIT License
def kl(x1, x2):
    assert x1.shape == x2.shape
    # x1_2d, x2_2d = reshape_2d(x1), reshape_2d(x2)

    # Transpose to [?, #num_examples]
    x1_2d_t = x1.transpose()
    x2_2d_t = x2.transpose()

    # pdb.set_trace()
    e = entropy(x1_2d_t, x2_2d_t)
    e[np.where(e==np.inf)] = 2
    return e 
Example #16
Source File: toy_world_state.py    From mcts with BSD 2-Clause "Simplified" License
def reward(self, parent, action):
        if (self.pos == self.world.goal).all():
            print("g", end="")
            return 100
        else:
            reward = -1
            if self.world.information_gain:
                for a in self.actions:
                    reward += entropy(parent.belief[a], self.belief[a])
            return reward 
Example #17
Source File: TestCode.py    From aktaion with Apache License 2.0
def H(data, iterator=range_bytes):
    if not data:
        return 0
    entropy = 0
    for x in iterator():
        p_x = float(data.count(chr(x)))/len(data)
        if p_x > 0:
            entropy += - p_x*math.log(p_x, 2)
    return entropy

#def main ():
#    for row in fileinput.input():
#        string = row.rstrip('\n')
#        print ("%s: %f" % (string, H(string, range_printable))) 
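range_bytes and range_printable are helpers defined elsewhere in that project (and range_bytes must already exist when the def above runs, since it is used as a default argument). Assuming range_bytes() simply yields range(256), the hand-rolled loop agrees with scipy.stats.entropy applied to character counts:

import math
from scipy.stats import entropy as scipy_entropy

def range_bytes():
    return range(256)   # assumed stand-in for the project's own helper

data = "hello world"
print(H(data))                                                            # manual Shannon entropy in bits
print(scipy_entropy([data.count(chr(i)) for i in range(256)], base=2))    # same value via scipy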
Example #18
Source File: posterior_utils.py    From scVI with MIT License
def entropy_from_indices(indices):
    return entropy(np.array(np.unique(indices, return_counts=True)[1].astype(np.int32))) 
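The one-liner above measures how evenly a set of labels (for example cluster assignments) is distributed: np.unique(..., return_counts=True)[1] yields the count of each label and entropy() normalizes those counts. Two tiny, hypothetical calls:

import numpy as np

print(entropy_from_indices(np.array([0, 0, 0, 0])))   # 0.0: a single label
print(entropy_from_indices(np.array([0, 1, 0, 1])))   # log(2) ~ 0.693: two balanced labels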
Example #19
Source File: qgan.py    From qiskit-aqua with Apache License 2.0
def get_rel_entr(self) -> float:
        """ Get relative entropy between target and trained distribution """
        samples_gen, prob_gen = self._generator.get_output(self._quantum_instance)
        temp = np.zeros(len(self._grid_elements))
        for j, sample in enumerate(samples_gen):
            for i, element in enumerate(self._grid_elements):
                if sample == element:
                    temp[i] += prob_gen[j]
        prob_gen = temp
        prob_gen = [1e-8 if x == 0 else x for x in prob_gen]
        rel_entr = entropy(prob_gen, self._prob_data)
        return rel_entr 
Example #20
Source File: qgan.py    From qiskit-aqua with Apache License 2.0
def rel_entr(self) -> List[float]:
        """ Returns relative entropy between target and trained distribution """
        return self._rel_entr 
Example #21
Source File: qgan.py    From qiskit-aqua with Apache License 2.0
def tol_rel_ent(self, t):
        """
        Set tolerance for relative entropy

        Args:
            t (float): tolerance level for relative entropy, or None.
                If the training achieves a relative entropy equal to or
                lower than this tolerance, it finishes.
        """
        self._tol_rel_ent = t 
Example #22
Source File: qgan.py    From qiskit-aqua with Apache License 2.0
def tol_rel_ent(self):
        """ Returns tolerance for relative entropy """
        return self._tol_rel_ent 
Example #23
Source File: selection.py    From StageDP with MIT License
def test():
    vocab = {'hello': 0, 'data': 1, 'computer': 2}
    freq_table = [[23, 23, 23, 23], [23, 1, 4, 5], [1, 34, 1, 1]]
    freq_table = numpy.array(freq_table)
    fs = FeatureSelector(topn=2, method='freq-entropy')
    newvocab = fs.select(vocab, freq_table)
    print(newvocab) 
Example #24
Source File: selection.py    From StageDP with MIT License
def freq_entropy(self, features, freq_table):
        """
        """
        feat_vals = {}
        feat_freqs = self.frequency(features, freq_table)
        feat_ents = self.entropy(features, freq_table)
        for feat in features.keys():
            freq = feat_freqs[feat]
            ent = feat_ents[feat]
            feat_vals[feat] = numpy.log(freq + 1e-3) * (ent + 1e-3)
        return feat_vals 
Example #25
Source File: selection.py    From StageDP with MIT License
def entropy(self, features, freq_table):
        """
        """
        feat_vals = {}
        for (feat, idx) in features.items():
            freq = freq_table[idx, :]
            feat_vals[feat] = 1 / (entropy(freq) + 1e-3)
        return feat_vals 
Example #26
Source File: posterior_utils.py    From scVI with MIT License
def entropy_batch_mixing(
    latent_space, batches, n_neighbors=50, n_pools=50, n_samples_per_pool=100
):
    def entropy(hist_data):
        n_batches = len(np.unique(hist_data))
        if n_batches > 2:
            raise ValueError("Should be only two clusters for this metric")
        frequency = np.mean(hist_data == 1)
        if frequency == 0 or frequency == 1:
            return 0
        return -frequency * np.log(frequency) - (1 - frequency) * np.log(1 - frequency)

    n_neighbors = min(n_neighbors, len(latent_space) - 1)
    nne = NearestNeighbors(n_neighbors=1 + n_neighbors, n_jobs=8)
    nne.fit(latent_space)
    kmatrix = nne.kneighbors_graph(latent_space) - scipy.sparse.identity(
        latent_space.shape[0]
    )

    score = 0
    for t in range(n_pools):
        indices = np.random.choice(
            np.arange(latent_space.shape[0]), size=n_samples_per_pool
        )
        score += np.mean(
            [
                entropy(
                    batches[
                        kmatrix[indices].nonzero()[1][
                            kmatrix[indices].nonzero()[0] == i
                        ]
                    ]
                )
                for i in range(n_samples_per_pool)
            ]
        )
    return score / float(n_pools) 
Example #27
Source File: test_distributions.py    From GraphicDesignPatternByPython with MIT License
def test_entropy(self):
        assert_allclose(self.norm_template.entropy(),
                        stats.norm.entropy(loc=1.0, scale=2.5), rtol=0.05) 
Example #28
Source File: infotheo.py    From vnpy_crypto with MIT License
def bitstonats(X):
    """
    Converts from bits to nats
    """
    return logbasechange(2, np.e) * X

#TODO: make this entropy, and then have different measures as
#a method 
Example #29
Source File: test_distributions.py    From GraphicDesignPatternByPython with MIT License
def test_entropy_2d_zero(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [np.inf, 0.18609809])

        pk[0][0] = 0.0
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [0.17403988, 0.18609809]) 
Example #30
Source File: ir2tagsets.py    From plastering with MIT License
def ir2tagset_al_query_entropy(self,
                                   target_prob_mat,
                                   #target_prob,
                                   target_srcids,
                                   learning_srcids,
                                   target_building,
                                   inc_num
                                   ):
        assert len(target_srcids) == target_prob_mat.shape[0]
        entropies = get_entropy(target_prob_mat.T)
        sorted_entropies = sorted([(srcid, ent) for srcid, ent
                                   in zip(target_srcids, entropies)],
                                  key=itemgetter(1))
        cluster_dict = self.building_cluster_dict[target_building]
        added_cids = []
        todo_srcids = []
        new_srcid_cnt = 0
        for srcid, ent in sorted_entropies:
            if srcid in learning_srcids:
                continue
            the_cid = None
            for cid, cluster in cluster_dict.items():
                if srcid in cluster:
                    the_cid = cid
                    break
            if the_cid in added_cids:
                continue
            added_cids.append(the_cid)
            todo_srcids.append(srcid)
            new_srcid_cnt += 1
            if new_srcid_cnt == inc_num:
                break
        return todo_srcids