Python scipy.stats.wasserstein_distance() Examples

The following are 21 code examples of scipy.stats.wasserstein_distance(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: _dp_verification.py    From whitenoise-system with MIT License 6 votes vote down vote up
def aggtest(self, f, colname, numbins=0, binsize="auto", debug=False, plot=True, bound=True, exact=False):
        """
        Verify a SQL aggregation mechanism on a pair of neighboring datasets.

        The aggregation ``f`` is applied to column ``colname`` of both
        neighbors; the two sets of repeated responses are then compared with
        ``self.ks_test``, binned into histograms, checked with
        ``self.dp_test`` and (in non-exact mode) compared with
        ``self.wasserstein_distance``.

        Returns the tuple ``(dp_res, ks_res, ws_res)``. When ``exact`` is
        truthy the fixed tuple ``(False, 0.0, 0.0)`` is returned instead and
        nothing is plotted.
        """
        neighbor_a, neighbor_b, _meta_a, _meta_b = self.generate_neighbors()
        responses_a, responses_b = self.apply_aggregation_neighbors(
            f, (neighbor_a, colname), (neighbor_b, colname))
        size_a = responses_a.size
        size_b = responses_b.size
        ks_res = self.ks_test(responses_a, responses_b)

        hist_a, hist_b, bin_edges = self.generate_histogram_neighbors(
            responses_a, responses_b, numbins, binsize, exact=exact)
        dp_outcome = self.dp_test(
            hist_a, hist_b, bin_edges, size_a, size_b, debug, exact=exact)
        dp_res, upper_a, upper_b, lower_a, lower_b = dp_outcome

        # Exact mode short-circuits with a fixed result once the helper calls
        # above have run.
        if exact:
            return False, 0.0, 0.0

        ws_res = self.wasserstein_distance(hist_a, hist_b)
        if plot:
            self.plot_histogram_neighbors(
                responses_a, responses_b, upper_a, upper_b,
                hist_a, hist_b, lower_a, lower_b, bin_edges, bound, exact)
        return dp_res, ks_res, ws_res
Example #2
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_collapse(self):
        # Collapsing a distribution to a point distribution at zero is
        # equivalent to taking the average of the absolute values of the values.
        u = np.arange(-10, 30, 0.3)
        v = np.zeros_like(u)
        assert_almost_equal(
            stats.wasserstein_distance(u, v),
            np.mean(np.abs(u)))

        u_weights = np.arange(len(u))
        v_weights = u_weights[::-1]
        assert_almost_equal(
            stats.wasserstein_distance(u, v, u_weights, v_weights),
            np.average(np.abs(u), weights=u_weights)) 
Example #3
Source File: _dp_verification.py    From whitenoise-system with MIT License 5 votes vote down vote up
def wasserstein_distance(self, d1hist, d2hist):
        """
        Return ``stats.wasserstein_distance`` evaluated on the two histograms
        obtained from repeated analysis on neighboring datasets.
        """
        distance = stats.wasserstein_distance(d1hist, d2hist)
        return distance
Example #4
Source File: metrics.py    From moses with MIT License 5 votes vote down vote up
def metric(self, pref, pgen):
        """Wasserstein distance between the ``'values'`` entries of *pref* and *pgen*."""
        reference_values = pref['values']
        generated_values = pgen['values']
        return wasserstein_distance(reference_values, generated_values)
Example #5
Source File: stats.py    From scprep with GNU General Public License v3.0 5 votes vote down vote up
def EMD(x, y):
    """Earth Mover's Distance between two samples.

    The Earth Mover's Distance (also known as the Wasserstein distance)
    measures how far apart two probability distributions are. It is handy for
    spotting genes that are differentially expressed between two groups of
    cells. See https://en.wikipedia.org/wiki/Wasserstein_metric for background.

    Both inputs are passed through ``_vector_coerce_two_dense`` before the
    distance is evaluated with ``stats.wasserstein_distance``.

    Parameters
    ----------
    x : array-like, shape=[n_samples]
        Input data (feature 1)
    y : array-like, shape=[n_samples]
        Input data (feature 2)

    Returns
    -------
    emd : float
        Earth Mover's Distance between x and y.

    Examples
    --------
    >>> import scprep
    >>> data = scprep.io.load_csv("my_data.csv")
    >>> emd = scprep.stats.EMD(data['GENE1'], data['GENE2'])
    """
    dense_x, dense_y = _vector_coerce_two_dense(x, y)
    emd = stats.wasserstein_distance(dense_x, dense_y)
    return emd
Example #6
Source File: statistics.py    From generative-graph-transformer with MIT License 5 votes vote down vote up
def compute_statistics_MLP(y_A, y_nodes, output_A, output_nodes, y_seq_len, output_seq_len):
    r"""
    Compute statistics for the current data point, based on the one-shot output from the MLP decoder.

    :param y_A: target adjacency matrix A
    :param y_nodes: target nodes, forwarded to ``streetmover_distance`` (presumably the node coordinates X)
    :param output_A: predicted adjacency matrix A
    :param output_nodes: predicted nodes, forwarded to ``streetmover_distance`` (presumably the node coordinates X)
    :param y_seq_len: target |V|
    :param output_seq_len: predicted |V|
    :return: streetmover, acc_A, delta_n_edges, delta_n_nodes, dist_degree, dist_diam
    """
    # from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is the
    # supported equivalent (available since NetworkX 2.0).
    output_graph = nx.from_numpy_array(output_A)
    y_graph = nx.from_numpy_array(y_A)

    # Distance between the degree histograms of predicted and target graphs.
    output_degree = get_degree_hist(output_graph)
    y_degree = get_degree_hist(y_graph)
    dist_degree = wasserstein_distance(output_degree, y_degree)

    # Distance between the diameter samples; falls back to 1 when the
    # predicted graph yields no diameters at all.
    # NOTE(review): an empty y_diam is not guarded here - confirm it cannot occur.
    output_diam = get_diameters(output_graph)
    y_diam = get_diameters(y_graph)
    dist_diam = wasserstein_distance(output_diam, y_diam) if len(output_diam) > 0 else 1

    # Signed differences: positive means the prediction has more nodes / a
    # larger adjacency sum than the target.
    delta_n_nodes = int(output_seq_len - y_seq_len)
    delta_n_edges = output_A.sum() - y_A.sum()

    acc_A = get_accuracy_A(output_A, y_A)

    # Only the scalar streetmover value is returned; the point clouds and the
    # P / C outputs are discarded.
    (y_pc, output_pc), (streetmover, P, C) = streetmover_distance(y_A, y_nodes, output_A, output_nodes, n_points=100)

    return streetmover.item(), acc_A, delta_n_edges, delta_n_nodes, dist_degree, dist_diam
Example #7
Source File: dnn_train.py    From FractalAI with GNU Affero General Public License v3.0 5 votes vote down vote up
def evaluate_distance(self) -> np.ndarray:
        """Score how different each walker is from a randomly chosen companion.

        The observations are cast to float32 and compared, via
        ``self.wasserstein_distance``, against a random permutation of
        themselves; the result is then normalized with ``relativize_vector``.
        In a more general scenario, any function that quantifies "how
        different two observations are" could be used, even if it is not a
        proper distance.
        """
        # Draw one random companion index per walker.
        walker_ids = np.arange(self.n_walkers, dtype=int)
        companions = np.random.permutation(walker_ids)

        # Compare every walker's observation with its companion's.
        observations = self.observations.astype(np.float32)
        raw_distance = self.wasserstein_distance(observations[companions], observations)
        return relativize_vector(raw_distance)
Example #8
Source File: dnn_train.py    From FractalAI with GNU Affero General Public License v3.0 5 votes vote down vote up
def wasserstein_distance(x, y):
        """Mean row-wise Wasserstein distance for each pair of stacked items.

        ``x`` and ``y`` are indexed as ``[item][row]``; each ``x[i][j]`` /
        ``y[i][j]`` pair is handed to ``scipy.stats.wasserstein_distance``.
        Both must expose ``.shape`` and integer indexing on the first two
        axes; ``x`` drives the iteration, so ``y`` must be at least as long
        as ``x`` along those axes.

        Returns a 1-D ``np.ndarray`` with one entry per item ``i``: the mean
        over rows ``j`` of the distance between ``x[i][j]`` and ``y[i][j]``.
        """
        from scipy import stats

        def _mean_row_distance(x_rows, y_rows):
            # Index-based on purpose: the row count is taken from x_rows
            # only, exactly as the original loop did.
            per_row = [
                stats.wasserstein_distance(x_rows[j], y_rows[j])
                for j in range(x_rows.shape[0])
            ]
            return np.array(per_row).mean()

        return np.array(
            [_mean_row_distance(x[i], y[i]) for i in range(x.shape[0])]
        )
Example #9
Source File: dnn_train.py    From FractalAI with GNU Affero General Public License v3.0 5 votes vote down vote up
def peste_distance(self) -> np.ndarray:
        """Score how different each walker's observation is from the "peste" observations.

        The current observations are converted to an array and compared, via
        ``self.wasserstein_distance``, with whatever ``self.get_peste_obs()``
        returns; the result is then normalized with ``relativize_vector``.
        In a more general scenario, any function that quantifies "how
        different two observations are" could be used, even if it is not a
        proper distance.
        """
        # Reference observations to compare against.
        reference_obs = self.get_peste_obs()
        walker_obs = np.array(self.observations)
        raw_distance = self.wasserstein_distance(walker_obs, reference_obs)
        return relativize_vector(raw_distance)
Example #10
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_zero_weight(self):
        # Values with zero weight have no impact on the Wasserstein distance.
        assert_almost_equal(
            stats.wasserstein_distance([1, 2, 100000], [1, 1],
                                       [1, 1, 0], [1, 1]),
            stats.wasserstein_distance([1, 2], [1, 1], [1, 1], [1, 1])) 
Example #11
Source File: test_ot.py    From POT with MIT License 5 votes vote down vote up
def test_emd_1d_emd2_1d():
    # The dedicated 1-D solvers must agree with the generic EMD solver.
    n_u, n_v = 20, 30
    rng = np.random.RandomState(0)
    u = rng.randn(n_u, 1)
    v = rng.randn(n_v, 1)

    cost_matrix = ot.dist(u, v, metric='sqeuclidean')

    plan, emd_log = ot.emd([], [], cost_matrix, log=True)
    cost_emd = emd_log["cost"]
    plan_1d, emd_1d_log = ot.emd_1d(u, v, [], [], metric='sqeuclidean', log=True)
    cost_emd_1d = emd_1d_log["cost"]
    cost_emd2_1d = ot.emd2_1d(u, v, [], [], metric='sqeuclidean', log=False)
    cost_emd2_1d_euc = ot.emd2_1d(u, v, [], [], metric='euclidean', log=False)

    # All three squared-Euclidean costs must coincide.
    for candidate in (cost_emd_1d, cost_emd2_1d):
        np.testing.assert_allclose(cost_emd, candidate)

    # With the Euclidean metric the cost must match scipy's implementation.
    cost_scipy = wasserstein_distance(u.reshape((-1,)), v.reshape((-1,)))
    np.testing.assert_allclose(cost_scipy, cost_emd2_1d_euc)

    # The transport plan's marginals must be the uniform weights.
    np.testing.assert_allclose(np.ones((n_u,)) / n_u, plan.sum(1))
    np.testing.assert_allclose(np.ones((n_v,)) / n_v, plan.sum(0))

    # Both solvers must produce the same plan.
    np.testing.assert_allclose(plan, plan_1d)

    # Inputs that are not 1-D must trigger an AssertionError.
    u = np.random.randn(n_u, 2)
    v = np.random.randn(n_v, 2)
    with pytest.raises(AssertionError):
        ot.emd_1d(u, v, [], [])
Example #12
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_combine_weights(self):
        # Assigning a weight w to a value is equivalent to including that value
        # w times in the value array with weight of 1.
        assert_almost_equal(
            stats.wasserstein_distance(
                [0, 0, 1, 1, 1, 1, 5], [0, 3, 3, 3, 3, 4, 4],
                [1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1]),
            stats.wasserstein_distance([5, 0, 1], [0, 4, 3],
                                       [1, 2, 4], [1, 2, 4])) 
Example #13
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_shift(self):
        # If the whole distribution is shifted by x, then the Wasserstein
        # distance should be x.
        assert_almost_equal(stats.wasserstein_distance([0], [1]), 1)
        assert_almost_equal(stats.wasserstein_distance([-5], [5]), 10)
        assert_almost_equal(
            stats.wasserstein_distance([1, 2, 3, 4, 5], [11, 12, 13, 14, 15]),
            10)
        assert_almost_equal(
            stats.wasserstein_distance([4.5, 6.7, 2.1], [4.6, 7, 9.2],
                                       [3, 1, 1], [1, 3, 1]),
            2.5) 
Example #14
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_same_distribution(self):
        # Any distribution moved to itself should have a Wasserstein distance of
        # zero.
        assert_equal(stats.wasserstein_distance([1, 2, 3], [2, 1, 3]), 0)
        assert_equal(
            stats.wasserstein_distance([1, 1, 1, 4], [4, 1],
                                       [1, 1, 1, 1], [1, 3]),
            0) 
Example #15
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_inf_weight(self):
        # An inf weight is not valid.
        assert_raises(ValueError, stats.wasserstein_distance,
                      [1, 2, 1], [1, 1], [1, np.inf, 1], [1, 1]) 
Example #16
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_empty_distribution(self):
        # A ValueError should be raised when trying to measure the distance
        # between something and nothing.
        assert_raises(ValueError, stats.wasserstein_distance, [], [2, 2])
        assert_raises(ValueError, stats.wasserstein_distance, [1], []) 
Example #17
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_negative_weights(self):
        # A ValueError should be raised if there are any negative weights.
        assert_raises(ValueError, stats.wasserstein_distance,
                      [0, 1], [2, 2], [1, 1], [3, -1]) 
Example #18
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_zero_weight(self):
        # When a distribution is given zero weight, a ValueError should be
        # raised.
        assert_raises(ValueError, stats.wasserstein_distance,
                      [0, 1], [2], [0, 0])
        assert_raises(ValueError, stats.wasserstein_distance,
                      [0, 1], [2], [3, 1], [0]) 
Example #19
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_distinct_value_and_weight_lengths(self):
        # When the number of weights does not match the number of values,
        # a ValueError should be raised.
        assert_raises(ValueError, stats.wasserstein_distance,
                      [1], [2], [4], [3, 1])
        assert_raises(ValueError, stats.wasserstein_distance, [1], [2], [1, 0]) 
Example #20
Source File: test_ot.py    From POT with MIT License 5 votes vote down vote up
def test_emd_1d_emd2_1d_with_weights():
    # The dedicated 1-D solvers must agree with the generic EMD solver when
    # explicit sample weights are supplied.
    n_u, n_v = 20, 30
    rng = np.random.RandomState(0)
    u = rng.randn(n_u, 1)
    v = rng.randn(n_v, 1)

    # Random positive weights, normalized so each side sums to one.
    raw_u = rng.uniform(0., 1., n_u)
    w_u = raw_u / raw_u.sum()

    raw_v = rng.uniform(0., 1., n_v)
    w_v = raw_v / raw_v.sum()

    cost_matrix = ot.dist(u, v, metric='sqeuclidean')

    plan, emd_log = ot.emd(w_u, w_v, cost_matrix, log=True)
    cost_emd = emd_log["cost"]
    plan_1d, emd_1d_log = ot.emd_1d(u, v, w_u, w_v, metric='sqeuclidean', log=True)
    cost_emd_1d = emd_1d_log["cost"]
    cost_emd2_1d = ot.emd2_1d(u, v, w_u, w_v, metric='sqeuclidean', log=False)
    cost_emd2_1d_euc = ot.emd2_1d(u, v, w_u, w_v, metric='euclidean', log=False)

    # All three squared-Euclidean costs must coincide.
    for candidate in (cost_emd_1d, cost_emd2_1d):
        np.testing.assert_allclose(cost_emd, candidate)

    # With the Euclidean metric the cost must match scipy's implementation.
    cost_scipy = wasserstein_distance(u.reshape((-1,)), v.reshape((-1,)), w_u, w_v)
    np.testing.assert_allclose(cost_scipy, cost_emd2_1d_euc)

    # The transport plan's marginals must reproduce the input weights.
    np.testing.assert_allclose(w_u, plan.sum(1))
    np.testing.assert_allclose(w_v, plan.sum(0))
Example #21
Source File: statistics.py    From generative-graph-transformer with MIT License 4 votes vote down vote up
def compute_statistics(output_adj, output_coord, output_seq_len, y_adj, y_coord, y_seq_len, lamb=0.5):
    r"""
    Compute statistics for the current data point.
    
    :param output_adj: predicted A
    :param output_coord: predicted X
    :param output_seq_len: predicted |V|
    :param y_adj: target A
    :param y_coord: target X
    :param y_seq_len: target |V|
    :param lamb: lambda parameter for the loss in this experiment
    :return: streetmover, loss, loss_adj, loss_coord, acc_A, delta_n_edges, delta_n_nodes, dist_degree, dist_diam
    """
    # Only the first element along axis 0 is used, truncated to the first
    # (seq_len - 2) entries (the original comment notes the last one is not
    # included).
    output_A = decode_adj(output_adj[0, :output_seq_len - 2].cpu().numpy())
    y_A = decode_adj(y_adj[0, :y_seq_len - 2].cpu().numpy())
    output_nodes = output_coord[0, :output_seq_len - 2]
    y_nodes = y_coord[0, :y_seq_len - 2]
    # from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is the
    # supported equivalent (available since NetworkX 2.0).
    output_graph = nx.from_numpy_array(output_A)
    y_graph = nx.from_numpy_array(y_A)
    
    # Sanity checks: decoded adjacency and node slices must agree in size.
    assert output_A.shape[0] == output_nodes.shape[0] == output_seq_len - 2
    assert y_A.shape[0] == y_nodes.shape[0] == y_seq_len - 2
    
    output_n_edges = output_adj.reshape(-1).sum()
    y_n_edges = y_adj.reshape(-1).sum()
    
    # Distance between the degree histograms of predicted and target graphs.
    output_degree = get_degree_hist(output_graph)
    y_degree = get_degree_hist(y_graph)
    dist_degree = wasserstein_distance(output_degree, y_degree)
    
    # Distance between the diameter samples; falls back to 1 when the
    # predicted graph yields no diameters at all.
    # NOTE(review): an empty y_diam is not guarded here - confirm it cannot occur.
    output_diam = get_diameters(output_graph)
    y_diam = get_diameters(y_graph)
    dist_diam = wasserstein_distance(output_diam, y_diam) if len(output_diam) > 0 else 1
    
    # Signed differences: positive means the prediction has more nodes / a
    # larger adjacency sum than the target.
    delta_n_nodes = int(output_seq_len - y_seq_len)
    delta_n_edges = (output_n_edges - y_n_edges).item()
    
    acc_A = get_accuracy_A(output_A, y_A)
    
    # Total loss is a convex combination of the adjacency and coordinate terms.
    loss_adj = get_BCE_adj(output_adj[0], y_adj[0])
    loss_coord = get_MSE_coord(output_nodes, y_nodes)
    loss = lamb * loss_adj + (1 - lamb) * loss_coord
    
    # Only the scalar streetmover value is returned; the point clouds and the
    # P / C outputs are kept in locals for the optional plotting calls below.
    (y_pc, output_pc), (streetmover, P, C) = streetmover_distance(y_A, y_nodes, output_A, output_nodes, n_points=100)
    # print("Streetmover distance: {:.3f}".format(streetmover.item()))
    
    # possibly, plot assignments and/or point clouds
    # show_assignments(y_pc, output_pc, P, title=str(streetmover.item())[:8])
    # plot_point_cloud(y_adj[0], y_coord[0], y_pc)
    # plot_point_cloud(output_adj[0], output_coord[0], output_pc)
    
    return streetmover.item(), loss, loss_adj, loss_coord, acc_A, delta_n_edges, delta_n_nodes, dist_degree, dist_diam