Python scipy.stats.wasserstein_distance() Examples
The following are 21 code examples of scipy.stats.wasserstein_distance().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: _dp_verification.py From whitenoise-system with MIT License | 6 votes |
def aggtest(self, f, colname, numbins=0, binsize="auto", debug=False, plot=True, bound=True, exact=False):
    """
    Verify a SQL aggregation mechanism on a pair of neighboring datasets.

    The aggregation ``f`` is applied to column ``colname`` of both neighbors,
    the two sets of responses are histogrammed, and the KS test, the DP test
    and (unless ``exact``) the Wasserstein distance are evaluated on them.

    Returns the tuple ``(dp_res, ks_res, ws_res)``. When ``exact`` is truthy
    the result is always ``(False, 0.0, 0.0)`` and nothing is plotted.
    """
    d1, d2, d1_metadata, d2_metadata = self.generate_neighbors()
    fD1, fD2 = self.apply_aggregation_neighbors(f, (d1, colname), (d2, colname))
    d1size = fD1.size
    d2size = fD2.size

    ks_res = self.ks_test(fD1, fD2)
    d1hist, d2hist, bin_edges = self.generate_histogram_neighbors(
        fD1, fD2, numbins, binsize, exact=exact
    )
    dp_outcome = self.dp_test(d1hist, d2hist, bin_edges, d1size, d2size, debug, exact=exact)
    dp_res, d1histupperbound, d2histupperbound, d1lower, d2lower = dp_outcome

    # Exact mode short-circuits with a fixed result, before any plotting.
    if exact:
        return False, 0.0, 0.0

    ws_res = self.wasserstein_distance(d1hist, d2hist)
    if plot:
        self.plot_histogram_neighbors(
            fD1, fD2,
            d1histupperbound, d2histupperbound,
            d1hist, d2hist,
            d1lower, d2lower,
            bin_edges, bound, exact,
        )
    return dp_res, ks_res, ws_res
Example #2
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_collapse(self): # Collapsing a distribution to a point distribution at zero is # equivalent to taking the average of the absolute values of the values. u = np.arange(-10, 30, 0.3) v = np.zeros_like(u) assert_almost_equal( stats.wasserstein_distance(u, v), np.mean(np.abs(u))) u_weights = np.arange(len(u)) v_weights = u_weights[::-1] assert_almost_equal( stats.wasserstein_distance(u, v, u_weights, v_weights), np.average(np.abs(u), weights=u_weights))
Example #3
Source File: _dp_verification.py From whitenoise-system with MIT License | 5 votes |
def wasserstein_distance(self, d1hist, d2hist):
    """
    Return the Wasserstein distance between the two histograms obtained from
    repeated analysis on neighboring datasets.
    """
    distance = stats.wasserstein_distance(d1hist, d2hist)
    return distance
Example #4
Source File: metrics.py From moses with MIT License | 5 votes |
def metric(self, pref, pgen):
    """Return the Wasserstein distance between ``pref['values']`` and ``pgen['values']``."""
    reference_values = pref['values']
    generated_values = pgen['values']
    return wasserstein_distance(reference_values, generated_values)
Example #5
Source File: stats.py From scprep with GNU General Public License v3.0 | 5 votes |
def EMD(x, y):
    """Earth Mover's Distance between samples

    Computes an approximation of the Earth Mover's Distance (also known as
    the Wasserstein distance) for two variables, i.e. a distance between two
    probability distributions. The metric is useful for identifying
    differentially expressed genes between two groups of cells. For more
    information see https://en.wikipedia.org/wiki/Wasserstein_metric.

    Parameters
    ----------
    x : array-like, shape=[n_samples]
        Input data (feature 1)
    y : array-like, shape=[n_samples]
        Input data (feature 2)

    Returns
    -------
    emd : float
        Earth Mover's Distance between x and y.

    Examples
    --------
    >>> import scprep
    >>> data = scprep.io.load_csv("my_data.csv")
    >>> emd = scprep.stats.EMD(data['GENE1'], data['GENE2'])
    """
    # Both inputs go through the shared coercion helper before the scipy call.
    dense_x, dense_y = _vector_coerce_two_dense(x, y)
    emd = stats.wasserstein_distance(dense_x, dense_y)
    return emd
Example #6
Source File: statistics.py From generative-graph-transformer with MIT License | 5 votes |
def compute_statistics_MLP(y_A, y_nodes, output_A, output_nodes, y_seq_len, output_seq_len):
    r"""
    Compute statistics for the current data point, based on the one-shot output from the MLP decoder.

    :param y_A: target A
    :param y_nodes: target nodes, forwarded to ``streetmover_distance``
    :param output_A: predicted A
    :param output_nodes: predicted nodes, forwarded to ``streetmover_distance``
    :param y_seq_len: target |V|
    :param output_seq_len: predicted |V|
    :return: streetmover, acc_A, delta_n_edges, delta_n_nodes, dist_degree, dist_diam
    """
    predicted_graph = nx.from_numpy_matrix(output_A)
    target_graph = nx.from_numpy_matrix(y_A)

    # Distance between the degree histograms of the two graphs.
    predicted_degree = get_degree_hist(predicted_graph)
    target_degree = get_degree_hist(target_graph)
    dist_degree = wasserstein_distance(predicted_degree, target_degree)

    # Distance between the diameters; falls back to 1 when the prediction has none.
    predicted_diam = get_diameters(predicted_graph)
    target_diam = get_diameters(target_graph)
    if len(predicted_diam) > 0:
        dist_diam = wasserstein_distance(predicted_diam, target_diam)
    else:
        dist_diam = 1

    delta_n_nodes = int(output_seq_len - y_seq_len)
    delta_n_edges = output_A.sum() - y_A.sum()
    acc_A = get_accuracy_A(output_A, y_A)

    # Only the streetmover value is used; the point clouds, P and C are discarded.
    _point_clouds, (streetmover, _P, _C) = streetmover_distance(
        y_A, y_nodes, output_A, output_nodes, n_points=100
    )

    return streetmover.item(), acc_A, delta_n_edges, delta_n_nodes, dist_degree, dist_diam
Example #7
Source File: dnn_train.py From FractalAI with GNU Affero General Public License v3.0 | 5 votes |
def evaluate_distance(self) -> np.ndarray:
    """Score how different each observation is from a randomly chosen companion.

    The observations are cast to float32, paired with a random permutation of
    themselves, compared through ``self.wasserstein_distance`` and the result
    is passed through ``relativize_vector``. Any function that quantifies
    "how different two observations are" could be used here, even if it is
    not a proper distance.
    """
    # Draw a random companion index for every walker.
    walker_ids = np.arange(self.n_walkers, dtype=int)
    companions = np.random.permutation(walker_ids)

    observations = self.observations.astype(np.float32)
    distances = self.wasserstein_distance(observations[companions], observations)
    return relativize_vector(distances)
Example #8
Source File: dnn_train.py From FractalAI with GNU Affero General Public License v3.0 | 5 votes |
def wasserstein_distance(x, y):
    """Return the mean row-wise Wasserstein distance for each leading index.

    ``x`` and ``y`` are indexed as ``x[i][j]`` / ``y[i][j]``. For every ``i``
    each pair of rows ``x[i][j]``, ``y[i][j]`` is passed to
    ``scipy.stats.wasserstein_distance`` and the resulting values are averaged
    over ``j``.

    :param x: array exposing ``.shape`` whose sub-arrays ``x[i]`` do too
    :param y: array indexable with the same ``[i][j]`` indices as ``x``
    :return: ``np.ndarray`` with one averaged distance per ``i`` in
        ``range(x.shape[0])``
    """
    # Imported locally, as in the original, so the block does not rely on a
    # module-level scipy import.
    from scipy import stats

    def _mean_row_distance(x_block, y_block):
        # Index-based on purpose: iteration is driven by x's length only.
        row_distances = [
            stats.wasserstein_distance(x_block[j], y_block[j])
            for j in range(x_block.shape[0])
        ]
        return np.array(row_distances).mean()

    return np.array([_mean_row_distance(x[i], y[i]) for i in range(x.shape[0])])
Example #9
Source File: dnn_train.py From FractalAI with GNU Affero General Public License v3.0 | 5 votes |
def peste_distance(self) -> np.ndarray:
    """Score how different the current observations are from the "peste" observations.

    The observations returned by ``self.get_peste_obs()`` are compared with
    ``self.observations`` through ``self.wasserstein_distance`` and the result
    is passed through ``relativize_vector``. Any function that quantifies
    "how different two observations are" could be used here, even if it is
    not a proper distance.
    """
    companions = self.get_peste_obs()
    current = np.array(self.observations)
    distances = self.wasserstein_distance(current, companions)
    return relativize_vector(distances)
Example #10
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_zero_weight(self): # Values with zero weight have no impact on the Wasserstein distance. assert_almost_equal( stats.wasserstein_distance([1, 2, 100000], [1, 1], [1, 1, 0], [1, 1]), stats.wasserstein_distance([1, 2], [1, 1], [1, 1], [1, 1]))
Example #11
Source File: test_ot.py From POT with MIT License | 5 votes |
def test_emd_1d_emd2_1d():
    """Check that the 1-D EMD solvers agree with the generic solver and with scipy."""
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    # Generic solver on the squared-Euclidean cost matrix.
    cost_matrix = ot.dist(u, v, metric='sqeuclidean')
    plan, generic_log = ot.emd([], [], cost_matrix, log=True)
    cost_generic = generic_log["cost"]

    # Dedicated 1-D solvers.
    plan_1d, log_1d = ot.emd_1d(u, v, [], [], metric='sqeuclidean', log=True)
    cost_1d = log_1d["cost"]
    cost_emd2_1d = ot.emd2_1d(u, v, [], [], metric='sqeuclidean', log=False)
    cost_euclidean = ot.emd2_1d(u, v, [], [], metric='euclidean', log=False)

    # The losses must agree across solvers.
    np.testing.assert_allclose(cost_generic, cost_1d)
    np.testing.assert_allclose(cost_generic, cost_emd2_1d)

    # With the Euclidean metric the loss must match scipy's implementation.
    cost_scipy = wasserstein_distance(u.reshape((-1,)), v.reshape((-1,)))
    np.testing.assert_allclose(cost_scipy, cost_euclidean)

    # Marginal constraints: the plan is expected to have uniform marginals.
    np.testing.assert_allclose(np.ones((n,)) / n, plan.sum(1))
    np.testing.assert_allclose(np.ones((m,)) / m, plan.sum(0))

    # Both solvers must return the same plan.
    np.testing.assert_allclose(plan, plan_1d)

    # Inputs that are not 1-D must be rejected with an AssertionError.
    u = np.random.randn(n, 2)
    v = np.random.randn(m, 2)
    with pytest.raises(AssertionError):
        ot.emd_1d(u, v, [], [])
Example #12
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_combine_weights(self): # Assigning a weight w to a value is equivalent to including that value # w times in the value array with weight of 1. assert_almost_equal( stats.wasserstein_distance( [0, 0, 1, 1, 1, 1, 5], [0, 3, 3, 3, 3, 4, 4], [1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1]), stats.wasserstein_distance([5, 0, 1], [0, 4, 3], [1, 2, 4], [1, 2, 4]))
Example #13
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_shift(self): # If the whole distribution is shifted by x, then the Wasserstein # distance should be x. assert_almost_equal(stats.wasserstein_distance([0], [1]), 1) assert_almost_equal(stats.wasserstein_distance([-5], [5]), 10) assert_almost_equal( stats.wasserstein_distance([1, 2, 3, 4, 5], [11, 12, 13, 14, 15]), 10) assert_almost_equal( stats.wasserstein_distance([4.5, 6.7, 2.1], [4.6, 7, 9.2], [3, 1, 1], [1, 3, 1]), 2.5)
Example #14
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_same_distribution(self): # Any distribution moved to itself should have a Wasserstein distance of # zero. assert_equal(stats.wasserstein_distance([1, 2, 3], [2, 1, 3]), 0) assert_equal( stats.wasserstein_distance([1, 1, 1, 4], [4, 1], [1, 1, 1, 1], [1, 3]), 0)
Example #15
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_inf_weight(self): # An inf weight is not valid. assert_raises(ValueError, stats.wasserstein_distance, [1, 2, 1], [1, 1], [1, np.inf, 1], [1, 1])
Example #16
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_empty_distribution(self): # A ValueError should be raised when trying to measure the distance # between something and nothing. assert_raises(ValueError, stats.wasserstein_distance, [], [2, 2]) assert_raises(ValueError, stats.wasserstein_distance, [1], [])
Example #17
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_negative_weights(self): # A ValueError should be raised if there are any negative weights. assert_raises(ValueError, stats.wasserstein_distance, [0, 1], [2, 2], [1, 1], [3, -1])
Example #18
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_zero_weight(self): # When a distribution is given zero weight, a ValueError should be # raised. assert_raises(ValueError, stats.wasserstein_distance, [0, 1], [2], [0, 0]) assert_raises(ValueError, stats.wasserstein_distance, [0, 1], [2], [3, 1], [0])
Example #19
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_distinct_value_and_weight_lengths(self): # When the number of weights does not match the number of values, # a ValueError should be raised. assert_raises(ValueError, stats.wasserstein_distance, [1], [2], [4], [3, 1]) assert_raises(ValueError, stats.wasserstein_distance, [1], [2], [1, 0])
Example #20
Source File: test_ot.py From POT with MIT License | 5 votes |
def test_emd_1d_emd2_1d_with_weights():
    """Check that the weighted 1-D EMD solvers agree with the generic solver and with scipy."""
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    # Random weights, each normalised to sum to one.
    w_u = rng.uniform(0., 1., n)
    w_u = w_u / w_u.sum()
    w_v = rng.uniform(0., 1., m)
    w_v = w_v / w_v.sum()

    # Generic solver on the squared-Euclidean cost matrix.
    cost_matrix = ot.dist(u, v, metric='sqeuclidean')
    plan, generic_log = ot.emd(w_u, w_v, cost_matrix, log=True)
    cost_generic = generic_log["cost"]

    # Dedicated 1-D solvers.
    plan_1d, log_1d = ot.emd_1d(u, v, w_u, w_v, metric='sqeuclidean', log=True)
    cost_1d = log_1d["cost"]
    cost_emd2_1d = ot.emd2_1d(u, v, w_u, w_v, metric='sqeuclidean', log=False)
    cost_euclidean = ot.emd2_1d(u, v, w_u, w_v, metric='euclidean', log=False)

    # The losses must agree across solvers.
    np.testing.assert_allclose(cost_generic, cost_1d)
    np.testing.assert_allclose(cost_generic, cost_emd2_1d)

    # With the Euclidean metric the loss must match scipy's implementation.
    cost_scipy = wasserstein_distance(u.reshape((-1,)), v.reshape((-1,)), w_u, w_v)
    np.testing.assert_allclose(cost_scipy, cost_euclidean)

    # Marginal constraints: the plan's marginals must equal the given weights.
    np.testing.assert_allclose(w_u, plan.sum(1))
    np.testing.assert_allclose(w_v, plan.sum(0))
Example #21
Source File: statistics.py From generative-graph-transformer with MIT License | 4 votes |
def compute_statistics(output_adj, output_coord, output_seq_len, y_adj, y_coord, y_seq_len, lamb=0.5):
    r"""
    Compute statistics for the current data point.

    :param output_adj: predicted A
    :param output_coord: predicted X
    :param output_seq_len: predicted |V|
    :param y_adj: target A
    :param y_coord: target X
    :param y_seq_len: target |V|
    :param lamb: lambda parameter for the loss in this experiment
    :return: streetmover, loss, loss_adj, loss_coord, acc_A, delta_n_edges, delta_n_nodes, dist_degree, dist_diam
    """
    # The trailing two sequence positions are excluded everywhere below.
    n_predicted = output_seq_len - 2
    n_target = y_seq_len - 2

    output_A = decode_adj(output_adj[0, :n_predicted].cpu().numpy())
    y_A = decode_adj(y_adj[0, :n_target].cpu().numpy())
    output_nodes = output_coord[0, :n_predicted]
    y_nodes = y_coord[0, :n_target]

    output_graph = nx.from_numpy_matrix(output_A)
    y_graph = nx.from_numpy_matrix(y_A)

    # Decoded adjacency and node slices must agree on the number of nodes.
    assert output_A.shape[0] == output_nodes.shape[0] == n_predicted
    assert y_A.shape[0] == y_nodes.shape[0] == n_target

    output_n_edges = output_adj.reshape(-1).sum()
    y_n_edges = y_adj.reshape(-1).sum()

    # Distance between the degree histograms of the two graphs.
    output_degree = get_degree_hist(output_graph)
    y_degree = get_degree_hist(y_graph)
    dist_degree = wasserstein_distance(output_degree, y_degree)

    # Distance between the diameters; falls back to 1 when the prediction has none.
    output_diam = get_diameters(output_graph)
    y_diam = get_diameters(y_graph)
    if len(output_diam) > 0:
        dist_diam = wasserstein_distance(output_diam, y_diam)
    else:
        dist_diam = 1

    delta_n_nodes = int(output_seq_len - y_seq_len)
    delta_n_edges = (output_n_edges - y_n_edges).item()
    acc_A = get_accuracy_A(output_A, y_A)

    # Total loss is a lamb-weighted mix of the adjacency and coordinate losses.
    loss_adj = get_BCE_adj(output_adj[0], y_adj[0])
    loss_coord = get_MSE_coord(output_nodes, y_nodes)
    loss = lamb * loss_adj + (1 - lamb) * loss_coord

    # Only the streetmover value is used; the point clouds, P and C could be
    # handed to plotting helpers for visual inspection.
    _point_clouds, (streetmover, _P, _C) = streetmover_distance(
        y_A, y_nodes, output_A, output_nodes, n_points=100
    )

    return streetmover.item(), loss, loss_adj, loss_coord, acc_A, delta_n_edges, delta_n_nodes, dist_degree, dist_diam