Python pandas.util.hash_pandas_object() Examples
The following are 30 code examples of pandas.util.hash_pandas_object().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.util, or try the search function.
Example #1
Source File: test_hashing.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_categorical_consistency(self):
    """Check that categoricals hash by their values, not their codes.

    Regression test for GH15143.  For every sample series (strings,
    integers, datetimes) two categorical variants are built: one whose
    categories follow the original order and one whose categories are
    reversed.  All three must produce identical hashes, both with and
    without the ``categorize`` option.
    """
    # This should work for categoricals of any dtype.
    samples = [
        Series(['a', 'b', 'c', 'd']),
        Series([1000, 2000, 3000, 4000]),
        Series(pd.date_range(0, periods=4)),
    ]
    for s1 in samples:
        s2 = s1.astype('category').cat.set_categories(s1)
        s3 = s2.cat.set_categories(list(reversed(s1)))
        for categorize in [True, False]:
            # These should all hash identically.
            h1 = hash_pandas_object(s1, categorize=categorize)
            h2 = hash_pandas_object(s2, categorize=categorize)
            h3 = hash_pandas_object(s3, categorize=categorize)
            tm.assert_series_equal(h1, h2)
            tm.assert_series_equal(h1, h3)
Example #2
Source File: test_hashing.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_categorical_consistency(self):
    """Verify a categorical hashes the same as the plain values it holds.

    GH15143: the hash must depend on the values, not on the category
    codes, so re-ordering the categories must not change the result.
    The check runs for string, integer and datetime data and for both
    settings of ``categorize``.
    """
    # This should work for categoricals of any dtype.
    plain_series = [
        Series(['a', 'b', 'c', 'd']),
        Series([1000, 2000, 3000, 4000]),
        Series(pd.date_range(0, periods=4)),
    ]
    for s1 in plain_series:
        # Same values as a categorical, categories in original order ...
        s2 = s1.astype('category').cat.set_categories(s1)
        # ... and with the category order reversed.
        s3 = s2.cat.set_categories(list(reversed(s1)))
        for categorize in [True, False]:
            # These should all hash identically.
            h1 = hash_pandas_object(s1, categorize=categorize)
            h2 = hash_pandas_object(s2, categorize=categorize)
            h3 = hash_pandas_object(s3, categorize=categorize)
            tm.assert_series_equal(h1, h2)
            tm.assert_series_equal(h1, h3)
Example #3
Source File: test_hashing.py From vnpy_crypto with MIT License | 6 votes |
def test_categorical_consistency(self):
    """Ensure categorical hashing follows values rather than codes.

    Covers GH15143.  Each base series is converted to a categorical
    twice, once with its categories in the original order and once with
    them reversed; the hashes of all three objects must match for
    ``categorize=True`` and ``categorize=False``.
    """
    # This should work for categoricals of any dtype.
    base_series = [
        Series(['a', 'b', 'c', 'd']),
        Series([1000, 2000, 3000, 4000]),
        Series(pd.date_range(0, periods=4)),
    ]
    for s1 in base_series:
        s2 = s1.astype('category').cat.set_categories(s1)
        s3 = s2.cat.set_categories(list(reversed(s1)))
        for categorize in [True, False]:
            # These should all hash identically.
            h1 = hash_pandas_object(s1, categorize=categorize)
            h2 = hash_pandas_object(s2, categorize=categorize)
            h3 = hash_pandas_object(s3, categorize=categorize)
            tm.assert_series_equal(h1, h2)
            tm.assert_series_equal(h1, h3)
Example #4
Source File: test_hashing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_invalid_key(self):
    """Check that a malformed ``hash_key`` is rejected with ``ValueError``.

    The key only matters for object dtypes, so an object-dtype series of
    single characters is hashed.
    """
    # Context-manager form of pytest.raises: same expectation as passing
    # a helper callable, without the nested function.
    with pytest.raises(ValueError):
        hash_pandas_object(Series(list('abc')), hash_key='foo')
Example #5
Source File: test_hashing.py From recruit with Apache License 2.0 | 5 votes |
def _check_equal(obj, **kwargs):
    """
    Check that hashing an object produces the same value each time.

    Parameters
    ----------
    obj : object
        The object to hash.
    kwargs : kwargs
        Keyword arguments to pass to the hashing function.
    """
    # Hash twice with identical arguments; the two results must match.
    first = hash_pandas_object(obj, **kwargs)
    second = hash_pandas_object(obj, **kwargs)
    tm.assert_series_equal(first, second)
Example #6
Source File: test_hashing.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_invalid_key():
    """Check that a malformed ``hash_key`` raises a descriptive ``ValueError``.

    The key only matters for object dtypes, so an object-dtype series of
    single characters is hashed.  The error text must match ``msg``.
    """
    # This only matters for object dtypes.
    msg = "key should be a 16-byte string encoded"

    with pytest.raises(ValueError, match=msg):
        hash_pandas_object(Series(list("abc")), hash_key="foo")
Example #7
Source File: view_4c_analysis_baseline.py From aurum-datadiscovery with MIT License | 5 votes |
def identify_compatible_groups(dataframes_with_metadata):
    """Group views whose contents produce the same checksum.

    The checksum of a view is the sum of its per-row hashes computed
    without the index, so it does not depend on row order.  Two views
    with equal checksums are treated as compatible.

    Parameters
    ----------
    dataframes_with_metadata : iterable of (table, path, metadata)
        ``table`` is hashed with ``hash_pandas_object``; ``path`` names
        the view and must be hashable; ``metadata`` is not used here.

    Returns
    -------
    list of list
        One group per view that was not already absorbed into an earlier
        group.  Each group starts with that view's path, followed by the
        paths of every other unclassified view with the same checksum.
        A view without a compatible partner yields a one-element group.
    """
    # Hash every table exactly once up front.  Re-hashing inside the
    # nested loop made the hashing work quadratic, and also hashed views
    # that were about to be skipped.
    entries = [
        (path, hash_pandas_object(table, index=False).sum())
        for table, path, _metadata in dataframes_with_metadata
    ]

    already_classified = set()
    compatible_groups = []
    for path1, checksum1 in entries:
        if path1 in already_classified:
            continue
        # These local variables are for this one view.
        compatible_group = [path1]
        for path2, checksum2 in entries:
            # Skip the same table and anything already placed in a group.
            if path1 == path2 or path2 in already_classified:
                continue
            # Are the views compatible?
            if checksum1 == checksum2:
                compatible_group.append(path2)
                already_classified.add(path1)
                already_classified.add(path2)
        # Groups of size one are kept on purpose: all views are analyzed
        # from the compatible groups, so singletons must be reported too.
        compatible_groups.append(compatible_group)
    return compatible_groups
Example #8
Source File: test_hashing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_consistency(self):
    """Pin the hashes of a fixed string index to known-good values.

    Checks that our hash doesn't change because of a mistake in the
    actual code; the hard-coded numbers are the ground truth.
    """
    result = hash_pandas_object(Index(['foo', 'bar', 'baz']))
    expected = Series(
        np.array([3600424527151052760,
                  1374399572096150070,
                  477881037637427054], dtype='uint64'),
        index=['foo', 'bar', 'baz'])
    tm.assert_series_equal(result, expected)
Example #9
Source File: test_hashing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def check_equal(self, obj, **kwargs):
    """Assert that hashing ``obj`` twice gives identical results.

    The comparison is made once with the keyword arguments as given and
    once more after dropping any ``index`` keyword, so the hashing
    function's own default for ``index`` is exercised as well.

    Parameters
    ----------
    obj : object
        The object to hash.
    **kwargs
        Keyword arguments forwarded to ``hash_pandas_object``.
    """
    first = hash_pandas_object(obj, **kwargs)
    second = hash_pandas_object(obj, **kwargs)
    tm.assert_series_equal(first, second)

    # Repeat without an explicit ``index`` argument.
    kwargs.pop('index', None)
    first = hash_pandas_object(obj, **kwargs)
    second = hash_pandas_object(obj, **kwargs)
    tm.assert_series_equal(first, second)
Example #10
Source File: test_hashing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def check_not_equal_with_index(self, obj):
    """Assert that including the index changes the hash of ``obj``.

    ``Index`` objects are skipped.  For anything else the hashes with
    and without the index are computed, and for non-empty objects they
    must not be equal everywhere.

    Parameters
    ----------
    obj : object
        The object to hash.
    """
    if isinstance(obj, Index):
        return

    with_index = hash_pandas_object(obj, index=True)
    without_index = hash_pandas_object(obj, index=False)
    # Nothing to compare for an empty object.
    if len(obj):
        assert not (with_index == without_index).all()
Example #11
Source File: test_hashing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_hash_tuples(self):
    """Check ``hash_tuples`` against hashing the equivalent ``MultiIndex``.

    A list of tuples must hash to the same array as the MultiIndex built
    from it, and hashing the first tuple on its own must equal the first
    element of that array.
    """
    tups = [(1, 'one'), (1, 'two'), (2, 'one')]
    expected = hash_pandas_object(MultiIndex.from_tuples(tups)).values

    result = hash_tuples(tups)
    tm.assert_numpy_array_equal(result, expected)

    # A single tuple is hashed on its own and compared to the first row.
    result = hash_tuples(tups[0])
    assert result == expected[0]
Example #12
Source File: test_hashing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_multiindex_unique(self):
    """Check that a unique ``MultiIndex`` produces unique hashes."""
    mi = MultiIndex.from_tuples(
        [(118, 472), (236, 118), (51, 204), (102, 51)])
    assert mi.is_unique

    # Distinct index entries must not collide after hashing.
    result = hash_pandas_object(mi)
    assert result.is_unique
Example #13
Source File: test_hashing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_pandas_errors(self):
    """Check that unsupported objects are rejected with ``TypeError``.

    Covers a ``Timestamp`` and the object returned by ``tm.makePanel()``.
    """
    for obj in [pd.Timestamp('20130101')]:
        with pytest.raises(TypeError):
            hash_pandas_object(obj)

    # Warnings raised while building and hashing the panel are recorded
    # rather than shown.
    # NOTE(review): tm.makePanel presumably needs a pandas version that
    # still provides it -- confirm against the project's pinned pandas.
    with catch_warnings(record=True):
        obj = tm.makePanel()
        with pytest.raises(TypeError):
            hash_pandas_object(obj)
Example #14
Source File: test_hashing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_hash_keys(self):
    """Check that different hash keys give different hashes for the same data.

    This only matters for object dtypes, so an object-dtype series of
    single characters is hashed with two keys that differ in their last
    two characters.
    """
    obj = Series(list('abc'))
    a = hash_pandas_object(obj, hash_key='9876543210123456')
    b = hash_pandas_object(obj, hash_key='9876543210123465')
    # Every element must hash differently under the two keys.
    assert (a != b).all()
Example #15
Source File: test_hashing.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_hash_keys():
    """Check that different hash keys give different hashes for the same data.

    This only matters for object dtypes, so an object-dtype series of
    single characters is hashed with two keys that differ in their last
    two characters.
    """
    obj = Series(list("abc"))

    a = hash_pandas_object(obj, hash_key="9876543210123456")
    b = hash_pandas_object(obj, hash_key="9876543210123465")

    # Every element must hash differently under the two keys.
    assert (a != b).all()
Example #16
Source File: test_hashing.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_deprecation():
    """Check that the ``pandas.tools.hashing`` entry points warn.

    Importing and calling ``hash_pandas_object`` and ``hash_array`` from
    ``pandas.tools.hashing`` must each produce a ``DeprecationWarning``.
    The stack level of the warning is not checked.
    """
    with tm.assert_produces_warning(DeprecationWarning,
                                    check_stacklevel=False):
        from pandas.tools.hashing import hash_pandas_object
        obj = Series(list('abc'))
        hash_pandas_object(obj, hash_key='9876543210123456')

    with tm.assert_produces_warning(DeprecationWarning,
                                    check_stacklevel=False):
        from pandas.tools.hashing import hash_array
        obj = np.array([1, 2, 3])
        hash_array(obj, hash_key='9876543210123456')
Example #17
Source File: test_hashing.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_consistency(self):
    """Guard against accidental changes to the hashing algorithm.

    The hashes of a small, fixed string index are compared against
    hard-coded values; this is the ground truth, so a failure means the
    actual code changed the hash by mistake.
    """
    known_hashes = np.array([3600424527151052760,
                             1374399572096150070,
                             477881037637427054], dtype='uint64')
    expected = Series(known_hashes, index=['foo', 'bar', 'baz'])

    result = hash_pandas_object(Index(['foo', 'bar', 'baz']))
    tm.assert_series_equal(result, expected)
Example #18
Source File: test_hashing.py From twitter-stock-recommendation with MIT License | 5 votes |
def check_equal(self, obj, **kwargs):
    """Check that hashing ``obj`` is repeatable.

    ``obj`` is hashed twice with the supplied keyword arguments and the
    results are compared.  The same check is then repeated with any
    ``index`` keyword removed.

    Parameters
    ----------
    obj : object
        The object to hash.
    **kwargs
        Keyword arguments forwarded to ``hash_pandas_object``.
    """
    a = hash_pandas_object(obj, **kwargs)
    b = hash_pandas_object(obj, **kwargs)
    tm.assert_series_equal(a, b)

    # Second round: let the hashing function pick its default for
    # ``index``.
    kwargs.pop('index', None)
    a = hash_pandas_object(obj, **kwargs)
    b = hash_pandas_object(obj, **kwargs)
    tm.assert_series_equal(a, b)
Example #19
Source File: test_hashing.py From twitter-stock-recommendation with MIT License | 5 votes |
def check_not_equal_with_index(self, obj):
    """Check that we are not hashing the same if we include the index.

    Nothing is checked for ``Index`` objects.  Otherwise ``obj`` is
    hashed with ``index=True`` and ``index=False``; when ``obj`` is not
    empty, the two results must differ in at least one position.

    Parameters
    ----------
    obj : object
        The object to hash.
    """
    if isinstance(obj, Index):
        return

    a = hash_pandas_object(obj, index=True)
    b = hash_pandas_object(obj, index=False)
    if len(obj):
        assert not (a == b).all()
Example #20
Source File: test_hashing.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_hash_tuples(self):
    """Compare ``hash_tuples`` with hashing a ``MultiIndex`` of the same tuples.

    The array returned for the list of tuples must equal the values
    obtained by hashing ``MultiIndex.from_tuples`` of that list; the
    result for the first tuple alone must equal the first of those
    values.
    """
    tups = [(1, 'one'), (1, 'two'), (2, 'one')]
    reference = hash_pandas_object(MultiIndex.from_tuples(tups)).values

    # Whole list at once.
    tm.assert_numpy_array_equal(hash_tuples(tups), reference)

    # First tuple on its own.
    single = hash_tuples(tups[0])
    assert single == reference[0]
Example #21
Source File: test_hashing.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_multiindex_unique(self):
    """Verify that hashing a unique ``MultiIndex`` yields unique hashes."""
    tuples = [(118, 472), (236, 118), (51, 204), (102, 51)]
    mi = MultiIndex.from_tuples(tuples)
    # Precondition: the input itself has no duplicates.
    assert mi.is_unique

    result = hash_pandas_object(mi)
    assert result.is_unique
Example #22
Source File: test_hashing.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_pandas_errors(self):
    """Verify ``hash_pandas_object`` raises ``TypeError`` for unsupported input.

    Two kinds of object are tried: a ``Timestamp`` and the object built
    by ``tm.makePanel()``.
    """
    for obj in [pd.Timestamp('20130101')]:
        with pytest.raises(TypeError):
            hash_pandas_object(obj)

    # Record (rather than emit) any warnings triggered while the panel
    # is built and hashed.
    # NOTE(review): tm.makePanel presumably exists only in the pandas
    # version this project targets -- confirm before upgrading.
    with catch_warnings(record=True):
        obj = tm.makePanel()
        with pytest.raises(TypeError):
            hash_pandas_object(obj)
Example #23
Source File: test_hashing.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_hash_keys(self):
    """Verify that the hash key influences the hash of object data.

    Using different hash keys should give different hashes for the same
    data.  This only matters for object dtypes.
    """
    obj = Series(list('abc'))

    a = hash_pandas_object(obj, hash_key='9876543210123456')
    b = hash_pandas_object(obj, hash_key='9876543210123465')

    # No element may hash identically under both keys.
    assert (a != b).all()
Example #24
Source File: test_hashing.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_invalid_key(self):
    """Verify that an invalid ``hash_key`` raises ``ValueError``.

    This only matters for object dtypes, hence the object-dtype series
    of single characters.
    """
    # The context-manager form expresses the same expectation as handing
    # a helper callable to pytest.raises, with less indirection.
    with pytest.raises(ValueError):
        hash_pandas_object(Series(list('abc')), hash_key='foo')
Example #25
Source File: helpers.py From siuba with MIT License | 5 votes |
def load_cached_df(self, df):
    """Return ``self.load_df(df)``, reusing a cached result when possible.

    The cache key is the SHA-256 hex digest of the array of row hashes
    that ``hash_pandas_object`` produces for ``df`` (index included).

    Parameters
    ----------
    df : DataFrame
        The frame to load.

    Returns
    -------
    object
        The value stored in ``self.cache`` for this key, or the fresh
        result of ``self.load_df(df)``, which is stored before returning.
    """
    import hashlib
    from pandas import util

    row_hashes = util.hash_pandas_object(df, index=True).values
    key = hashlib.sha256(row_hashes).hexdigest()

    # NOTE(review): the key is built from per-row hashes only; confirm
    # that frames differing just in column labels cannot share an entry.
    if key in self.cache:
        return self.cache[key]

    res = self.load_df(df)
    self.cache[key] = res
    return res
Example #26
Source File: test_hashing.py From vnpy_crypto with MIT License | 5 votes |
def test_hash_tuples(self):
    """Ensure ``hash_tuples`` agrees with hashing the matching ``MultiIndex``.

    Hashing the list of tuples must reproduce the values obtained from
    the MultiIndex built out of the same tuples.  Hashing only the first
    tuple must reproduce the first of those values.
    """
    tups = [(1, 'one'), (1, 'two'), (2, 'one')]
    expected = hash_pandas_object(MultiIndex.from_tuples(tups)).values

    result = hash_tuples(tups)
    tm.assert_numpy_array_equal(result, expected)

    # Same expectation for a lone tuple.
    result = hash_tuples(tups[0])
    assert result == expected[0]
Example #27
Source File: test_hashing.py From recruit with Apache License 2.0 | 5 votes |
def _check_not_equal_with_index(obj):
    """
    Check the hash of an object with and without its index is not the same.

    ``Index`` objects are skipped, and empty objects are hashed but not
    compared.

    Parameters
    ----------
    obj : object
        The object to hash.
    """
    if isinstance(obj, Index):
        return

    with_index = hash_pandas_object(obj, index=True)
    without_index = hash_pandas_object(obj, index=False)

    if len(obj):
        assert not (with_index == without_index).all()
Example #28
Source File: test_hashing.py From recruit with Apache License 2.0 | 5 votes |
def test_consistency():
    """Pin the hashes of a fixed string index to known-good values.

    Check that our hash doesn't change because of a mistake in the
    actual code; the hard-coded numbers are the ground truth.
    """
    result = hash_pandas_object(Index(["foo", "bar", "baz"]))
    expected = Series(
        np.array(
            [3600424527151052760, 1374399572096150070, 477881037637427054],
            dtype="uint64",
        ),
        index=["foo", "bar", "baz"],
    )
    tm.assert_series_equal(result, expected)
Example #29
Source File: test_hashing.py From recruit with Apache License 2.0 | 5 votes |
def test_hash_tuples():
    """Check ``hash_tuples`` against hashing the equivalent ``MultiIndex``.

    The list of tuples must hash to the same array as the MultiIndex
    built from it, and hashing the first tuple alone must equal the
    first element of that array.
    """
    tuples = [(1, "one"), (1, "two"), (2, "one")]
    expected = hash_pandas_object(MultiIndex.from_tuples(tuples)).values

    result = hash_tuples(tuples)
    tm.assert_numpy_array_equal(result, expected)

    # A single tuple is hashed on its own and compared to the first row.
    result = hash_tuples(tuples[0])
    assert result == expected[0]
Example #30
Source File: test_hashing.py From recruit with Apache License 2.0 | 5 votes |
def test_multiindex_unique():
    """Check that a unique ``MultiIndex`` produces unique hashes.

    Both uniqueness flags are compared by identity against ``True``.
    """
    mi = MultiIndex.from_tuples(
        [(118, 472), (236, 118), (51, 204), (102, 51)])
    assert mi.is_unique is True

    # Distinct index entries must not collide after hashing.
    result = hash_pandas_object(mi)
    assert result.is_unique is True