Python pandas.util.hash_array() Examples

The following are 30 code examples of pandas.util.hash_array(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.util, or try the search function.
Example #1
Source File: test_hashing.py    From recruit with Apache License 2.0 7 votes vote down vote up
def test_hash_collisions():
    """Pin the hashes of two long strings, hashed alone and then together.

    See https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726
    """
    ingrid = "Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9"  # noqa
    tim = "Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe"  # noqa
    samples = [ingrid, tim]
    pinned = [14963968704024874985, 16428432627716348016]

    # One string at a time: each must map to its own (distinct) pinned value.
    expected_parts = []
    for position, value in enumerate(pinned):
        lone = np.asarray(samples[position:position + 1], dtype=object)
        lone_hash = hash_array(lone, "utf8")
        expected = np.array([value], dtype=np.uint64)
        tm.assert_numpy_array_equal(lone_hash, expected)
        expected_parts.append(expected)

    # Both strings in one call must reproduce the per-string hashes in order.
    combined = hash_array(np.asarray(samples, dtype=object), "utf8")
    tm.assert_numpy_array_equal(combined,
                                np.concatenate(expected_parts, axis=0))
Example #2
Source File: test_hashing.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_hash_collisions(self):

        # hash collisions are bad
        # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726
        L = ['Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9',  # noqa
             'Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe']  # noqa

        # these should be different!
        result1 = hash_array(np.asarray(L[0:1], dtype=object), 'utf8')
        expected1 = np.array([14963968704024874985], dtype=np.uint64)
        tm.assert_numpy_array_equal(result1, expected1)

        result2 = hash_array(np.asarray(L[1:2], dtype=object), 'utf8')
        expected2 = np.array([16428432627716348016], dtype=np.uint64)
        tm.assert_numpy_array_equal(result2, expected2)

        result = hash_array(np.asarray(L, dtype=object), 'utf8')
        tm.assert_numpy_array_equal(
            result, np.concatenate([expected1, expected2], axis=0)) 
Example #3
Source File: test_hashing.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_hash_collisions():
    """Pin the hashes of two long strings, hashed alone and then together.

    See https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726
    """
    ingrid = "Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9"  # noqa
    tim = "Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe"  # noqa
    samples = [ingrid, tim]
    pinned = [14963968704024874985, 16428432627716348016]

    # One string at a time: each must map to its own (distinct) pinned value.
    expected_parts = []
    for position, value in enumerate(pinned):
        lone = np.asarray(samples[position:position + 1], dtype=object)
        lone_hash = hash_array(lone, "utf8")
        expected = np.array([value], dtype=np.uint64)
        tm.assert_numpy_array_equal(lone_hash, expected)
        expected_parts.append(expected)

    # Both strings in one call must reproduce the per-string hashes in order.
    combined = hash_array(np.asarray(samples, dtype=object), "utf8")
    tm.assert_numpy_array_equal(combined,
                                np.concatenate(expected_parts, axis=0))
Example #4
Source File: test_hashing.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_hash_collisions(self):

        # hash collisions are bad
        # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726
        L = ['Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9',  # noqa
             'Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe']  # noqa

        # these should be different!
        result1 = hash_array(np.asarray(L[0:1], dtype=object), 'utf8')
        expected1 = np.array([14963968704024874985], dtype=np.uint64)
        tm.assert_numpy_array_equal(result1, expected1)

        result2 = hash_array(np.asarray(L[1:2], dtype=object), 'utf8')
        expected2 = np.array([16428432627716348016], dtype=np.uint64)
        tm.assert_numpy_array_equal(result2, expected2)

        result = hash_array(np.asarray(L, dtype=object), 'utf8')
        tm.assert_numpy_array_equal(
            result, np.concatenate([expected1, expected2], axis=0)) 
Example #5
Source File: test_hashing.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_hash_collisions(self):

        # hash collisions are bad
        # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726
        L = ['Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9',  # noqa
             'Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe']  # noqa

        # these should be different!
        result1 = hash_array(np.asarray(L[0:1], dtype=object), 'utf8')
        expected1 = np.array([14963968704024874985], dtype=np.uint64)
        tm.assert_numpy_array_equal(result1, expected1)

        result2 = hash_array(np.asarray(L[1:2], dtype=object), 'utf8')
        expected2 = np.array([16428432627716348016], dtype=np.uint64)
        tm.assert_numpy_array_equal(result2, expected2)

        result = hash_array(np.asarray(L, dtype=object), 'utf8')
        tm.assert_numpy_array_equal(
            result, np.concatenate([expected1, expected2], axis=0)) 
Example #6
Source File: test_hashing.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_hash_array_errors(self):
    """hash_array must raise TypeError for each non-array value tried here."""
    non_arrays = (5, 'foo', pd.Timestamp('20130101'))
    for candidate in non_arrays:
        with pytest.raises(TypeError):
            hash_array(candidate)
Example #7
Source File: test_hashing.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_hash_array_mixed(self):
        result1 = hash_array(np.array([3, 4, 'All']))
        result2 = hash_array(np.array(['3', '4', 'All']))
        result3 = hash_array(np.array([3, 4, 'All'], dtype=object))
        tm.assert_numpy_array_equal(result1, result2)
        tm.assert_numpy_array_equal(result1, result3) 
Example #8
Source File: test_hashing.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_hash_array(self):
        for name, s in self.df.iteritems():
            a = s.values
            tm.assert_numpy_array_equal(hash_array(a), hash_array(a)) 
Example #9
Source File: test_hashing.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_deprecation():
    """Check that the ``pandas.tools.hashing`` entry points warn.

    Each ``with`` block expects a DeprecationWarning while it imports one
    function from ``pandas.tools.hashing`` and calls it once with an explicit
    ``hash_key``.
    """

    # NOTE(review): the import sits inside the warning context on purpose or
    # by habit -- it is not visible here whether the warning fires on import
    # or on call, so keep both statements inside the block.
    with tm.assert_produces_warning(DeprecationWarning,
                                    check_stacklevel=False):
        from pandas.tools.hashing import hash_pandas_object
        obj = Series(list('abc'))
        hash_pandas_object(obj, hash_key='9876543210123456')

    # Same check for hash_array, this time on a plain integer ndarray.
    with tm.assert_produces_warning(DeprecationWarning,
                                    check_stacklevel=False):
        from pandas.tools.hashing import hash_array
        obj = np.array([1, 2, 3])
        hash_array(obj, hash_key='9876543210123456')
Example #10
Source File: test_hashing.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_hash_scalar(self):
    """_hash_scalar must match hash_array on a one-element object array.

    Checked for numbers, text/bytes, datetime-like values, a Period, an
    Interval and several missing-value markers.
    """
    scalars = [
        1, 1.4, 'A', b'A', u'A',
        pd.Timestamp("2012-01-01"),
        pd.Timestamp("2012-01-01", tz='Europe/Brussels'),
        datetime.datetime(2012, 1, 1),
        pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(),
        pd.Timedelta('1 days'),
        datetime.timedelta(1),
        pd.Period('2012-01-01', freq='D'),
        pd.Interval(0, 1),
        np.nan, pd.NaT, None,
    ]
    for scalar in scalars:
        from_scalar = _hash_scalar(scalar)
        boxed = np.array([scalar], dtype=object)
        from_array = hash_array(boxed, categorize=True)
        assert from_scalar[0] == from_array[0]
Example #11
Source File: test_hashing.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_categorical_with_nan_consistency(self):
        c = pd.Categorical.from_codes(
            [-1, 0, 1, 2, 3, 4],
            categories=pd.date_range('2012-01-01', periods=5, name='B'))
        expected = hash_array(c, categorize=False)
        c = pd.Categorical.from_codes(
            [-1, 0],
            categories=[pd.Timestamp('2012-01-01')])
        result = hash_array(c, categorize=False)
        assert result[0] in expected
        assert result[1] in expected 
Example #12
Source File: test_hashing.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_categorical_with_nan_consistency(self):
        c = pd.Categorical.from_codes(
            [-1, 0, 1, 2, 3, 4],
            categories=pd.date_range('2012-01-01', periods=5, name='B'))
        expected = hash_array(c, categorize=False)
        c = pd.Categorical.from_codes(
            [-1, 0],
            categories=[pd.Timestamp('2012-01-01')])
        result = hash_array(c, categorize=False)
        assert result[0] in expected
        assert result[1] in expected 
Example #13
Source File: test_hashing.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_hash_scalar(self):
    """_hash_scalar must match hash_array on a one-element object array.

    Checked for numbers, text/bytes, datetime-like values, a Period, an
    Interval and several missing-value markers.
    """
    scalars = [
        1, 1.4, 'A', b'A', u'A',
        pd.Timestamp("2012-01-01"),
        pd.Timestamp("2012-01-01", tz='Europe/Brussels'),
        datetime.datetime(2012, 1, 1),
        pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(),
        pd.Timedelta('1 days'),
        datetime.timedelta(1),
        pd.Period('2012-01-01', freq='D'),
        pd.Interval(0, 1),
        np.nan, pd.NaT, None,
    ]
    for scalar in scalars:
        from_scalar = _hash_scalar(scalar)
        boxed = np.array([scalar], dtype=object)
        from_array = hash_array(boxed, categorize=True)
        assert from_scalar[0] == from_array[0]
Example #14
Source File: test_hashing.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_hash_array_errors(self):
    """hash_array must raise TypeError for each non-array value tried here."""
    non_arrays = (5, 'foo', pd.Timestamp('20130101'))
    for candidate in non_arrays:
        with pytest.raises(TypeError):
            hash_array(candidate)
Example #15
Source File: test_hashing.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_hash_array_mixed(self):
        result1 = hash_array(np.array([3, 4, 'All']))
        result2 = hash_array(np.array(['3', '4', 'All']))
        result3 = hash_array(np.array([3, 4, 'All'], dtype=object))
        tm.assert_numpy_array_equal(result1, result2)
        tm.assert_numpy_array_equal(result1, result3) 
Example #16
Source File: test_hashing.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_hash_array(self):
        for name, s in self.df.iteritems():
            a = s.values
            tm.assert_numpy_array_equal(hash_array(a), hash_array(a)) 
Example #17
Source File: test_hashing.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_hash_scalar(val):
    """_hash_scalar(val) must match hash_array on ``[val]`` as objects."""
    from_scalar = _hash_scalar(val)

    boxed = np.array([val], dtype=object)
    from_array = hash_array(boxed, categorize=True)

    assert from_scalar[0] == from_array[0]
Example #18
Source File: test_hashing.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_categorical_with_nan_consistency():
    """Hashes of a small categorical must reappear in a larger one.

    Both categoricals contain a -1 code (presumably the missing-value
    marker) and the 2012-01-01 timestamp; the larger one spans five dates.
    """
    five_dates = pd.date_range("2012-01-01", periods=5, name="B")
    wide = pd.Categorical.from_codes([-1, 0, 1, 2, 3, 4],
                                     categories=five_dates)
    wide_hashes = hash_array(wide, categorize=False)

    single_date = [pd.Timestamp("2012-01-01")]
    narrow = pd.Categorical.from_codes([-1, 0], categories=single_date)
    narrow_hashes = hash_array(narrow, categorize=False)

    for position in (0, 1):
        assert narrow_hashes[position] in wide_hashes
Example #19
Source File: test_hashing.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_hash_array(series):
    arr = series.values
    tm.assert_numpy_array_equal(hash_array(arr), hash_array(arr)) 
Example #20
Source File: test_hashing.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_hash_array_errors(val):
    """hash_array(val) must raise the 'ndarray-like' TypeError."""
    with pytest.raises(TypeError, match="must pass a ndarray-like"):
        hash_array(val)
Example #21
Source File: test_hashing.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_hash_array_mixed(arr2):
    result1 = hash_array(np.array(["3", "4", "All"]))
    result2 = hash_array(arr2)

    tm.assert_numpy_array_equal(result1, result2) 
Example #22
Source File: test_hashing.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_hash_array(series):
    arr = series.values
    tm.assert_numpy_array_equal(hash_array(arr), hash_array(arr)) 
Example #23
Source File: test_hashing.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_categorical_with_nan_consistency(self):
        c = pd.Categorical.from_codes(
            [-1, 0, 1, 2, 3, 4],
            categories=pd.date_range('2012-01-01', periods=5, name='B'))
        expected = hash_array(c, categorize=False)
        c = pd.Categorical.from_codes(
            [-1, 0],
            categories=[pd.Timestamp('2012-01-01')])
        result = hash_array(c, categorize=False)
        assert result[0] in expected
        assert result[1] in expected 
Example #24
Source File: test_hashing.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_hash_scalar(self):
    """_hash_scalar must match hash_array on a one-element object array.

    Checked for numbers, text/bytes, datetime-like values, a Period, an
    Interval and several missing-value markers.
    """
    scalars = [
        1, 1.4, 'A', b'A', u'A',
        pd.Timestamp("2012-01-01"),
        pd.Timestamp("2012-01-01", tz='Europe/Brussels'),
        datetime.datetime(2012, 1, 1),
        pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(),
        pd.Timedelta('1 days'),
        datetime.timedelta(1),
        pd.Period('2012-01-01', freq='D'),
        pd.Interval(0, 1),
        np.nan, pd.NaT, None,
    ]
    for scalar in scalars:
        from_scalar = _hash_scalar(scalar)
        boxed = np.array([scalar], dtype=object)
        from_array = hash_array(boxed, categorize=True)
        assert from_scalar[0] == from_array[0]
Example #25
Source File: test_hashing.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_hash_array_errors(self):
    """hash_array must raise TypeError for each non-array value tried here."""
    non_arrays = (5, 'foo', pd.Timestamp('20130101'))
    for candidate in non_arrays:
        with pytest.raises(TypeError):
            hash_array(candidate)
Example #26
Source File: test_hashing.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_hash_array_mixed(self):
        result1 = hash_array(np.array([3, 4, 'All']))
        result2 = hash_array(np.array(['3', '4', 'All']))
        result3 = hash_array(np.array([3, 4, 'All'], dtype=object))
        tm.assert_numpy_array_equal(result1, result2)
        tm.assert_numpy_array_equal(result1, result3) 
Example #27
Source File: test_hashing.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_hash_array(self):
        for name, s in self.df.iteritems():
            a = s.values
            tm.assert_numpy_array_equal(hash_array(a), hash_array(a)) 
Example #28
Source File: test_hashing.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_categorical_with_nan_consistency():
    """Hashes of a small categorical must reappear in a larger one.

    Both categoricals contain a -1 code (presumably the missing-value
    marker) and the 2012-01-01 timestamp; the larger one spans five dates.
    """
    five_dates = pd.date_range("2012-01-01", periods=5, name="B")
    wide = pd.Categorical.from_codes([-1, 0, 1, 2, 3, 4],
                                     categories=five_dates)
    wide_hashes = hash_array(wide, categorize=False)

    single_date = [pd.Timestamp("2012-01-01")]
    narrow = pd.Categorical.from_codes([-1, 0], categories=single_date)
    narrow_hashes = hash_array(narrow, categorize=False)

    for position in (0, 1):
        assert narrow_hashes[position] in wide_hashes
Example #29
Source File: test_hashing.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_hash_scalar(val):
    """_hash_scalar(val) must match hash_array on ``[val]`` as objects."""
    from_scalar = _hash_scalar(val)

    boxed = np.array([val], dtype=object)
    from_array = hash_array(boxed, categorize=True)

    assert from_scalar[0] == from_array[0]
Example #30
Source File: test_hashing.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_hash_array_errors(val):
    """hash_array(val) must raise the 'ndarray-like' TypeError."""
    with pytest.raises(TypeError, match="must pass a ndarray-like"):
        hash_array(val)