Python pandas.Series.str() Examples

The following are 30 code examples of pandas.Series.str(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.Series, or try the search function.
Example #1
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_isnumeric(self):
        """Check ``str.isnumeric`` / ``str.isdecimal`` on ASCII and non-ASCII input.

        The accessor results are compared with hand-written expectations and
        with the built-in ``isnumeric`` / ``isdecimal`` string methods.  A
        second pass checks that NaN entries come back as NaN.
        """
        # 0x00bc: VULGAR FRACTION ONE QUARTER
        # 0x2605: BLACK STAR, not a number
        # 0x1378: ETHIOPIC NUMBER SEVENTY
        # 0xFF13: FULLWIDTH DIGIT THREE -- spelled as an escape so it cannot
        #         be confused with (or normalised to) the ASCII '3' that is
        #         already the second element of the list
        values = ['A', '3', u'¼', u'★', u'፸', u'\uff13', 'four']
        s = Series(values)
        numeric_e = [False, True, True, False, True, True, False]
        decimal_e = [False, True, False, False, False, True, False]
        tm.assert_series_equal(s.str.isnumeric(), Series(numeric_e))
        tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e))

        # cross-check against the plain string methods, element by element
        unicodes = [u'A', u'3', u'¼', u'★', u'፸', u'\uff13', u'four']
        assert s.str.isnumeric().tolist() == [v.isnumeric() for v in unicodes]
        assert s.str.isdecimal().tolist() == [v.isdecimal() for v in unicodes]

        # missing values are expected to stay missing in the result
        values = ['A', np.nan, u'¼', u'★', np.nan, u'\uff13', 'four']
        s = Series(values)
        numeric_e = [False, np.nan, True, False, np.nan, True, False]
        decimal_e = [False, np.nan, False, False, np.nan, True, False]
        tm.assert_series_equal(s.str.isnumeric(), Series(numeric_e))
        tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e))
Example #2
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_iter(self):
        """Iterate over the ``.str`` accessor and check every yielded object.

        Each yielded object must be a Series sharing the source index, and
        each of its elements must be a string or missing.
        """
        # GH3638
        words = Series(('google', 'wikimedia', 'wikipedia', 'wikitravel'))

        for column in words.str:
            # iter must yield a Series ...
            assert isinstance(column, Series)

            # ... whose index equals the index of the original Series
            tm.assert_index_equal(column.index, words.index)

            # every element is either a basestring/str or nan
            assert all(isinstance(item, compat.string_types) or isna(item)
                       for item in column)

        # Iteration should continue until the next step would be all-nan, so
        # the final yielded Series holds a single non-missing value: the 'l'
        # that ends 'wikitravel', the longest of the strings.
        assert column.dropna().values.item() == 'l'
Example #3
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_wrap(self):
        """Check ``str.wrap`` at width 12 and, with whitespace/NaN, at width 6."""
        # (input, expected output) pairs for a wrap width of 12
        width_12_cases = [
            # two words, less than width
            (u('hello world'), u('hello world')),
            # two words, equal to width
            (u('hello world!'), u('hello world!')),
            # two words, greater than width
            (u('hello world!!'), u('hello\nworld!!')),
            # one word, less than width
            (u('abcdefabcde'), u('abcdefabcde')),
            # one word, equal to width
            (u('abcdefabcdef'), u('abcdefabcdef')),
            # one word, greater than width
            (u('abcdefabcdefa'), u('abcdefabcdef\na')),
            # multiple tokens with trailing whitespace, equal to width
            (u('ab ab ab ab '), u('ab ab ab ab')),
            # multiple tokens, 13 characters
            (u('ab ab ab ab a'), u('ab ab ab ab\na')),
            # a lone tab
            (u('\t'), u('')),
        ]
        unwrapped = Series([text for text, _ in width_12_cases])
        expected = Series([wrapped for _, wrapped in width_12_cases])

        actual = unwrapped.str.wrap(12, break_long_words=True)
        assert_series_equal(actual, expected)

        # pre and post whitespace (non-unicode), NaN, and non-ascii Unicode
        unwrapped = Series([
            '  pre  ',
            np.nan,
            u('\xac\u20ac\U00008000 abadcafe'),
        ])
        expected = Series([
            '  pre',
            NA,
            u('\xac\u20ac\U00008000 ab\nadcafe'),
        ])
        actual = unwrapped.str.wrap(6)
        assert_series_equal(actual, expected)
Example #4
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_strip_lstrip_rstrip_mixed(self):
        """Check strip / lstrip / rstrip on a Series mixing strings and non-strings.

        The bool, datetime, None, int and float entries are all expected to
        come back as NA.
        """
        # mixed
        mixed = Series(['  aa  ', NA, ' bb \t\n', True, datetime.today(), None,
                        1, 2.])

        # accessor method name -> expected values
        expectations = [
            ('strip', ['aa', NA, 'bb', NA, NA, NA, NA, NA]),
            ('lstrip', ['aa  ', NA, 'bb \t\n', NA, NA, NA, NA, NA]),
            ('rstrip', ['  aa', NA, ' bb', NA, NA, NA, NA, NA]),
        ]
        for method_name, wanted in expectations:
            stripped = getattr(Series(mixed).str, method_name)()

            assert isinstance(stripped, Series)
            tm.assert_almost_equal(stripped, Series(wanted))
Example #5
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_partition_deprecation(self):
        """Check that the ``pat`` keyword of (r)partition emits a FutureWarning.

        The result obtained through ``pat`` must equal the one obtained
        through ``sep``.
        """
        # GH 22676; depr kwarg "pat" in favor of "sep"
        values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])

        for method_name in ('partition', 'rpartition'):
            split_method = getattr(values.str, method_name)

            # reference result, computed through the non-deprecated keyword
            via_sep = split_method(sep='_')

            with tm.assert_produces_warning(FutureWarning):
                via_pat = split_method(pat='_')
                tm.assert_frame_equal(via_pat, via_sep)
Example #6
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_partition_with_name(self):
        """Check ``str.partition`` on a named Series and a named Index.

        With ``expand=False`` the expected Series / Index carries the name
        ``'xxx'``.  The expanded DataFrame / MultiIndex expectations are
        built without a name.
        """
        # GH 12617
        pieces = [('a', ',', 'b'), ('c', ',', 'd')]

        named_series = Series(['a,b', 'c,d'], name='xxx')
        as_frame = named_series.str.partition(',')
        tm.assert_frame_equal(
            as_frame,
            DataFrame({0: ['a', 'c'], 1: [',', ','], 2: ['b', 'd']}))

        # should preserve name
        as_tuples = named_series.str.partition(',', expand=False)
        tm.assert_series_equal(as_tuples, Series(pieces, name='xxx'))

        named_index = Index(['a,b', 'c,d'], name='xxx')
        as_multi = named_index.str.partition(',')
        wanted = MultiIndex.from_tuples(pieces)
        assert as_multi.nlevels == 3
        tm.assert_index_equal(as_multi, wanted)

        # should preserve name
        as_flat = named_index.str.partition(',', expand=False)
        wanted = Index(np.array(pieces), name='xxx')
        assert as_flat.nlevels == 1
        tm.assert_index_equal(as_flat, wanted)
Example #7
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_str_cat_all_na(self, box, other):
        """Check ``str.cat`` when either the target or the caller is all-NaN.

        ``box`` and ``other`` are compared against ``Index`` / ``Series`` and
        ``other`` is called as a constructor.  They are supplied from outside
        this block, presumably by pytest parametrisation -- TODO confirm.
        """
        # GH 24044

        # check that all NaNs in caller / target work
        labels = Index(['a', 'b', 'c', 'd'])
        caller = labels if box == Index else Series(labels, index=labels)

        target = other([np.nan] * 4, dtype=object)
        if not other == Index:
            # add index of the caller for alignment
            target = Series(target, index=caller)

        # all-NA target
        if box == Series:
            wanted = Series([np.nan] * 4, index=caller.index, dtype=object)
        else:  # box == Index
            wanted = Index([np.nan] * 4, dtype=object)
        combined = caller.str.cat(target, join='left')
        assert_series_or_index_equal(combined, wanted)

        # all-NA caller (only for Series)
        if other == Series:
            wanted = Series([np.nan] * 4, dtype=object, index=target.index)
            combined = target.str.cat(caller, join='left')
            tm.assert_series_equal(combined, wanted)
Example #8
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_cat_on_filtered_index(self):
        """Check ``str.cat`` on columns of a frame whose rows were filtered."""
        year_month = MultiIndex.from_product(
            [[2011, 2012], [1, 2, 3]], names=['year', 'month'])
        frame = DataFrame(index=year_month).reset_index()

        # Keep only months after January; the surviving rows keep their
        # original labels, so label 1 is the (2011, 2) row.
        frame = frame[frame.month > 1]

        years = frame.year.astype('str')
        months = frame.month.astype('str')

        # a single Series as the other operand
        year_and_month = years.str.cat(months, sep=' ')
        assert year_and_month.loc[1] == '2011 2'

        # a list of Series as the other operand
        year_and_two_months = years.str.cat([months, months], sep=' ')
        assert year_and_two_months.loc[1] == '2011 2 2'
Example #9
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_contains_for_object_category(self):
        """Check the ``na`` argument of ``str.contains`` on two kinds of Series.

        One Series is built with ``dtype="category"`` and one without an
        explicit dtype.  In both, the value passed as ``na`` is expected in
        the position of the missing entry.
        """
        # gh 22158
        raw = ["a", "b", "c", "a", np.nan]

        # na for category first, then na for objects
        for values in (Series(raw, dtype="category"), Series(raw)):
            for na_value in (True, False):
                found = values.str.contains('a', na=na_value)
                wanted = Series([True, False, False, True, na_value])
                tm.assert_series_equal(found, wanted)
Example #10
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_split_with_name(self):
        """Check ``str.split`` on a named Series and a named Index.

        Without ``expand`` the expected Series / Index carries the name
        ``'xxx'``.  The expanded DataFrame / MultiIndex expectations are
        built without a name.
        """
        # GH 12617

        # should preserve name
        named_series = Series(['a,b', 'c,d'], name='xxx')
        as_lists = named_series.str.split(',')
        tm.assert_series_equal(
            as_lists, Series([['a', 'b'], ['c', 'd']], name='xxx'))

        as_frame = named_series.str.split(',', expand=True)
        tm.assert_frame_equal(as_frame, DataFrame([['a', 'b'], ['c', 'd']]))

        named_index = Index(['a,b', 'c,d'], name='xxx')
        as_flat = named_index.str.split(',')
        wanted = Index([['a', 'b'], ['c', 'd']], name='xxx')
        assert as_flat.nlevels == 1
        tm.assert_index_equal(as_flat, wanted)

        as_multi = named_index.str.split(',', expand=True)
        wanted = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')])
        assert as_multi.nlevels == 2
        tm.assert_index_equal(as_multi, wanted)
Example #11
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_title(self):
        """Check ``str.title`` on plain, mixed-type and ``u()``-wrapped input."""
        plain = Series(["FOO", "BAR", NA, "Blah", "blurg"])
        titled = plain.str.title()
        tm.assert_series_equal(
            titled, Series(["Foo", "Bar", NA, "Blah", "Blurg"]))

        # mixed: the non-string entries are expected to come back as NA
        mixed = Series(["FOO", NA, "bar", True, datetime.today(), "blah", None,
                        1, 2.])
        titled_mixed = mixed.str.title()
        wanted = Series(["Foo", NA, "Bar", NA, NA, "Blah", NA, NA, NA])
        tm.assert_almost_equal(titled_mixed, wanted)

        # unicode
        wrapped = Series([u("FOO"), NA, u("bar"), u("Blurg")])
        titled = wrapped.str.title()
        wanted = Series([u("Foo"), NA, u("Bar"), u("Blurg")])
        tm.assert_series_equal(titled, wanted)
Example #12
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_capitalize(self):
        """Check ``str.capitalize`` on plain, mixed-type and ``u()``-wrapped input."""
        plain = Series(["FOO", "BAR", NA, "Blah", "blurg"])
        capitalized = plain.str.capitalize()
        tm.assert_series_equal(
            capitalized, Series(["Foo", "Bar", NA, "Blah", "Blurg"]))

        # mixed: the non-string entries are expected to come back as NA
        mixed = Series(["FOO", NA, "bar", True, datetime.today(), "blah", None,
                        1, 2.])
        capitalized_mixed = mixed.str.capitalize()
        wanted = Series(["Foo", NA, "Bar", NA, NA, "Blah", NA, NA, NA])
        tm.assert_almost_equal(capitalized_mixed, wanted)

        # unicode
        wrapped = Series([u("FOO"), NA, u("bar"), u("Blurg")])
        capitalized = wrapped.str.capitalize()
        wanted = Series([u("Foo"), NA, u("Bar"), u("Blurg")])
        tm.assert_series_equal(capitalized, wanted)
Example #13
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_replace_literal(self):
        """Check ``str.replace`` with a regex pattern versus ``regex=False``.

        Also checks that ``regex=False`` combined with a callable replacement
        or a compiled pattern raises ``ValueError``.
        """
        # GH16808 literal replace (regex=False vs regex=True)
        values = Series(['f.o', 'foo', NA])

        # default call: 'f.' is expected to replace the start of both strings
        as_regex = values.str.replace('f.', 'ba')
        tm.assert_series_equal(as_regex, Series(['bao', 'bao', NA]))

        # literal: only the string containing the text 'f.' is changed
        as_literal = values.str.replace('f.', 'ba', regex=False)
        tm.assert_series_equal(as_literal, Series(['bao', 'foo', NA]))

        # Cannot do a literal replace if given a callable repl or compiled
        # pattern
        def swap_case_of_match(match):
            return match.group(0).swapcase()

        compiled_pat = re.compile('[a-z][A-Z]{2}')

        with pytest.raises(ValueError):
            values.str.replace('abc', swap_case_of_match, regex=False)
        with pytest.raises(ValueError):
            values.str.replace(compiled_pat, '', regex=False)
Example #14
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_zfill(self):
        """Check ``str.zfill`` against fixed expectations and ``str.zfill``.

        Widths 5 and 3 are compared with both a literal expectation and the
        built-in string method.  A last pass checks NaN entries stay NaN.
        """
        values = Series(['1', '22', 'aaa', '333', '45678'])

        # width -> expected zero-filled strings
        by_width = [
            (5, ['00001', '00022', '00aaa', '00333', '45678']),
            (3, ['001', '022', 'aaa', '333', '45678']),
        ]
        for width, wanted in by_width:
            filled = values.str.zfill(width)
            tm.assert_series_equal(filled, Series(wanted))

            # the accessor must agree with the plain string method
            elementwise = np.array([v.zfill(width) for v in values.values],
                                   dtype=np.object_)
            tm.assert_numpy_array_equal(filled.values, elementwise)

        with_missing = Series(['1', np.nan, 'aaa', np.nan, '45678'])
        filled = with_missing.str.zfill(5)
        wanted = Series(['00001', np.nan, '00aaa', np.nan, '45678'])
        tm.assert_series_equal(filled, wanted)
Example #15
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_pad_fillchar(self):
        """Check ``str.pad`` with ``fillchar='X'`` on each side, plus bad fillchars.

        A two-character string and an int passed as ``fillchar`` are both
        expected to raise ``TypeError`` with a type-specific message.
        """
        values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])

        # side -> expected result of padding to width 5 with 'X'
        by_side = [
            ('left', ['XXXXa', 'XXXXb', NA, 'XXXXc', NA, 'eeeeee']),
            ('right', ['aXXXX', 'bXXXX', NA, 'cXXXX', NA, 'eeeeee']),
            ('both', ['XXaXX', 'XXbXX', NA, 'XXcXX', NA, 'eeeeee']),
        ]
        for side, wanted in by_side:
            padded = values.str.pad(5, side=side, fillchar='X')
            tm.assert_almost_equal(padded, Series(wanted))

        # invalid fillchar -> expected error message
        rejected = [
            ('XY', "fillchar must be a character, not str"),
            (5, "fillchar must be a character, not int"),
        ]
        for bad_fillchar, msg in rejected:
            with pytest.raises(TypeError, match=msg):
                values.str.pad(5, fillchar=bad_fillchar)
Example #16
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_find_nan(self):
        """Check ``str.find`` / ``str.rfind`` on a Series containing NaN.

        NaN entries are expected to stay NaN.  The calls are made without
        bounds, with a start position, and with start and end positions.
        """
        values = Series(['ABCDEFG', np.nan, 'DEFGHIJEF', np.nan, 'XXXX'])

        # (accessor method, positional arguments, expected positions)
        cases = [
            ('find', ('EF',), [4, np.nan, 1, np.nan, -1]),
            ('rfind', ('EF',), [4, np.nan, 7, np.nan, -1]),
            ('find', ('EF', 3), [4, np.nan, 7, np.nan, -1]),
            ('rfind', ('EF', 3), [4, np.nan, 7, np.nan, -1]),
            ('find', ('EF', 3, 6), [4, np.nan, -1, np.nan, -1]),
            ('rfind', ('EF', 3, 6), [4, np.nan, -1, np.nan, -1]),
        ]
        for method_name, args, wanted in cases:
            positions = getattr(values.str, method_name)(*args)
            tm.assert_series_equal(positions, Series(wanted))
Example #17
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_swapcase(self):
        """Check ``str.swapcase`` on plain, mixed-type and ``u()``-wrapped input."""
        plain = Series(["FOO", "BAR", NA, "Blah", "blurg"])
        swapped = plain.str.swapcase()
        tm.assert_series_equal(
            swapped, Series(["foo", "bar", NA, "bLAH", "BLURG"]))

        # mixed: the non-string entries are expected to come back as NA
        mixed = Series(["FOO", NA, "bar", True, datetime.today(), "Blah", None,
                        1, 2.])
        swapped_mixed = mixed.str.swapcase()
        wanted = Series(["foo", NA, "BAR", NA, NA, "bLAH", NA, NA, NA])
        tm.assert_almost_equal(swapped_mixed, wanted)

        # unicode
        wrapped = Series([u("FOO"), NA, u("bar"), u("Blurg")])
        swapped = wrapped.str.swapcase()
        wanted = Series([u("foo"), NA, u("BAR"), u("bLURG")])
        tm.assert_series_equal(swapped, wanted)
Example #18
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_findall(self):
        """Check ``str.findall`` on plain, mixed-type and ``u()``-wrapped input.

        Strings are expected to give a list of matches (possibly empty).
        Missing and non-string entries are expected to give NA.
        """
        pattern = 'BAD[_]*'

        plain = Series(['fooBAD__barBAD', NA, 'foo', 'BAD'])
        matches = plain.str.findall(pattern)
        wanted = Series([['BAD__', 'BAD'], NA, [], ['BAD']])
        tm.assert_almost_equal(matches, wanted)

        # mixed
        mixed = Series(['fooBAD__barBAD', NA, 'foo', True, datetime.today(),
                        'BAD', None, 1, 2.])
        mixed_matches = Series(mixed).str.findall(pattern)
        wanted = Series([['BAD__', 'BAD'], NA, [], NA, NA, ['BAD'], NA, NA,
                         NA])
        assert isinstance(mixed_matches, Series)
        tm.assert_almost_equal(mixed_matches, wanted)

        # unicode
        wrapped = Series([u('fooBAD__barBAD'), NA, u('foo'), u('BAD')])
        matches = wrapped.str.findall(pattern)
        wanted = Series([[u('BAD__'), u('BAD')], NA, [], [u('BAD')]])
        tm.assert_almost_equal(matches, wanted)
Example #19
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_extractall_stringindex(self):
        """Check ``str.extractall`` on a Series, on Index objects, and on a
        Series with a named string index.
        """
        pattern = r"[ab](?P<digit>\d)"

        named = Series(["a1a2", "b1", "c1"], name='xxx')
        extracted = named.str.extractall(pattern)
        default_idx = MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0)],
                                             names=[None, 'match'])
        default_exp = DataFrame({'digit': ["1", "2", "1"]}, index=default_idx)
        tm.assert_frame_equal(extracted, default_exp)

        # index should return the same result as the default index without name
        # thus index.name doesn't affect to the result
        indexes = (Index(["a1a2", "b1", "c1"]),
                   Index(["a1a2", "b1", "c1"], name='xxx'))
        for idx in indexes:
            extracted = idx.str.extractall(pattern)
            tm.assert_frame_equal(extracted, default_exp)

        # the labels and name of a custom index are expected in the first
        # level of the result
        labelled = Series(["a1a2", "b1", "c1"], name='s_name',
                          index=Index(["XX", "yy", "zz"], name='idx_name'))
        extracted = labelled.str.extractall(pattern)
        labelled_idx = MultiIndex.from_tuples(
            [("XX", 0), ("XX", 1), ("yy", 0)],
            names=["idx_name", 'match'])
        labelled_exp = DataFrame({'digit': ["1", "2", "1"]},
                                 index=labelled_idx)
        tm.assert_frame_equal(extracted, labelled_exp)
Example #20
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_len(self):
        """Check ``str.len`` on plain, mixed-type and ``u()``-wrapped input."""

        def length_or_na(value):
            # reference implementation: built-in len, NA for missing values
            return len(value) if notna(value) else NA

        plain = Series(['foo', 'fooo', 'fooooo', np.nan, 'fooooooo'])
        lengths = plain.str.len()
        tm.assert_series_equal(lengths, plain.map(length_or_na))

        # mixed: the non-string entries are expected to come back as NA
        mixed = Series(['a_b', NA, 'asdf_cas_asdf', True, datetime.today(),
                        'foo', None, 1, 2.])
        mixed_lengths = Series(mixed).str.len()
        wanted = Series([3, NA, 13, NA, NA, 3, NA, NA, NA])
        assert isinstance(mixed_lengths, Series)
        tm.assert_almost_equal(mixed_lengths, wanted)

        # unicode
        wrapped = Series([u('foo'), u('fooo'), u('fooooo'), np.nan,
                          u('fooooooo')])
        lengths = wrapped.str.len()
        tm.assert_series_equal(lengths, wrapped.map(length_or_na))
Example #21
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_extract_index_one_two_groups(self):
        """Check ``Index.str.extract(expand=True)`` with one and two groups.

        Both calls are expected to produce a DataFrame.
        """
        source = Series(['a3', 'b3', 'd4c2'], index=["A3", "B3", "D4"],
                        name='series_name')
        labels = source.index

        # one unnamed group
        one_group = labels.str.extract(r'([A-Z])', expand=True)
        tm.assert_frame_equal(one_group, DataFrame(['A', "B", "D"]))

        # Prior to v0.18.0, index.str.extract(regex with one group)
        # returned Index. With more than one group, extract raised an
        # error (GH9980). Now extract always returns DataFrame.
        two_groups = labels.str.extract(
            r'(?P<letter>[A-Z])(?P<digit>[0-9])', expand=True)
        wanted_rows = [("A", "3"), ("B", "3"), ("D", "4")]
        wanted = DataFrame(wanted_rows, columns=["letter", "digit"])
        tm.assert_frame_equal(two_groups, wanted)
Example #22
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_join(self):
        """Check that ``str.split('_')`` followed by ``str.join('_')`` round-trips."""
        plain = Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])
        rejoined = plain.str.split('_').str.join('_')
        tm.assert_series_equal(plain, rejoined)

        # mixed: the non-string entries are expected to come back as NA
        mixed = Series(['a_b', NA, 'asdf_cas_asdf', True, datetime.today(),
                        'foo', None, 1, 2.])
        mixed_rejoined = Series(mixed).str.split('_').str.join('_')
        wanted = Series(['a_b', NA, 'asdf_cas_asdf', NA, NA, 'foo', NA, NA,
                         NA])
        assert isinstance(mixed_rejoined, Series)
        tm.assert_almost_equal(mixed_rejoined, wanted)

        # unicode
        wrapped = Series([u('a_b_c'), u('c_d_e'), np.nan, u('f_g_h')])
        rejoined = wrapped.str.split('_').str.join('_')
        tm.assert_series_equal(wrapped, rejoined)
Example #23
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_get_dummies(self):
        """Check ``str.get_dummies`` on two Series and on an Index.

        The Series calls are compared against DataFrames.  The Index call is
        compared against a MultiIndex whose level names are the tokens.
        """
        # a NaN row is expected to become an all-zero row
        piped = Series(['a|b', 'a|c', np.nan])
        dummies = piped.str.get_dummies('|')
        wanted = DataFrame([[1, 1, 0], [1, 0, 1], [0, 0, 0]],
                           columns=list('abc'))
        tm.assert_frame_equal(dummies, wanted)

        # the integer 7 is expected to show up as its own column '7'
        with_int = Series(['a;b', 'a', 7])
        dummies = with_int.str.get_dummies(';')
        wanted = DataFrame([[0, 1, 1], [0, 1, 0], [1, 0, 0]],
                           columns=list('7ab'))
        tm.assert_frame_equal(dummies, wanted)

        # GH9980, GH8028
        piped_index = Index(['a|b', 'a|c', 'b|c'])
        dummies = piped_index.str.get_dummies('|')
        wanted = MultiIndex.from_tuples(
            [(1, 1, 0), (1, 0, 1), (0, 1, 1)], names=('a', 'b', 'c'))
        tm.assert_index_equal(dummies, wanted)
Example #24
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_rsplit_to_multiindex_expand(self):
        """Check ``Index.str.rsplit(expand=True)`` and the level count it yields.

        Covers labels without the separator, labels with an equal number of
        splits, and the same labels limited to one split with ``n=1``.
        """
        # nothing to split: the Index is expected back unchanged
        unsplittable = Index(['nosplit', 'alsonosplit'])
        result = unsplittable.str.rsplit('_', expand=True)
        tm.assert_index_equal(result, unsplittable)
        assert result.nlevels == 1

        # every label splits into three parts
        splittable = Index(['some_equal_splits', 'with_no_nans'])
        result = splittable.str.rsplit('_', expand=True)
        wanted = MultiIndex.from_tuples([('some', 'equal', 'splits'),
                                         ('with', 'no', 'nans')])
        tm.assert_index_equal(result, wanted)
        assert result.nlevels == 3

        # n=1: only the last separator is split on
        splittable = Index(['some_equal_splits', 'with_no_nans'])
        result = splittable.str.rsplit('_', expand=True, n=1)
        wanted = MultiIndex.from_tuples([('some_equal', 'splits'),
                                         ('with_no', 'nans')])
        tm.assert_index_equal(result, wanted)
        assert result.nlevels == 2
Example #25
Source File: test_strings.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_get_complex_nested(self, to_type):
        """Check ``str.get`` on a Series holding one nested container.

        ``to_type`` is called to build both the outer and the inner
        container.  It is supplied from outside this block, presumably as a
        parametrised container type -- TODO confirm.
        """
        nested = Series([to_type([to_type([1, 2])])])

        # position 0 holds the inner container
        first = nested.str.get(0)
        tm.assert_series_equal(first, Series([to_type([1, 2])]))

        # position 1 does not exist, so NaN is expected
        second = nested.str.get(1)
        tm.assert_series_equal(second, Series([np.nan]))
Example #26
Source File: test_strings.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_get_complex(self):
        """Check ``str.get`` on a Series holding a tuple, list, set and dict.

        The set is expected to give NaN for both keys.  The dict is expected
        to give its value for key 1 and NaN for the absent key -1.
        """
        # GH 20671, getting value not in dict raising `KeyError`
        containers = Series([(1, 2, 3), [1, 2, 3], {1, 2, 3},
                             {1: 'a', 2: 'b', 3: 'c'}])

        # key passed to get -> expected values
        by_key = [
            (1, [2, 2, np.nan, 'a']),
            (-1, [3, 3, np.nan, np.nan]),
        ]
        for key, wanted in by_key:
            fetched = containers.str.get(key)
            tm.assert_series_equal(fetched, Series(wanted))
Example #27
Source File: test_strings.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_strip_lstrip_rstrip_unicode(self):
        """Check strip / lstrip / rstrip on ``u()``-wrapped strings with an NA."""
        # unicode
        padded = Series([u('  aa   '), u(' bb \n'), NA, u('cc  ')])

        # accessor method name -> expected values
        expectations = [
            ('strip', [u('aa'), u('bb'), NA, u('cc')]),
            ('lstrip', [u('aa   '), u('bb \n'), NA, u('cc  ')]),
            ('rstrip', [u('  aa'), u(' bb'), NA, u('cc')]),
        ]
        for method_name, wanted in expectations:
            stripped = getattr(padded.str, method_name)()
            tm.assert_series_equal(stripped, Series(wanted))
Example #28
Source File: test_strings.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_strip_lstrip_rstrip(self):
        """Check strip / lstrip / rstrip on plain strings with an NA entry."""
        padded = Series(['  aa   ', ' bb \n', NA, 'cc  '])

        # accessor method name -> expected values
        expectations = [
            ('strip', ['aa', 'bb', NA, 'cc']),
            ('lstrip', ['aa   ', 'bb \n', NA, 'cc  ']),
            ('rstrip', ['  aa', ' bb', NA, 'cc']),
        ]
        for method_name, wanted in expectations:
            stripped = getattr(padded.str, method_name)()
            tm.assert_series_equal(stripped, Series(wanted))
Example #29
Source File: test_strings.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_pad_width(self, f):
        """Check that a non-integer width is rejected with ``TypeError``.

        ``f`` is the name of the ``.str`` accessor method to call; it is
        looked up with ``getattr``.  It is supplied from outside this block,
        presumably by pytest parametrisation -- TODO confirm.
        """
        # see gh-13598
        s = Series(['1', '22', 'a', 'bb'])

        # ``match`` is a regular expression.  The former trailing '*' was a
        # quantifier on the 't' (so "..., no" also matched), not a wildcard.
        # The literal prefix alone pins the message.
        msg = "width must be of integer type, not"

        with pytest.raises(TypeError, match=msg):
            getattr(s.str, f)('f')
Example #30
Source File: test_strings.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_get(self):
        """Check ``str.get`` on the lists produced by ``str.split('_')``.

        Covers plain, mixed-type and ``u()``-wrapped input, then positions
        that fall outside the shortest list.
        """
        plain = Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])
        second = plain.str.split('_').str.get(1)
        tm.assert_series_equal(second, Series(['b', 'd', np.nan, 'g']))

        # mixed: the non-string entries are expected to come back as NA
        mixed = Series(['a_b_c', NA, 'c_d_e', True, datetime.today(), None, 1,
                        2.])
        mixed_second = Series(mixed).str.split('_').str.get(1)
        wanted = Series(['b', NA, 'd', NA, NA, NA, NA, NA])
        assert isinstance(mixed_second, Series)
        tm.assert_almost_equal(mixed_second, wanted)

        # unicode
        wrapped = Series([u('a_b_c'), u('c_d_e'), np.nan, u('f_g_h')])
        second = wrapped.str.split('_').str.get(1)
        wanted = Series([u('b'), u('d'), np.nan, u('g')])
        tm.assert_series_equal(second, wanted)

        # bounds testing: the last string splits into only two parts, so
        # NaN is expected there for both positions
        uneven = Series(['1_2_3_4_5', '6_7_8_9_10', '11_12'])

        # positive index first, then negative index
        for position in (2, -3):
            picked = uneven.str.split('_').str.get(position)
            tm.assert_series_equal(picked, Series(['3', '8', np.nan]))