Python pandas.compat.StringIO() Examples

The following are 30 code examples of pandas.compat.StringIO(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.compat, or try the search function.
Example #1
Source File: test_usecols.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_usecols_with_multi_byte_characters(all_parsers, usecols):
    """Read a CSV whose header names are multi-byte text, filtered by ``usecols``.

    Whatever ``usecols`` value the fixture supplies, the result is expected
    to contain exactly the first two columns of the four-column input.
    """
    parser = all_parsers
    csv_text = """あああ,いい,ううう,ええええ
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""

    # Each expected column is keyed by row label, so the 0..2 index is explicit.
    float_column = {
        0: 0.056674972999999997,
        1: 2.6132309819999997,
        2: 3.5689350380000002,
    }
    int_column = {0: 8, 1: 2, 2: 7}
    expected = DataFrame({"あああ": float_column, "いい": int_column})

    result = parser.read_csv(StringIO(csv_text), usecols=usecols)
    tm.assert_frame_equal(result, expected)
Example #2
Source File: test_dtypes.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_dtype_per_column(all_parsers):
    """Read a two-column CSV with a different ``dtype`` for each column.

    Column "one" is requested as ``np.float64`` by name and key ``1`` is
    mapped to ``str``; the expected frame holds floats in "one" and string
    objects in "two".
    """
    csv_text = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""
    dtype_spec = {"one": np.float64, 1: str}

    rows = [[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]]
    expected = DataFrame(rows, columns=["one", "two"])
    expected["one"] = expected["one"].astype(np.float64)
    expected["two"] = expected["two"].astype(object)

    result = all_parsers.read_csv(StringIO(csv_text), dtype=dtype_spec)
    tm.assert_frame_equal(result, expected)
Example #3
Source File: test_usecols.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_usecols_with_parse_dates(all_parsers, usecols):
    """Combine ``usecols`` with a nested ``parse_dates`` specification.

    ``parse_dates`` is the nested list ``[[1, 2]]``; the expected frame has a
    combined "c_d" timestamp column followed by column "a".
    """
    # see gh-9755
    parser = all_parsers
    csv_text = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
    date_spec = [[1, 2]]

    stamps = [
        Timestamp("2014-01-01 09:00:00"),
        Timestamp("2014-01-02 10:00:00"),
    ]
    expected = DataFrame({"a": [0, 0], "c_d": stamps}, columns=["c_d", "a"])

    result = parser.read_csv(
        StringIO(csv_text), usecols=usecols, parse_dates=date_spec
    )
    tm.assert_frame_equal(result, expected)
Example #4
Source File: test_usecols.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_usecols_with_parse_dates2(all_parsers):
    """Header-less read with explicit names, date parsing and an index column.

    ``usecols`` is a copy of the supplied ``names``; column 0 is parsed as
    dates and also used as the index, which is expected to be named "date".
    """
    # see gh-13604
    csv_text = """2008-02-07 09:40,1032.43
2008-02-07 09:50,1042.54
2008-02-07 10:00,1051.65"""
    names = ["date", "values"]

    expected_index = Index(
        [
            Timestamp("2008-02-07 09:40"),
            Timestamp("2008-02-07 09:50"),
            Timestamp("2008-02-07 10:00"),
        ],
        name="date",
    )
    expected = DataFrame(
        {"values": [1032.43, 1042.54, 1051.65]}, index=expected_index
    )

    result = all_parsers.read_csv(
        StringIO(csv_text),
        parse_dates=[0],
        index_col=0,
        usecols=list(names),
        header=None,
        names=names,
    )
    tm.assert_frame_equal(result, expected)
Example #5
Source File: test_usecols.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_usecols_with_parse_dates3(all_parsers):
    """Select all ten columns by name while parsing the first one as a date.

    The expected single-row frame keeps the column order given in
    ``usecols`` and holds a ``Timestamp`` in column "a".
    """
    # see gh-14792
    csv_text = """a,b,c,d,e,f,g,h,i,j
2016/09/21,1,1,2,3,4,5,6,7,8"""
    column_names = list("abcdefghij")

    expected_values = {
        "a": Timestamp("2016-09-21"),
        "b": [1],
        "c": [1],
        "d": [2],
        "e": [3],
        "f": [4],
        "g": [5],
        "h": [6],
        "i": [7],
        "j": [8],
    }
    expected = DataFrame(expected_values, columns=column_names)

    result = all_parsers.read_csv(
        StringIO(csv_text), usecols=column_names, parse_dates=[0]
    )
    tm.assert_frame_equal(result, expected)
Example #6
Source File: test_dtypes.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_categorical_dtype_chunksize_infer_categories(all_parsers):
    """Chunked read (``chunksize=2``) with column "b" requested as "category".

    Each chunk is compared against a frame whose categorical is built only
    from the values present in that chunk; the second chunk keeps row labels
    2 and 3.
    """
    # see gh-10153
    csv_text = """a,b
1,a
1,b
1,b
2,c"""

    first_chunk = DataFrame({"a": [1, 1], "b": Categorical(["a", "b"])})
    second_chunk = DataFrame(
        {"a": [1, 2], "b": Categorical(["b", "c"])}, index=[2, 3]
    )
    expected_chunks = [first_chunk, second_chunk]

    chunks = all_parsers.read_csv(
        StringIO(csv_text), dtype={"b": "category"}, chunksize=2
    )

    for got, want in zip(chunks, expected_chunks):
        tm.assert_frame_equal(got, want)
Example #7
Source File: test_usecols.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_usecols_with_unicode_strings(all_parsers):
    """Select columns "AAA" and "BBB" using ``u``-prefixed string labels.

    Only those two of the four input columns are expected in the result.
    """
    # see gh-13219
    parser = all_parsers
    csv_text = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    wanted = [u"AAA", u"BBB"]

    # Each expected column is keyed by row label, so the 0..2 index is explicit.
    float_column = {
        0: 0.056674972999999997,
        1: 2.6132309819999997,
        2: 3.5689350380000002,
    }
    int_column = {0: 8, 1: 2, 2: 7}
    expected = DataFrame({"AAA": float_column, "BBB": int_column})

    result = parser.read_csv(StringIO(csv_text), usecols=wanted)
    tm.assert_frame_equal(result, expected)
Example #8
Source File: test_usecols.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_usecols_with_single_byte_unicode_strings(all_parsers):
    """Select columns "A" and "B" using single-character ``u``-prefixed labels.

    Only those two of the four input columns are expected in the result.
    """
    # see gh-13219
    parser = all_parsers
    csv_text = """A,B,C,D
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    wanted = [u"A", u"B"]

    # Each expected column is keyed by row label, so the 0..2 index is explicit.
    float_column = {
        0: 0.056674972999999997,
        1: 2.6132309819999997,
        2: 3.5689350380000002,
    }
    int_column = {0: 8, 1: 2, 2: 7}
    expected = DataFrame({"A": float_column, "B": int_column})

    result = parser.read_csv(StringIO(csv_text), usecols=wanted)
    tm.assert_frame_equal(result, expected)
Example #9
Source File: test_dtypes.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
    """Chunked read (``chunksize=2``) with an explicit ``CategoricalDtype``.

    Every expected chunk carries the full category list ["a", "b", "c"],
    even when the chunk contains only some of those values.
    """
    # see gh-10153
    csv_text = """a,b
1,a
1,b
1,b
2,c"""
    cats = ["a", "b", "c"]

    first_chunk = DataFrame(
        {"a": [1, 1], "b": Categorical(["a", "b"], categories=cats)}
    )
    second_chunk = DataFrame(
        {"a": [1, 2], "b": Categorical(["b", "c"], categories=cats)},
        index=[2, 3],
    )
    expected_chunks = [first_chunk, second_chunk]

    chunks = all_parsers.read_csv(
        StringIO(csv_text), dtype={"b": CategoricalDtype(cats)}, chunksize=2
    )

    for got, want in zip(chunks, expected_chunks):
        tm.assert_frame_equal(got, want)
Example #10
Source File: test_dtypes.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_categorical_category_dtype(all_parsers, categories, ordered):
    """Read column "b" with a ``CategoricalDtype`` built from the fixtures.

    The same ``categories`` and ``ordered`` values are used for both the
    requested dtype and the expected categorical column.
    """
    csv_text = """a,b
1,a
1,b
1,b
2,c"""

    requested = CategoricalDtype(categories=categories, ordered=ordered)

    expected_b = Categorical(
        ["a", "b", "b", "c"], categories=categories, ordered=ordered
    )
    expected = DataFrame({"a": [1, 1, 1, 2], "b": expected_b})

    result = all_parsers.read_csv(StringIO(csv_text), dtype={"b": requested})
    tm.assert_frame_equal(result, expected)
Example #11
Source File: test_repr_info.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_repr(self):
        buf = StringIO()

        # small one
        foo = repr(self.frame)
        self.frame.info(verbose=False, buf=buf)

        # even smaller
        self.frame.reindex(columns=['A']).info(verbose=False, buf=buf)
        self.frame.reindex(columns=['A', 'B']).info(verbose=False, buf=buf)

        # exhausting cases in DataFrame.info

        # columns but no index
        no_index = DataFrame(columns=[0, 1, 3])
        foo = repr(no_index)  # noqa

        # no columns or index
        self.empty.info(buf=buf)

        df = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"])
        assert "\t" not in repr(df)
        assert "\r" not in repr(df)
        assert "a\n" not in repr(df) 
Example #12
Source File: test_transform.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_transform_casting():
    """Group-wise ``diff`` of a datetime column via ``transform``.

    The result must have a timedelta64 dtype both when transforming the
    selected "DATETIME" series and when transforming the grouped frame.
    """
    # 13046
    data = """
    idx     A         ID3              DATETIME
    0   B-028  b76cd912ff "2014-10-08 13:43:27"
    1   B-054  4a57ed0b02 "2014-10-08 14:26:19"
    2   B-076  1a682034f8 "2014-10-08 14:29:01"
    3   B-023  b76cd912ff "2014-10-08 18:39:34"
    4   B-023  f88g8d7sds "2014-10-08 18:40:18"
    5   B-033  b76cd912ff "2014-10-08 18:44:30"
    6   B-032  b76cd912ff "2014-10-08 18:46:00"
    7   B-037  b76cd912ff "2014-10-08 18:52:15"
    8   B-046  db959faf02 "2014-10-08 18:59:59"
    9   B-053  b76cd912ff "2014-10-08 19:17:48"
    10  B-065  b76cd912ff "2014-10-08 19:21:38"
    """
    frame = pd.read_csv(
        StringIO(data),
        sep=r'\s+',
        index_col=[0],
        parse_dates=['DATETIME'],
    )

    def _diff(values):
        # Same operation the grouped transform applies in both checks below.
        return values.diff()

    from_series = frame.groupby('ID3')['DATETIME'].transform(_diff)
    assert is_timedelta64_dtype(from_series.dtype)

    from_frame = frame[['ID3', 'DATETIME']].groupby('ID3').transform(_diff)
    assert is_timedelta64_dtype(from_frame.DATETIME.dtype)
Example #13
Source File: test_query_eval.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_query_with_nested_strings(self, parser, engine):
        """Query a column for a value that itself contains double quotes.

        The result of ``DataFrame.query`` is compared with plain boolean
        indexing on the "event" column.
        """
        skip_if_no_pandas_parser(parser)
        raw = """id          event          timestamp
        1   "page 1 load"   1/1/2014 0:00:01
        1   "page 1 exit"   1/1/2014 0:00:31
        2   "page 2 load"   1/1/2014 0:01:01
        2   "page 2 exit"   1/1/2014 0:01:31
        3   "page 3 load"   1/1/2014 0:02:01
        3   "page 3 exit"   1/1/2014 0:02:31
        4   "page 1 load"   2/1/2014 1:00:01
        4   "page 1 exit"   2/1/2014 1:00:31
        5   "page 2 load"   2/1/2014 1:01:01
        5   "page 2 exit"   2/1/2014 1:01:31
        6   "page 3 load"   2/1/2014 1:02:01
        6   "page 3 exit"   2/1/2014 1:02:31
        """
        # Fields are separated by runs of two or more whitespace characters,
        # so the single spaces inside the quoted event text stay in one field.
        frame = pd.read_csv(
            StringIO(raw),
            sep=r'\s{2,}',
            engine='python',
            parse_dates=['timestamp'],
        )

        is_page_one_load = frame.event == '"page 1 load"'
        expected = frame[is_page_one_load]

        res = frame.query(
            """'"page 1 load"' in event""", parser=parser, engine=engine
        )
        assert_frame_equal(expected, res)
Example #14
Source File: test_loc.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_loc_setitem_consistency_slice_column_len(self):
        """Assign whole columns through ``.loc[:, key]`` on MultiIndex columns.

        Converts the start/end columns to datetimes, stores their difference
        as a new "Duration" column, casts it with ``astype('timedelta64[s]')``
        and compares the column with the expected series.
        """
        # .loc[:,column] setting with slice == len of the column
        # GH10408
        data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat
Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse
Region,Site,RespondentID,,,,,
Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes,
Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes
Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes,
Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No"""

        start_key = ('Respondent', 'StartDate')
        end_key = ('Respondent', 'EndDate')
        duration_key = ('Respondent', 'Duration')

        df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2])
        df.loc[:, start_key] = pd.to_datetime(df.loc[:, start_key])
        df.loc[:, end_key] = pd.to_datetime(df.loc[:, end_key])
        df.loc[:, duration_key] = df.loc[:, end_key] - df.loc[:, start_key]

        df.loc[:, duration_key] = (
            df.loc[:, duration_key].astype('timedelta64[s]')
        )
        expected = Series(
            [1380, 720, 840, 2160.], index=df.index, name=duration_key
        )
        tm.assert_series_equal(df[duration_key], expected)
Example #15
Source File: test_repr_info.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_info_wide(self):
        from pandas import set_option, reset_option
        io = StringIO()
        df = DataFrame(np.random.randn(5, 101))
        df.info(buf=io)

        io = StringIO()
        df.info(buf=io, max_cols=101)
        rs = io.getvalue()
        assert len(rs.splitlines()) > 100
        xp = rs

        set_option('display.max_info_columns', 101)
        io = StringIO()
        df.info(buf=io)
        assert rs == xp
        reset_option('display.max_info_columns') 
Example #16
Source File: test_dtypes.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_empty_dtype(all_parsers, dtype, expected):
    """Read a header-only CSV ("a,b") with the fixture-supplied ``dtype``.

    The parsed frame is compared with the fixture-supplied ``expected``.
    """
    # see gh-14712
    header_only = StringIO("a,b")

    result = all_parsers.read_csv(header_only, header=0, dtype=dtype)
    tm.assert_frame_equal(result, expected)
Example #17
Source File: test_dtypes.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_numeric_dtype(all_parsers, dtype):
    """Read two header-less integer rows with the fixture-supplied ``dtype``.

    The result must equal a frame built directly from ``[0, 1]`` with the
    same dtype.
    """
    source = StringIO("0\n1")
    expected = DataFrame([0, 1], dtype=dtype)

    result = all_parsers.read_csv(source, header=None, dtype=dtype)
    tm.assert_frame_equal(expected, result)
Example #18
Source File: test_usecols.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_callable_usecols(all_parsers, usecols, expected):
    """Read a mixed-case-header CSV with the fixture-supplied ``usecols``.

    The parsed frame is compared with the fixture-supplied ``expected``.
    """
    # see gh-14154
    parser = all_parsers
    csv_text = """AaA,bBb,CCC,ddd
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""

    result = parser.read_csv(StringIO(csv_text), usecols=usecols)
    tm.assert_frame_equal(result, expected)
Example #19
Source File: test_usecols.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_usecols(all_parsers, usecols):
    """Read a three-column CSV with the fixture-supplied ``usecols``.

    Only columns "b" and "c" are expected in the result.
    """
    csv_text = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""

    expected_rows = [[2, 3], [5, 6], [8, 9], [11, 12]]
    expected = DataFrame(expected_rows, columns=["b", "c"])

    result = all_parsers.read_csv(StringIO(csv_text), usecols=usecols)
    tm.assert_frame_equal(result, expected)
Example #20
Source File: test_usecols.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_usecols_with_names(all_parsers):
    """Select columns by position while renaming them through ``names``.

    Positions 1 and 2 are read with ``header=0`` and the expected frame
    carries the replacement labels "foo" and "bar".
    """
    csv_text = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
    names = ["foo", "bar"]

    expected_rows = [[2, 3], [5, 6], [8, 9], [11, 12]]
    expected = DataFrame(expected_rows, columns=names)

    result = all_parsers.read_csv(
        StringIO(csv_text), names=names, usecols=[1, 2], header=0
    )
    tm.assert_frame_equal(result, expected)
Example #21
Source File: test_usecols.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_usecols_with_mixed_encoding_strings(all_parsers, usecols):
    """Expect ``ValueError`` for the fixture-supplied ``usecols`` value.

    The error message must match ``_msg_validate_usecols_arg``.
    """
    csv_text = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    source = StringIO(csv_text)

    with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
        all_parsers.read_csv(source, usecols=usecols)
Example #22
Source File: test_usecols.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_usecols_index_col_conflict2(all_parsers):
    """Use two of the ``usecols`` columns as a multi-column index.

    Columns "b", "c" and "d" are selected and "b"/"c" become the index,
    leaving "d" as the only data column.
    """
    # see gh-4201: test that index_col as integer reflects usecols
    csv_text = "a,b,c,d\nA,a,1,one\nB,b,2,two"
    index_names = ["b", "c"]

    flat = DataFrame({"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")})
    expected = flat.set_index(["b", "c"])

    result = all_parsers.read_csv(
        StringIO(csv_text), usecols=["b", "c", "d"], index_col=index_names
    )
    tm.assert_frame_equal(result, expected)
Example #23
Source File: test_dtypes.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_dtype_with_converters(all_parsers):
    """Pass both ``dtype`` and ``converters`` for the same column.

    A ``ParserWarning`` is expected, and column "a" is expected to hold the
    converter's string output rather than the requested "i8" values.
    """
    csv_text = """a,b
1.1,2.2
1.2,2.3"""

    def _as_text(value):
        return str(value)

    # Dtype spec ignored if converted specified.
    with tm.assert_produces_warning(ParserWarning):
        result = all_parsers.read_csv(
            StringIO(csv_text),
            dtype={"a": "i8"},
            converters={"a": _as_text},
        )

    expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]})
    tm.assert_frame_equal(result, expected)
Example #24
Source File: test_dtypes.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_raise_on_passed_int_dtype_with_nas(all_parsers):
    """Expect ``ValueError`` when an int64 column contains a missing value.

    The message to match depends on whether ``parser.engine`` is "c".
    """
    # see gh-2631
    parser = all_parsers
    csv_text = """YEAR, DOY, a
2001,106380451,10
2001,,11
2001,106380451,67"""

    if parser.engine == "c":
        msg = "Integer column has NA values"
    else:
        msg = "Unable to convert column DOY"

    with pytest.raises(ValueError, match=msg):
        parser.read_csv(
            StringIO(csv_text),
            dtype={"DOY": np.int64},
            skipinitialspace=True,
        )
Example #25
Source File: test_dtypes.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_empty_with_dup_column_pass_dtype_by_indexes_warn(all_parsers):
    """Read empty input with duplicate ``names`` and positional dtypes.

    A ``UserWarning`` is expected, and the result is expected to have the
    columns "one" (u1) and "one.1" (f) over an empty object-dtype index.
    """
    # see gh-9424
    first = Series([], name="one", dtype="u1")
    second = Series([], name="one.1", dtype="f")
    expected = concat([first, second], axis=1)
    expected.index = expected.index.astype(object)

    empty_text = ""
    with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
        result = all_parsers.read_csv(
            StringIO(empty_text),
            names=["one", "one"],
            dtype={0: "u1", 1: "f"},
        )
        tm.assert_frame_equal(result, expected)
Example #26
Source File: test_dtypes.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
    """Header-only CSV with a duplicated column name, dtypes given by position.

    The expected frame has empty columns "one" (u1) and "one.1" (f) over an
    empty object-dtype index.
    """
    positional_dtypes = {0: "u1", 1: "f"}

    expected_columns = {
        "one": np.empty(0, dtype="u1"),
        "one.1": np.empty(0, dtype="f"),
    }
    expected = DataFrame(expected_columns, index=Index([], dtype=object))

    result = all_parsers.read_csv(StringIO("one,one"), dtype=positional_dtypes)
    tm.assert_frame_equal(result, expected)
Example #27
Source File: test_dtypes.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
    """Header-only CSV with a duplicated column name, dtypes given by name.

    The dtype mapping uses the labels "one" and "one.1"; the expected frame
    has those two empty columns (u1 and f) over an empty object-dtype index.
    """
    named_dtypes = {"one": "u1", "one.1": "f"}

    expected_columns = {
        "one": np.empty(0, dtype="u1"),
        "one.1": np.empty(0, dtype="f"),
    }
    expected = DataFrame(expected_columns, index=Index([], dtype=object))

    result = all_parsers.read_csv(StringIO("one,one"), dtype=named_dtypes)
    tm.assert_frame_equal(result, expected)
Example #28
Source File: test_dtypes.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_empty_with_multi_index_pass_dtype(all_parsers):
    """Header-only CSV read with a two-level index and per-column dtypes.

    "one" and "two" become the index levels; the dtype mapping addresses
    "one" by name and key ``1`` by position. The expected frame has an
    empty MultiIndex built from u1 and float64 arrays and one empty
    object-dtype column "three".
    """
    parser = all_parsers

    data = "one,two,three"
    result = parser.read_csv(StringIO(data), index_col=["one", "two"],
                             dtype={"one": "u1", 1: "f8"})

    exp_idx = MultiIndex.from_arrays([np.empty(0, dtype="u1"),
                                      np.empty(0, dtype=np.float64)],
                                     names=["one", "two"])
    # Use the builtin ``object``: the ``np.object`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    expected = DataFrame({"three": np.empty(0, dtype=object)},
                         index=exp_idx)
    tm.assert_frame_equal(result, expected)
Example #29
Source File: test_dtypes.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_empty_with_index_pass_dtype(all_parsers):
    """Header-only CSV read with "one" as the index and per-column dtypes.

    The dtype mapping addresses "one" by name and key ``1`` by position;
    the expected frame has an empty "f" column "two" and an empty u1 index
    named "one".
    """
    dtype_spec = {"one": "u1", 1: "f"}

    expected_index = Index([], dtype="u1", name="one")
    expected = DataFrame(
        {"two": np.empty(0, dtype="f")}, index=expected_index
    )

    result = all_parsers.read_csv(
        StringIO("one,two"), index_col=["one"], dtype=dtype_spec
    )
    tm.assert_frame_equal(result, expected)
Example #30
Source File: test_dtypes.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_empty_pass_dtype(all_parsers):
    """Header-only CSV read with a dtype given for one of two columns.

    "one" is requested as u1; the expected frame has an empty u1 column
    "one", an empty object-dtype column "two" and an empty object-dtype
    index.
    """
    parser = all_parsers

    data = "one,two"
    result = parser.read_csv(StringIO(data), dtype={"one": "u1"})

    # Use the builtin ``object``: the ``np.object`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    expected = DataFrame({"one": np.empty(0, dtype="u1"),
                          "two": np.empty(0, dtype=object)},
                         index=Index([], dtype=object))
    tm.assert_frame_equal(result, expected)