Python pandas.core.frame.Series() Examples

The following are 30 code examples of pandas.core.frame.Series(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.frame , or try the search function .
Example #1
Source File: test_stata.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_read_write_dta10(self, version):
        original = DataFrame(data=[["string", "object", 1, 1.1,
                                    np.datetime64('2003-12-25')]],
                             columns=['string', 'object', 'integer',
                                      'floating', 'datetime'])
        original["object"] = Series(original["object"], dtype=object)
        original.index.name = 'index'
        original.index = original.index.astype(np.int32)
        original['integer'] = original['integer'].astype(np.int32)

        with tm.ensure_clean() as path:
            original.to_stata(path, {'datetime': 'tc'}, version=version)
            written_and_read_again = self.read_dta(path)
            # original.index is np.int32, read index is np.int64
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  original, check_index_type=False) 
Example #2
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_read_write_dta10(self):
        original = DataFrame(data=[["string", "object", 1, 1.1,
                                    np.datetime64('2003-12-25')]],
                             columns=['string', 'object', 'integer',
                                      'floating', 'datetime'])
        original["object"] = Series(original["object"], dtype=object)
        original.index.name = 'index'
        original.index = original.index.astype(np.int32)
        original['integer'] = original['integer'].astype(np.int32)

        with tm.ensure_clean() as path:
            original.to_stata(path, {'datetime': 'tc'})
            written_and_read_again = self.read_dta(path)
            # original.index is np.int32, readed index is np.int64
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  original, check_index_type=False) 
Example #3
Source File: test_stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_minimal_size_col(self):
        str_lens = (1, 100, 244)
        s = {}
        for str_len in str_lens:
            s['s' + str(str_len)] = Series(['a' * str_len,
                                            'b' * str_len, 'c' * str_len])
        original = DataFrame(s)
        with tm.ensure_clean() as path:
            original.to_stata(path, write_index=False)

            with StataReader(path) as sr:
                typlist = sr.typlist
                variables = sr.varlist
                formats = sr.fmtlist
                for variable, fmt, typ in zip(variables, formats, typlist):
                    assert int(variable[1:]) == int(fmt[1:-1])
                    assert int(variable[1:]) == typ 
Example #4
Source File: test_stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_large_value_conversion(self):
        s0 = Series([1, 99], dtype=np.int8)
        s1 = Series([1, 127], dtype=np.int8)
        s2 = Series([1, 2 ** 15 - 1], dtype=np.int16)
        s3 = Series([1, 2 ** 63 - 1], dtype=np.int64)
        original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3})
        original.index.name = 'index'
        with tm.ensure_clean() as path:
            with tm.assert_produces_warning(PossiblePrecisionLoss):
                original.to_stata(path)

            written_and_read_again = self.read_dta(path)
            modified = original.copy()
            modified['s1'] = Series(modified['s1'], dtype=np.int16)
            modified['s2'] = Series(modified['s2'], dtype=np.int32)
            modified['s3'] = Series(modified['s3'], dtype=np.float64)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  modified) 
Example #5
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_large_value_conversion(self):
        s0 = Series([1, 99], dtype=np.int8)
        s1 = Series([1, 127], dtype=np.int8)
        s2 = Series([1, 2 ** 15 - 1], dtype=np.int16)
        s3 = Series([1, 2 ** 63 - 1], dtype=np.int64)
        original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3})
        original.index.name = 'index'
        with tm.ensure_clean() as path:
            with tm.assert_produces_warning(PossiblePrecisionLoss):
                original.to_stata(path)

            written_and_read_again = self.read_dta(path)
            modified = original.copy()
            modified['s1'] = Series(modified['s1'], dtype=np.int16)
            modified['s2'] = Series(modified['s2'], dtype=np.int32)
            modified['s3'] = Series(modified['s3'], dtype=np.float64)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  modified) 
Example #6
Source File: test_stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_read_write_dta10(self, version):
        original = DataFrame(data=[["string", "object", 1, 1.1,
                                    np.datetime64('2003-12-25')]],
                             columns=['string', 'object', 'integer',
                                      'floating', 'datetime'])
        original["object"] = Series(original["object"], dtype=object)
        original.index.name = 'index'
        original.index = original.index.astype(np.int32)
        original['integer'] = original['integer'].astype(np.int32)

        with tm.ensure_clean() as path:
            original.to_stata(path, {'datetime': 'tc'}, version=version)
            written_and_read_again = self.read_dta(path)
            # original.index is np.int32, read index is np.int64
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  original, check_index_type=False) 
Example #7
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_minimal_size_col(self):
        str_lens = (1, 100, 244)
        s = {}
        for str_len in str_lens:
            s['s' + str(str_len)] = Series(['a' * str_len,
                                            'b' * str_len, 'c' * str_len])
        original = DataFrame(s)
        with tm.ensure_clean() as path:
            original.to_stata(path, write_index=False)

            with StataReader(path) as sr:
                typlist = sr.typlist
                variables = sr.varlist
                formats = sr.fmtlist
                for variable, fmt, typ in zip(variables, formats, typlist):
                    assert int(variable[1:]) == int(fmt[1:-1])
                    assert int(variable[1:]) == typ 
Example #8
Source File: magic.py    From ipython-cypher with GNU General Public License v2.0 6 votes vote down vote up
def _persist_dataframe(self, raw, conn, user_ns):
        if not DataFrame:
            raise ImportError("Must `pip install pandas` to use DataFrames")
        pieces = raw.split()
        if len(pieces) != 2:
            raise SyntaxError(
                "Format: %%cypher [connection] persist <DataFrameName>"
            )
        frame_name = pieces[1].strip(';')
        frame = eval(frame_name, user_ns)
        if not isinstance(frame, DataFrame) and not isinstance(frame, Series):
            raise TypeError(
                '%s is not a Pandas DataFrame or Series' % frame_name
            )
        table_name = frame_name.lower()
        table_name = self._legal_cypher_identifier.search(table_name).group(0)
        frame.to_sql(table_name, conn.session.engine)
        return 'Persisted %s' % table_name 
Example #9
Source File: test_stata.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_minimal_size_col(self):
        str_lens = (1, 100, 244)
        s = {}
        for str_len in str_lens:
            s['s' + str(str_len)] = Series(['a' * str_len,
                                            'b' * str_len, 'c' * str_len])
        original = DataFrame(s)
        with tm.ensure_clean() as path:
            original.to_stata(path, write_index=False)

            with StataReader(path) as sr:
                typlist = sr.typlist
                variables = sr.varlist
                formats = sr.fmtlist
                for variable, fmt, typ in zip(variables, formats, typlist):
                    assert int(variable[1:]) == int(fmt[1:-1])
                    assert int(variable[1:]) == typ 
Example #10
Source File: test_stata.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_large_value_conversion(self):
        s0 = Series([1, 99], dtype=np.int8)
        s1 = Series([1, 127], dtype=np.int8)
        s2 = Series([1, 2 ** 15 - 1], dtype=np.int16)
        s3 = Series([1, 2 ** 63 - 1], dtype=np.int64)
        original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3})
        original.index.name = 'index'
        with tm.ensure_clean() as path:
            with tm.assert_produces_warning(PossiblePrecisionLoss):
                original.to_stata(path)

            written_and_read_again = self.read_dta(path)
            modified = original.copy()
            modified['s1'] = Series(modified['s1'], dtype=np.int16)
            modified['s2'] = Series(modified['s2'], dtype=np.int32)
            modified['s3'] = Series(modified['s3'], dtype=np.float64)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  modified) 
Example #11
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_read_write_dta10(self, version):
        original = DataFrame(data=[["string", "object", 1, 1.1,
                                    np.datetime64('2003-12-25')]],
                             columns=['string', 'object', 'integer',
                                      'floating', 'datetime'])
        original["object"] = Series(original["object"], dtype=object)
        original.index.name = 'index'
        original.index = original.index.astype(np.int32)
        original['integer'] = original['integer'].astype(np.int32)

        with tm.ensure_clean() as path:
            original.to_stata(path, {'datetime': 'tc'}, version=version)
            written_and_read_again = self.read_dta(path)
            # original.index is np.int32, read index is np.int64
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  original, check_index_type=False) 
Example #12
Source File: test_stata.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_read_write_dta10(self, version):
        original = DataFrame(data=[["string", "object", 1, 1.1,
                                    np.datetime64('2003-12-25')]],
                             columns=['string', 'object', 'integer',
                                      'floating', 'datetime'])
        original["object"] = Series(original["object"], dtype=object)
        original.index.name = 'index'
        original.index = original.index.astype(np.int32)
        original['integer'] = original['integer'].astype(np.int32)

        with tm.ensure_clean() as path:
            original.to_stata(path, {'datetime': 'tc'}, version=version)
            written_and_read_again = self.read_dta(path)
            # original.index is np.int32, read index is np.int64
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  original, check_index_type=False) 
Example #13
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_large_value_conversion(self):
        s0 = Series([1, 99], dtype=np.int8)
        s1 = Series([1, 127], dtype=np.int8)
        s2 = Series([1, 2 ** 15 - 1], dtype=np.int16)
        s3 = Series([1, 2 ** 63 - 1], dtype=np.int64)
        original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3})
        original.index.name = 'index'
        with tm.ensure_clean() as path:
            with tm.assert_produces_warning(PossiblePrecisionLoss):
                original.to_stata(path)

            written_and_read_again = self.read_dta(path)
            modified = original.copy()
            modified['s1'] = Series(modified['s1'], dtype=np.int16)
            modified['s2'] = Series(modified['s2'], dtype=np.int32)
            modified['s3'] = Series(modified['s3'], dtype=np.float64)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  modified) 
Example #14
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_minimal_size_col(self):
        str_lens = (1, 100, 244)
        s = {}
        for str_len in str_lens:
            s['s' + str(str_len)] = Series(['a' * str_len,
                                            'b' * str_len, 'c' * str_len])
        original = DataFrame(s)
        with tm.ensure_clean() as path:
            original.to_stata(path, write_index=False)

            with StataReader(path) as sr:
                typlist = sr.typlist
                variables = sr.varlist
                formats = sr.fmtlist
                for variable, fmt, typ in zip(variables, formats, typlist):
                    assert int(variable[1:]) == int(fmt[1:-1])
                    assert int(variable[1:]) == typ 
Example #15
Source File: test_stata.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_minimal_size_col(self):
        str_lens = (1, 100, 244)
        s = {}
        for str_len in str_lens:
            s['s' + str(str_len)] = Series(['a' * str_len,
                                            'b' * str_len, 'c' * str_len])
        original = DataFrame(s)
        with tm.ensure_clean() as path:
            original.to_stata(path, write_index=False)

            with StataReader(path) as sr:
                typlist = sr.typlist
                variables = sr.varlist
                formats = sr.fmtlist
                for variable, fmt, typ in zip(variables, formats, typlist):
                    assert int(variable[1:]) == int(fmt[1:-1])
                    assert int(variable[1:]) == typ 
Example #16
Source File: test_stata.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_large_value_conversion(self):
        s0 = Series([1, 99], dtype=np.int8)
        s1 = Series([1, 127], dtype=np.int8)
        s2 = Series([1, 2 ** 15 - 1], dtype=np.int16)
        s3 = Series([1, 2 ** 63 - 1], dtype=np.int64)
        original = DataFrame({'s0': s0, 's1': s1, 's2': s2, 's3': s3})
        original.index.name = 'index'
        with tm.ensure_clean() as path:
            with tm.assert_produces_warning(PossiblePrecisionLoss):
                original.to_stata(path)

            written_and_read_again = self.read_dta(path)
            modified = original.copy()
            modified['s1'] = Series(modified['s1'], dtype=np.int16)
            modified['s2'] = Series(modified['s2'], dtype=np.int32)
            modified['s3'] = Series(modified['s3'], dtype=np.float64)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  modified) 
Example #17
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_excessively_long_string(self):
        str_lens = (1, 244, 500)
        s = {}
        for str_len in str_lens:
            s['s' + str(str_len)] = Series(['a' * str_len,
                                            'b' * str_len, 'c' * str_len])
        original = DataFrame(s)
        with pytest.raises(ValueError):
            with tm.ensure_clean() as path:
                original.to_stata(path) 
Example #18
Source File: test_stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_writer_117(self):
        original = DataFrame(data=[['string', 'object', 1, 1, 1, 1.1, 1.1,
                                    np.datetime64('2003-12-25'),
                                    'a', 'a' * 2045, 'a' * 5000, 'a'],
                                   ['string-1', 'object-1', 1, 1, 1, 1.1, 1.1,
                                    np.datetime64('2003-12-26'),
                                    'b', 'b' * 2045, '', '']
                                   ],
                             columns=['string', 'object', 'int8', 'int16',
                                      'int32', 'float32', 'float64',
                                      'datetime',
                                      's1', 's2045', 'srtl', 'forced_strl'])
        original['object'] = Series(original['object'], dtype=object)
        original['int8'] = Series(original['int8'], dtype=np.int8)
        original['int16'] = Series(original['int16'], dtype=np.int16)
        original['int32'] = original['int32'].astype(np.int32)
        original['float32'] = Series(original['float32'], dtype=np.float32)
        original.index.name = 'index'
        original.index = original.index.astype(np.int32)
        copy = original.copy()
        with tm.ensure_clean() as path:
            original.to_stata(path,
                              convert_dates={'datetime': 'tc'},
                              convert_strl=['forced_strl'],
                              version=117)
            written_and_read_again = self.read_dta(path)
            # original.index is np.int32, read index is np.int64
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  original, check_index_type=False)
            tm.assert_frame_equal(original, copy) 
Example #19
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_writer_117(self):
        original = DataFrame(data=[['string', 'object', 1, 1, 1, 1.1, 1.1,
                                    np.datetime64('2003-12-25'),
                                    'a', 'a' * 2045, 'a' * 5000, 'a'],
                                   ['string-1', 'object-1', 1, 1, 1, 1.1, 1.1,
                                    np.datetime64('2003-12-26'),
                                    'b', 'b' * 2045, '', '']
                                   ],
                             columns=['string', 'object', 'int8', 'int16',
                                      'int32', 'float32', 'float64',
                                      'datetime',
                                      's1', 's2045', 'srtl', 'forced_strl'])
        original['object'] = Series(original['object'], dtype=object)
        original['int8'] = Series(original['int8'], dtype=np.int8)
        original['int16'] = Series(original['int16'], dtype=np.int16)
        original['int32'] = original['int32'].astype(np.int32)
        original['float32'] = Series(original['float32'], dtype=np.float32)
        original.index.name = 'index'
        original.index = original.index.astype(np.int32)
        copy = original.copy()
        with tm.ensure_clean() as path:
            original.to_stata(path,
                              convert_dates={'datetime': 'tc'},
                              convert_strl=['forced_strl'],
                              version=117)
            written_and_read_again = self.read_dta(path)
            # original.index is np.int32, read index is np.int64
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  original, check_index_type=False)
            tm.assert_frame_equal(original, copy) 
Example #20
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_read_write_dta13(self):
        s1 = Series(2 ** 9, dtype=np.int16)
        s2 = Series(2 ** 17, dtype=np.int32)
        s3 = Series(2 ** 33, dtype=np.int64)
        original = DataFrame({'int16': s1, 'int32': s2, 'int64': s3})
        original.index.name = 'index'

        formatted = original
        formatted['int64'] = formatted['int64'].astype(np.float64)

        with tm.ensure_clean() as path:
            original.to_stata(path)
            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  formatted) 
Example #21
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_categorical_sorting(self, file):
        parsed = read_stata(getattr(self, file))

        # Sort based on codes, not strings
        parsed = parsed.sort_values("srh")

        # Don't sort index
        parsed.index = np.arange(parsed.shape[0])
        codes = [-1, -1, 0, 1, 1, 1, 2, 2, 3, 4]
        categories = ["Poor", "Fair", "Good", "Very good", "Excellent"]
        cat = pd.Categorical.from_codes(codes=codes, categories=categories)
        expected = pd.Series(cat, name='srh')
        tm.assert_series_equal(expected, parsed["srh"],
                               check_categorical=False) 
Example #22
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_categorical_order(self, file):
        # Directly construct using expected codes
        # Format is is_cat, col_name, labels (in order), underlying data
        expected = [(True, 'ordered', ['a', 'b', 'c', 'd', 'e'], np.arange(5)),
                    (True, 'reverse', ['a', 'b', 'c',
                                       'd', 'e'], np.arange(5)[::-1]),
                    (True, 'noorder', ['a', 'b', 'c', 'd',
                                       'e'], np.array([2, 1, 4, 0, 3])),
                    (True, 'floating', [
                     'a', 'b', 'c', 'd', 'e'], np.arange(0, 5)),
                    (True, 'float_missing', [
                     'a', 'd', 'e'], np.array([0, 1, 2, -1, -1])),
                    (False, 'nolabel', [
                     1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)),
                    (True, 'int32_mixed', ['d', 2, 'e', 'b', 'a'],
                     np.arange(5))]
        cols = []
        for is_cat, col, labels, codes in expected:
            if is_cat:
                cols.append((col, pd.Categorical.from_codes(codes, labels)))
            else:
                cols.append((col, pd.Series(labels, dtype=np.float32)))
        expected = DataFrame.from_dict(OrderedDict(cols))

        # Read with and with out categoricals, ensure order is identical
        file = getattr(self, file)
        parsed = read_stata(file)
        tm.assert_frame_equal(expected, parsed, check_categorical=False)

        # Check identity of codes
        for col in expected:
            if is_categorical_dtype(expected[col]):
                tm.assert_series_equal(expected[col].cat.codes,
                                       parsed[col].cat.codes)
                tm.assert_index_equal(expected[col].cat.categories,
                                      parsed[col].cat.categories) 
Example #23
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_nan_to_missing_value(self):
        s1 = Series(np.arange(4.0), dtype=np.float32)
        s2 = Series(np.arange(4.0), dtype=np.float64)
        s1[::2] = np.nan
        s2[1::2] = np.nan
        original = DataFrame({'s1': s1, 's2': s2})
        original.index.name = 'index'
        with tm.ensure_clean() as path:
            original.to_stata(path)
            written_and_read_again = self.read_dta(path)
            written_and_read_again = written_and_read_again.set_index('index')
            tm.assert_frame_equal(written_and_read_again, original) 
Example #24
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_string_no_dates(self):
        s1 = Series(['a', 'A longer string'])
        s2 = Series([1.0, 2.0], dtype=np.float64)
        original = DataFrame({'s1': s1, 's2': s2})
        original.index.name = 'index'
        with tm.ensure_clean() as path:
            original.to_stata(path)
            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  original) 
Example #25
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_nan_to_missing_value(self, version):
        s1 = Series(np.arange(4.0), dtype=np.float32)
        s2 = Series(np.arange(4.0), dtype=np.float64)
        s1[::2] = np.nan
        s2[1::2] = np.nan
        original = DataFrame({'s1': s1, 's2': s2})
        original.index.name = 'index'
        with tm.ensure_clean() as path:
            original.to_stata(path, version=version)
            written_and_read_again = self.read_dta(path)
            written_and_read_again = written_and_read_again.set_index('index')
            tm.assert_frame_equal(written_and_read_again, original) 
Example #26
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_excessively_long_string(self):
        str_lens = (1, 244, 500)
        s = {}
        for str_len in str_lens:
            s['s' + str(str_len)] = Series(['a' * str_len,
                                            'b' * str_len, 'c' * str_len])
        original = DataFrame(s)
        with pytest.raises(ValueError):
            with tm.ensure_clean() as path:
                original.to_stata(path) 
Example #27
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_categorical_order(self, file):
        # Directly construct using expected codes
        # Format is is_cat, col_name, labels (in order), underlying data
        expected = [(True, 'ordered', ['a', 'b', 'c', 'd', 'e'], np.arange(5)),
                    (True, 'reverse', ['a', 'b', 'c',
                                       'd', 'e'], np.arange(5)[::-1]),
                    (True, 'noorder', ['a', 'b', 'c', 'd',
                                       'e'], np.array([2, 1, 4, 0, 3])),
                    (True, 'floating', [
                     'a', 'b', 'c', 'd', 'e'], np.arange(0, 5)),
                    (True, 'float_missing', [
                     'a', 'd', 'e'], np.array([0, 1, 2, -1, -1])),
                    (False, 'nolabel', [
                     1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)),
                    (True, 'int32_mixed', ['d', 2, 'e', 'b', 'a'],
                     np.arange(5))]
        cols = []
        for is_cat, col, labels, codes in expected:
            if is_cat:
                cols.append((col, pd.Categorical.from_codes(codes, labels)))
            else:
                cols.append((col, pd.Series(labels, dtype=np.float32)))
        expected = DataFrame.from_items(cols)

        # Read with and with out categoricals, ensure order is identical
        file = getattr(self, file)
        parsed = read_stata(file)
        tm.assert_frame_equal(expected, parsed, check_categorical=False)

        # Check identity of codes
        for col in expected:
            if is_categorical_dtype(expected[col]):
                tm.assert_series_equal(expected[col].cat.codes,
                                       parsed[col].cat.codes)
                tm.assert_index_equal(expected[col].cat.categories,
                                      parsed[col].cat.categories) 
Example #28
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_categorical_sorting(self, file):
        parsed = read_stata(getattr(self, file))

        # Sort based on codes, not strings
        parsed = parsed.sort_values("srh")

        # Don't sort index
        parsed.index = np.arange(parsed.shape[0])
        codes = [-1, -1, 0, 1, 1, 1, 2, 2, 3, 4]
        categories = ["Poor", "Fair", "Good", "Very good", "Excellent"]
        cat = pd.Categorical.from_codes(codes=codes, categories=categories)
        expected = pd.Series(cat, name='srh')
        tm.assert_series_equal(expected, parsed["srh"],
                               check_categorical=False) 
Example #29
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_string_no_dates(self):
        s1 = Series(['a', 'A longer string'])
        s2 = Series([1.0, 2.0], dtype=np.float64)
        original = DataFrame({'s1': s1, 's2': s2})
        original.index.name = 'index'
        with tm.ensure_clean() as path:
            original.to_stata(path)
            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  original) 
Example #30
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_read_write_dta13(self):
        s1 = Series(2 ** 9, dtype=np.int16)
        s2 = Series(2 ** 17, dtype=np.int32)
        s3 = Series(2 ** 33, dtype=np.int64)
        original = DataFrame({'int16': s1, 'int32': s2, 'int64': s3})
        original.index.name = 'index'

        formatted = original
        formatted['int64'] = formatted['int64'].astype(np.float64)

        with tm.ensure_clean() as path:
            original.to_stata(path)
            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  formatted)