Python pandas.read_sas() Examples

The following are 30 code examples of pandas.read_sas(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: io.py    From modin with Apache License 2.0 8 votes vote down vote up
def read_sas(
        cls,
        filepath_or_buffer,
        format=None,
        index=None,
        encoding=None,
        chunksize=None,
        iterator=False,
    ):  # pragma: no cover
        """Read a SAS file by delegating to ``pandas.read_sas``.

        Calls ``ErrorMessage.default_to_pandas`` first, then forwards every
        argument unchanged to ``pandas.read_sas`` and returns whatever
        ``cls.from_pandas`` produces from that result.
        """
        ErrorMessage.default_to_pandas("`read_sas`")
        pandas_result = pandas.read_sas(
            filepath_or_buffer,
            format=format,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
            iterator=iterator,
        )
        return cls.from_pandas(pandas_result)
Example #2
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 8 votes vote down vote up
def test_encoding_options(datapath):
    """Check that ``encoding='utf-8'`` matches decoding the columns by hand.

    The file is read once without an encoding and once with
    ``encoding='utf-8'``; after decoding the columns of the first frame the
    two frames must be equal. A reader created with
    ``convert_header_text=False`` must yield column names that decode to the
    same names as the default read.
    """
    fname = datapath("io", "sas", "data", "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Column offers no ``.str`` accessor; leave it untouched.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Close the reader even when read() raises.
        rdr.close()
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()
Example #3
Source File: format.py    From dataiku-contrib with Apache License 2.0 6 votes vote down vote up
def get_format_extractor(self, stream, schema=None):
        """Return a ``SASFormatExtractor`` built from ``stream``, ``schema`` and ``self.config``."""
        extractor = SASFormatExtractor(stream, schema, self.config)
        return extractor


# Fix for the stream class provided by DSS
# Seek could be disabled by a one-liner like the following one but read_sas may seek forward
# self.stream.seek = types.MethodType(lambda self, _: False, self.stream) 
Example #4
Source File: test_sas7bdat.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_encoding_options():
    """Check that ``encoding='utf-8'`` matches decoding the columns by hand.

    The file is read once without an encoding and once with
    ``encoding='utf-8'``; after decoding the columns of the first frame the
    two frames must be equal. A reader created with
    ``convert_header_text=False`` must yield column names that decode to the
    same names as the default read.
    """
    dirpath = tm.get_data_path()
    fname = os.path.join(dirpath, "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Column offers no ``.str`` accessor; leave it untouched.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Close the reader even when read() raises.
        rdr.close()
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()
Example #5
Source File: test_sas7bdat.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_encoding_options(datapath):
    """Check that ``encoding='utf-8'`` matches decoding the columns by hand.

    The file is read once without an encoding and once with
    ``encoding='utf-8'``; after decoding the columns of the first frame the
    two frames must be equal. A reader created with
    ``convert_header_text=False`` must yield column names that decode to the
    same names as the default read.
    """
    fname = datapath("io", "sas", "data", "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Column offers no ``.str`` accessor; leave it untouched.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Close the reader even when read() raises.
        rdr.close()
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()
Example #6
Source File: test_sas7bdat.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_encoding_options(datapath):
    """Check that ``encoding='utf-8'`` matches decoding the columns by hand.

    The file is read once without an encoding and once with
    ``encoding='utf-8'``; after decoding the columns of the first frame the
    two frames must be equal. A reader created with
    ``convert_header_text=False`` must yield column names that decode to the
    same names as the default read.
    """
    fname = datapath("io", "sas", "data", "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Column offers no ``.str`` accessor; leave it untouched.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Close the reader even when read() raises.
        rdr.close()
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()
Example #7
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_zero_variables(datapath):
    """Reading zero_variables.sas7bdat must raise ``EmptyDataError`` (PR #18184)."""
    empty_sas_path = datapath("io", "sas", "data", "zero_variables.sas7bdat")
    with pytest.raises(EmptyDataError):
        pd.read_sas(empty_sas_path)
Example #8
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_from_iterator(self):
        """Read each test file incrementally and compare with the reference frame.

        For every file index in ``self.test_ix[j]`` the reader is opened with
        ``iterator=True``; the first 2 rows and then the next 3 rows must
        equal the matching slices of ``self.data[j]``.
        """
        for j in 0, 1:
            df0 = self.data[j]
            for k in self.test_ix[j]:
                fname = os.path.join(
                    self.dirpath, "test{k}.sas7bdat".format(k=k))
                rdr = pd.read_sas(fname, iterator=True, encoding='utf-8')
                try:
                    df = rdr.read(2)
                    tm.assert_frame_equal(df, df0.iloc[0:2, :])
                    df = rdr.read(3)
                    tm.assert_frame_equal(df, df0.iloc[2:5, :])
                finally:
                    # Close the reader even when a comparison fails.
                    rdr.close()
Example #9
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_path_pathlib(self):
        """Pass ``pathlib.Path`` objects to ``pd.read_sas`` and compare with the reference frames."""
        from pathlib import Path
        for group in (0, 1):
            expected = self.data[group]
            for file_no in self.test_ix[group]:
                joined = os.path.join(
                    self.dirpath, "test{k}.sas7bdat".format(k=file_no))
                sas_path = Path(joined)
                actual = pd.read_sas(sas_path, encoding='utf-8')
                tm.assert_frame_equal(actual, expected)
Example #10
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_path_localpath(self):
        """Pass ``py.path.local`` objects to ``pd.read_sas`` and compare with the reference frames."""
        from py.path import local as LocalPath
        for group in (0, 1):
            expected = self.data[group]
            for file_no in self.test_ix[group]:
                joined = os.path.join(
                    self.dirpath, "test{k}.sas7bdat".format(k=file_no))
                sas_path = LocalPath(joined)
                actual = pd.read_sas(sas_path, encoding='utf-8')
                tm.assert_frame_equal(actual, expected)
Example #11
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_iterator_read_too_much(self):
        """Request more rows than the file holds, with and without ``format`` (github #14734).

        Both readers are asked for ``row_count + 20`` rows and the two
        resulting frames must be equal.
        """
        k = self.test_ix[0][0]
        fname = os.path.join(self.dirpath, "test{k}.sas7bdat".format(k=k))
        rdr = pd.read_sas(fname, format="sas7bdat",
                          iterator=True, encoding='utf-8')
        try:
            d1 = rdr.read(rdr.row_count + 20)
        finally:
            # Close the reader even when read() raises.
            rdr.close()

        rdr = pd.read_sas(fname, iterator=True, encoding="utf-8")
        try:
            d2 = rdr.read(rdr.row_count + 20)
            tm.assert_frame_equal(d1, d2)
        finally:
            # Close the reader even when the read or comparison fails.
            rdr.close()
Example #12
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_productsales(datapath):
    """Compare productsales.sas7bdat (read as UTF-8) with productsales.csv."""
    sas_path = datapath("io", "sas", "data", "productsales.sas7bdat")
    actual = pd.read_sas(sas_path, encoding='utf-8')
    csv_path = datapath("io", "sas", "data", "productsales.csv")
    expected = pd.read_csv(csv_path, parse_dates=['MONTH'])
    # Cast the listed CSV columns to float64 before the comparison.
    float_columns = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"]
    expected[float_columns] = expected[float_columns].astype(np.float64)
    tm.assert_frame_equal(actual, expected)
Example #13
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_12659(datapath):
    """Compare test_12659.sas7bdat with test_12659.csv cast to float64."""
    sas_path = datapath("io", "sas", "data", "test_12659.sas7bdat")
    actual = pd.read_sas(sas_path)
    csv_path = datapath("io", "sas", "data", "test_12659.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)
    tm.assert_frame_equal(actual, expected)
Example #14
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_airline(datapath):
    """Compare airline.sas7bdat with airline.csv cast to float64 (inexact comparison)."""
    sas_path = datapath("io", "sas", "data", "airline.sas7bdat")
    actual = pd.read_sas(sas_path)
    csv_path = datapath("io", "sas", "data", "airline.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)
    tm.assert_frame_equal(actual, expected, check_exact=False)
Example #15
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_compact_numerical_values(datapath):
    """Regression test for #21616: integral values must not gain decimals.

    The columns CYL and WGT in cars.sas7bdat have column width < 8 and only
    contain integral values, so each column must equal its own rounded copy
    exactly.
    """
    cars_path = datapath("io", "sas", "data", "cars.sas7bdat")
    cars = pd.read_sas(cars_path, encoding='latin-1')
    for column in ('WGT', 'CYL'):
        observed = cars[column]
        rounded = cars[column].round()
        tm.assert_series_equal(observed, rounded, check_exact=True)
Example #16
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_many_columns(datapath):
    """Compare many_columns.sas7bdat with many_columns.csv (PR #22628).

    Exercises looking for column information in more places.
    """
    sas_path = datapath("io", "sas", "data", "many_columns.sas7bdat")
    actual = pd.read_sas(sas_path, encoding='latin-1')
    csv_path = datapath("io", "sas", "data", "many_columns.csv")
    expected = pd.read_csv(csv_path, encoding='latin-1')
    tm.assert_frame_equal(actual, expected)
Example #17
Source File: test_sas7bdat.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_inconsistent_number_of_rows(datapath):
    """Regression test for issue #16615 (PR #22628): load_log.sas7bdat has 2097 rows."""
    log_path = datapath("io", "sas", "data", "load_log.sas7bdat")
    frame = pd.read_sas(log_path, encoding='latin-1')
    row_total = len(frame)
    assert row_total == 2097
Example #18
Source File: test_sas7bdat.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_zero_variables(datapath):
    """Reading zero_variables.sas7bdat must raise ``EmptyDataError`` (PR #18184)."""
    empty_sas_path = datapath("io", "sas", "data", "zero_variables.sas7bdat")
    with pytest.raises(EmptyDataError):
        pd.read_sas(empty_sas_path)
Example #19
Source File: test_sas7bdat.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_from_file(self):
        """Read every indexed test file as UTF-8 and compare it with its reference frame."""
        for group in (0, 1):
            expected = self.data[group]
            for file_no in self.test_ix[group]:
                sas_path = os.path.join(
                    self.dirpath, "test%d.sas7bdat" % file_no)
                actual = pd.read_sas(sas_path, encoding='utf-8')
                tm.assert_frame_equal(actual, expected)
Example #20
Source File: test_sas7bdat.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_from_buffer(self):
        """Read each test file from an in-memory buffer and compare with the reference frame.

        The file's bytes are loaded into an ``io.BytesIO`` that is handed to
        ``pd.read_sas`` with an explicit ``format="sas7bdat"``.
        """
        for j in 0, 1:
            df0 = self.data[j]
            for k in self.test_ix[j]:
                fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
                with open(fname, 'rb') as f:
                    byts = f.read()
                buf = io.BytesIO(byts)
                rdr = pd.read_sas(buf, format="sas7bdat",
                                  iterator=True, encoding='utf-8')
                try:
                    df = rdr.read()
                    tm.assert_frame_equal(df, df0, check_exact=False)
                finally:
                    # Close the reader even when the comparison fails.
                    rdr.close()
Example #21
Source File: test_sas7bdat.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_from_iterator(self):
        """Read each test file incrementally and compare with the reference frame.

        For every file index in ``self.test_ix[j]`` the reader is opened with
        ``iterator=True``; the first 2 rows and then the next 3 rows must
        equal the matching slices of ``self.data[j]``.
        """
        for j in 0, 1:
            df0 = self.data[j]
            for k in self.test_ix[j]:
                fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
                rdr = pd.read_sas(fname, iterator=True, encoding='utf-8')
                try:
                    df = rdr.read(2)
                    tm.assert_frame_equal(df, df0.iloc[0:2, :])
                    df = rdr.read(3)
                    tm.assert_frame_equal(df, df0.iloc[2:5, :])
                finally:
                    # Close the reader even when a comparison fails.
                    rdr.close()
Example #22
Source File: test_sas7bdat.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_path_pathlib(self):
        """Pass ``pathlib.Path`` objects to ``pd.read_sas`` and compare with the reference frames."""
        tm._skip_if_no_pathlib()
        from pathlib import Path
        for group in (0, 1):
            expected = self.data[group]
            for file_no in self.test_ix[group]:
                joined = os.path.join(
                    self.dirpath, "test%d.sas7bdat" % file_no)
                sas_path = Path(joined)
                actual = pd.read_sas(sas_path, encoding='utf-8')
                tm.assert_frame_equal(actual, expected)
Example #23
Source File: test_sas7bdat.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_path_localpath(self):
        """Pass ``py.path.local`` objects to ``pd.read_sas`` and compare with the reference frames."""
        tm._skip_if_no_localpath()
        from py.path import local as LocalPath
        for group in (0, 1):
            expected = self.data[group]
            for file_no in self.test_ix[group]:
                joined = os.path.join(
                    self.dirpath, "test%d.sas7bdat" % file_no)
                sas_path = LocalPath(joined)
                actual = pd.read_sas(sas_path, encoding='utf-8')
                tm.assert_frame_equal(actual, expected)
Example #24
Source File: test_sas7bdat.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_iterator_read_too_much(self):
        """Request more rows than the file holds, with and without ``format`` (github #14734).

        Both readers are asked for ``row_count + 20`` rows and the two
        resulting frames must be equal.
        """
        k = self.test_ix[0][0]
        fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
        rdr = pd.read_sas(fname, format="sas7bdat",
                          iterator=True, encoding='utf-8')
        try:
            d1 = rdr.read(rdr.row_count + 20)
        finally:
            # Close the reader even when read() raises.
            rdr.close()

        rdr = pd.read_sas(fname, iterator=True, encoding="utf-8")
        try:
            d2 = rdr.read(rdr.row_count + 20)
            tm.assert_frame_equal(d1, d2)
        finally:
            # Close the reader even when the read or comparison fails.
            rdr.close()
Example #25
Source File: test_sas7bdat.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_productsales():
    """Compare productsales.sas7bdat (read as UTF-8) with productsales.csv."""
    data_dir = tm.get_data_path()
    sas_path = os.path.join(data_dir, "productsales.sas7bdat")
    actual = pd.read_sas(sas_path, encoding='utf-8')
    csv_path = os.path.join(data_dir, "productsales.csv")
    expected = pd.read_csv(csv_path, parse_dates=['MONTH'])
    # Cast the listed CSV columns to float64 before the comparison.
    float_columns = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"]
    expected[float_columns] = expected[float_columns].astype(np.float64)
    tm.assert_frame_equal(actual, expected)
Example #26
Source File: test_sas7bdat.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_12659():
    """Compare test_12659.sas7bdat with test_12659.csv cast to float64."""
    data_dir = tm.get_data_path()
    sas_path = os.path.join(data_dir, "test_12659.sas7bdat")
    actual = pd.read_sas(sas_path)
    csv_path = os.path.join(data_dir, "test_12659.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)
    tm.assert_frame_equal(actual, expected)
Example #27
Source File: test_sas7bdat.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_airline():
    """Compare airline.sas7bdat with airline.csv cast to float64 (inexact comparison)."""
    data_dir = tm.get_data_path()
    sas_path = os.path.join(data_dir, "airline.sas7bdat")
    actual = pd.read_sas(sas_path)
    csv_path = os.path.join(data_dir, "airline.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)
    tm.assert_frame_equal(actual, expected, check_exact=False)
Example #28
Source File: test_io.py    From modin with Apache License 2.0 5 votes vote down vote up
def test_from_sas():
    """Compare ``pd.read_sas`` with ``pandas.read_sas`` on ``TEST_SAS_FILENAME``.

    NOTE(review): ``pd`` is presumably modin's pandas API here - confirm
    against the module imports.
    """
    reference_frame = pandas.read_sas(TEST_SAS_FILENAME)
    candidate_frame = pd.read_sas(TEST_SAS_FILENAME)
    df_equals(candidate_frame, reference_frame)
Example #29
Source File: test_sas7bdat.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_inconsistent_number_of_rows(datapath):
    """Regression test for issue #16615 (PR #22628): load_log.sas7bdat has 2097 rows."""
    log_path = datapath("io", "sas", "data", "load_log.sas7bdat")
    frame = pd.read_sas(log_path, encoding='latin-1')
    row_total = len(frame)
    assert row_total == 2097
Example #30
Source File: format.py    From dataiku-contrib with Apache License 2.0 5 votes vote down vote up
def __init__(self, stream, schema, config):
        """Set up chunked SAS reading from ``stream``.

        Args:
            stream: Input stream passed to ``FormatExtractor.__init__``. It is
                wrapped in ``ForwardSeekStream`` for reading, or copied to a
                file first when ``dump_to_file`` is set.
            schema: Only its presence is recorded here, in ``self.hasSchema``.
            config: Mapping read with ``.get`` for the optional keys
                ``chunksize`` (default ``"10000"``), ``sas_format`` (default
                ``"sas7bdat"``), ``encoding`` (default ``"latin_1"``) and
                ``dump_to_file`` (default ``False``).
        """
        FormatExtractor.__init__(self, stream)

        chunksize = int(config.get("chunksize", "10000"))
        sas_format = config.get("sas_format", "sas7bdat")
        encoding = config.get("encoding", "latin_1")
        dump_to_file = config.get("dump_to_file", False)

        self.hasSchema = schema is not None

        read_from = ForwardSeekStream(stream)

        if dump_to_file:
            dirname, _ = os.path.split(os.path.abspath(__file__))
            fullpath = os.path.join(dirname, 'dumped-%s.sas7bdat' % (time.time()))
            # Binary mode: the stream yields bytes (the sentinel is b''), and
            # text mode would reject them or translate newlines.
            with open(fullpath, 'wb') as of:
                # Copy the stream in 500000-byte reads until it is exhausted.
                for data in iter((lambda: stream.read(500000)), b''):
                    of.write(data)

            # Let pandas read from the dumped file instead of the stream.
            read_from = fullpath

        self.iterator = pd.read_sas(read_from,
                                    format=sas_format,
                                    iterator=True,
                                    encoding=encoding,
                                    chunksize=chunksize)

        self.get_chunk()