Python pandas.read_fwf() Examples

The following are 30 code examples showing how to use pandas.read_fwf(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may want to check out the right sidebar which shows the related API usage.

You may also want to check out all available functions/classes of the module pandas, or try the search function.

Example 1
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 6 votes vote down vote up
def test_fwf_colspecs_None(self):
        # GH 7079
        data = """\
123456
456789
"""
        colspecs = [(0, 3), (3, None)]
        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
        expected = DataFrame([[123, 456], [456, 789]])
        tm.assert_frame_equal(result, expected)

        colspecs = [(None, 3), (3, 6)]
        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
        expected = DataFrame([[123, 456], [456, 789]])
        tm.assert_frame_equal(result, expected)

        colspecs = [(0, None), (3, None)]
        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
        expected = DataFrame([[123456, 456], [456789, 789]])
        tm.assert_frame_equal(result, expected)

        colspecs = [(None, None), (3, 6)]
        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
        expected = DataFrame([[123456, 456], [456789, 789]])
        tm.assert_frame_equal(result, expected) 
Example 2
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 6 votes vote down vote up
def test_fwf_compression(self):
        """Read fixed-width data back from gzip and bz2 compressed files.

        The frame parsed from each compressed file must equal the frame
        parsed from the same text held in memory.
        """
        try:
            import gzip
            import bz2
        except ImportError:
            pytest.skip("Need gzip and bz2 to run this test")

        text = """1111111111
        2222222222
        3333333333""".strip()
        col_widths = [5, 5]
        col_names = ['one', 'two']
        reference = read_fwf(StringIO(text), widths=col_widths,
                             names=col_names)

        # The compressed files are opened in binary mode, so on Python 3 the
        # text has to be turned into bytes before it is written.
        payload = bytes(text, encoding='utf-8') if compat.PY3 else text

        for codec, opener in (('gzip', gzip.GzipFile), ('bz2', bz2.BZ2File)):
            with tm.ensure_clean() as path:
                handle = opener(path, mode='wb')
                handle.write(payload)
                handle.close()
                parsed = read_fwf(path, widths=col_widths, names=col_names,
                                  compression=codec)
                tm.assert_frame_equal(parsed, reference)
Example 3
Project: heliopy   Author: heliopython   File: helios.py    License: GNU General Public License v3.0 6 votes vote down vote up
def load_local_file(self, interval):
        """Read the local fixed-width file for *interval* into a DataFrame.

        The year column has 1900 added to it and is then combined with the
        day-of-year, hour, minute and second columns into a ``Time`` column.
        Those five source columns are dropped, and ``Time`` is used as the
        index while also being kept as a regular column.
        """
        # Column name paired with its (start, stop) character span.
        layout = [
            ('probe', (1, 2)), ('year', (2, 4)), ('doy', (4, 7)),
            ('hour', (7, 9)), ('minute', (9, 11)), ('second', (11, 13)),
            ('naverage', (13, 15)), ('Bx', (15, 22)), ('By', (22, 29)),
            ('Bz', (29, 36)), ('|B|', (36, 42)), ('sigma_Bx', (42, 48)),
            ('sigma_By', (48, 54)), ('sigma_Bz', (54, 60)),
        ]
        column_names = [name for name, _ in layout]
        spans = [span for _, span in layout]
        frame = pd.read_fwf(self.local_path(interval), names=column_names,
                            header=None, colspecs=spans)

        frame['year'] += 1900

        # Start at January 1st of the year, then add each offset in turn.
        # Day-of-year is 1-based, hence the "- 1".
        stamp = pd.to_datetime(frame['year'], format='%Y')
        stamp = stamp + pd.to_timedelta(frame['doy'] - 1, unit='d')
        for column, unit in (('hour', 'h'), ('minute', 'm'), ('second', 's')):
            stamp = stamp + pd.to_timedelta(frame[column], unit=unit)
        frame['Time'] = stamp

        date_parts = ['year', 'doy', 'hour', 'minute', 'second']
        frame = frame.drop(date_parts, axis=1)
        return frame.set_index('Time', drop=False)
Example 4
Project: nmc_met_io   Author: nmcdev   File: retrieve_ghcn.py    License: GNU General Public License v3.0 6 votes vote down vote up
def get_ghcnd_stn_metadata(fname=None, download=False):
    """
    Get the ghcnd station metadata from ghcnd-stations.txt.
    China stations start with "CHM000...", like "CHM00054511".

    Args:
        fname (path or file-like, optional): The station metadata file to
            read. When None, the cached copy is used; it is fetched from
            the NOAA website first if it does not exist yet or if
            ``download`` is True.
        download (bool, optional): Re-download the cached file even when
            it already exists. Only consulted when ``fname`` is None.
            Defaults to False.

    Returns:
        pandas.DataFrame: The parsed file with the columns 'station',
        'lat', 'lon', 'elev' and 'name'.

    Examples:
    >>> stnmd = get_ghcnd_stn_metadata()
    """

    # Identity check: ``== None`` dispatches to the argument's ``__eq__``,
    # which path-like or array-like objects are free to overload.
    if fname is None:
        fname = get_cache_file("pub/data/ghcn/daily/", "ghcnd-stations.txt", name="GHCN")
        if not fname.is_file() or download:
            url = 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt'
            urllib.request.urlretrieve(url, fname)

    # Fixed character spans for each of the five columns read from the file.
    md = pd.read_fwf(fname, colspecs=[(0, 12), (12, 21), (21, 31), (31, 38), (38, 69)],
                     names=['station', 'lat', 'lon', 'elev', 'name'])
    return md
Example 5
Project: elasticintel   Author: securityclippy   File: test_read_fwf.py    License: GNU General Public License v3.0 6 votes vote down vote up
def test_fwf_colspecs_None(self):
        # GH 7079
        data = """\
123456
456789
"""
        colspecs = [(0, 3), (3, None)]
        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
        expected = DataFrame([[123, 456], [456, 789]])
        tm.assert_frame_equal(result, expected)

        colspecs = [(None, 3), (3, 6)]
        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
        expected = DataFrame([[123, 456], [456, 789]])
        tm.assert_frame_equal(result, expected)

        colspecs = [(0, None), (3, None)]
        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
        expected = DataFrame([[123456, 456], [456789, 789]])
        tm.assert_frame_equal(result, expected)

        colspecs = [(None, None), (3, 6)]
        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
        expected = DataFrame([[123456, 456], [456789, 789]])
        tm.assert_frame_equal(result, expected) 
Example 6
Project: elasticintel   Author: securityclippy   File: test_read_fwf.py    License: GNU General Public License v3.0 6 votes vote down vote up
def test_fwf_compression(self):
        """Read fixed-width data back from gzip and bz2 compressed files.

        The frame parsed from each compressed file must equal the frame
        parsed from the same text held in memory.
        """
        try:
            import gzip
            import bz2
        except ImportError:
            pytest.skip("Need gzip and bz2 to run this test")

        text = """1111111111
        2222222222
        3333333333""".strip()
        col_widths = [5, 5]
        col_names = ['one', 'two']
        reference = read_fwf(StringIO(text), widths=col_widths,
                             names=col_names)

        # The compressed files are opened in binary mode, so on Python 3 the
        # text has to be turned into bytes before it is written.
        payload = bytes(text, encoding='utf-8') if compat.PY3 else text

        for codec, opener in (('gzip', gzip.GzipFile), ('bz2', bz2.BZ2File)):
            with tm.ensure_clean() as path:
                handle = opener(path, mode='wb')
                handle.write(payload)
                handle.close()
                parsed = read_fwf(path, widths=col_widths, names=col_names,
                                  compression=codec)
                tm.assert_frame_equal(parsed, reference)
Example 7
Project: modin   Author: modin-project   File: io.py    License: Apache License 2.0 6 votes vote down vote up
def read_fwf(
        cls, filepath_or_buffer, colspecs="infer", widths=None, infer_nrows=100, **kwds
    ):
        """Read a fixed-width file by delegating to ``pandas.read_fwf``.

        A pandas ``DataFrame`` result is converted with ``cls.from_pandas``.
        A ``TextFileReader`` result is returned as-is, except that its
        ``read`` method is replaced by a wrapper that converts whatever it
        reads with ``cls.from_pandas``. Any other result is returned
        unchanged.
        """
        ErrorMessage.default_to_pandas("`read_fwf`")
        result = pandas.read_fwf(
            filepath_or_buffer,
            colspecs=colspecs,
            widths=widths,
            infer_nrows=infer_nrows,
            **kwds,
        )
        if isinstance(result, pandas.DataFrame):
            return cls.from_pandas(result)
        if not isinstance(result, pandas.io.parsers.TextFileReader):
            return result

        # Overwriting the read method should return a Modin DataFrame for
        # calls to __next__ and get_chunk.
        original_read = result.read

        def _read_converted(*args, **kwargs):
            return cls.from_pandas(original_read(*args, **kwargs))

        result.read = _read_converted
        return result
Example 8
Project: modin   Author: modin-project   File: test_io.py    License: Apache License 2.0 6 votes vote down vote up
def test_fwf_file_usecols(usecols):
    """Compare ``pd.read_fwf`` against ``pandas.read_fwf`` for *usecols*.

    The fixed-width test file is written before the comparison and removed
    after it.
    """
    content = """a       b           c          d
id8141  360.242940  149.910199 11950.7
id1594  444.953632  166.985655 11788.4
id1849  364.136849  183.628767 11806.2
id1230  413.836124  184.375703 11916.8
id1948  502.953953  173.237159 12468.3"""
    setup_fwf_file(overwrite=True, fwf_data=content)

    reference = pandas.read_fwf(TEST_FWF_FILENAME, usecols=usecols)
    candidate = pd.read_fwf(TEST_FWF_FILENAME, usecols=usecols)
    df_equals(candidate, reference)

    teardown_fwf_file()
Example 9
Project: modin   Author: modin-project   File: test_io.py    License: Apache License 2.0 6 votes vote down vote up
def test_fwf_file_parse_dates():
    """Compare ``pd.read_fwf`` against ``pandas.read_fwf`` with ``parse_dates``.

    The test file holds ten rows whose ``col2`` and ``col4`` carry the date
    and the time of an hourly range. Both the list-of-lists and the dict
    form of ``parse_dates`` are exercised on those two columns.
    """
    stamps = pandas.date_range("2000", freq="h", periods=10)
    pieces = ["col1 col2        col3 col4"]
    for number, stamp in zip(range(10, 20), stamps):
        pieces.append(
            "\n{col1}   {col2}  {col3}   {col4}".format(
                col1=str(number),
                col2=str(stamp.date()),
                col3=str(number),
                col4=str(stamp.time()),
            )
        )
    setup_fwf_file(overwrite=True, fwf_data="".join(pieces))

    for date_spec in ([["col2", "col4"]], {"time": ["col2", "col4"]}):
        reference = pandas.read_fwf(TEST_FWF_FILENAME, parse_dates=date_spec)
        candidate = pd.read_fwf(TEST_FWF_FILENAME, parse_dates=date_spec)
        df_equals(candidate, reference)

    teardown_fwf_file()
Example 10
Project: DataExploration   Author: AllenDowney   File: marriage.py    License: MIT License 6 votes vote down vote up
def ReadFemResp1995():
    """Reads respondent data from NSFG Cycle 5.

    The values 98 and 99 in `timesmar` are replaced with NaN, and a boolean
    `evrmarry` column is added that is True where `timesmar` is greater
    than zero. The frame is passed to CleanData before it is returned.

    returns: DataFrame
    """
    dat_file = '1995FemRespData.dat.gz'
    names = ['cmintvw', 'timesmar', 'cmmarrhx', 'cmbirth', 'finalwgt']
    # NOTE(review): the "start-1" form presumably converts 1-based column
    # positions from the codebook into the 0-based, half-open spans that
    # read_fwf expects -- confirm against the codebook.
    colspecs = [(12360-1, 12363),
                (4637-1, 4638),
                (11759-1, 11762),
                (14-1, 16),
                (12350-1, 12359)]
    df = pd.read_fwf(dat_file,
                     compression='gzip',
                     colspecs=colspecs,
                     names=names)

    # Assign the result back rather than calling replace(inplace=True) on
    # the attribute: with pandas Copy-on-Write the in-place call modifies a
    # temporary Series and the frame itself is left unchanged.
    df['timesmar'] = df['timesmar'].replace([98, 99], np.nan)
    df['evrmarry'] = (df.timesmar > 0)

    CleanData(df)
    return df
Example 11
Project: poseidon   Author: changhiskhan   File: ssh.py    License: MIT License 5 votes vote down vote up
def ps(self, args=None, options='', all=True, verbose=True,
           as_frame='auto', raise_on_error=True):
        """Run ``ps`` through ``self.wait`` and return its output.

        Parameters
        ----------
        args : str, optional
            Flag letters for ``ps``. A leading '-' is added when missing.
        options : str
            Text appended after the flags on the command line.
        all : bool
            Append the 'A' flag.
        verbose : bool
            Append the 'f' flag.
        as_frame : bool or 'auto'
            Parse the output into a DataFrame. 'auto' means "parse when
            pandas is available" (the module-level ``has_pandas``).
        raise_on_error : bool
            Forwarded to ``self.wait``.

        Returns
        -------
        The value returned by ``self.wait`` when ``as_frame`` is false,
        otherwise a DataFrame parsed from it with ``pd.read_fwf``.

        Raises
        ------
        ImportError
            If a frame is requested but pandas is not available.
        """
        if args is None:
            args = ''
        if all:
            args += 'A'
        if verbose:
            args += 'f'
        if len(args) > 0 and args[0] != '-':
            args = '-' + args

        results = self.wait(('ps %s %s' % (args, options)).strip(),
                            raise_on_error=raise_on_error)

        if as_frame == 'auto':
            as_frame = has_pandas

        if not as_frame:
            return results

        if not has_pandas:
            raise ImportError("Unable to import pandas")

        df = pd.read_fwf(StringIO(results))
        cmd_loc = df.columns.get_loc('CMD')
        # Concatenate CMD with every column that follows it, treating
        # missing cells as empty strings. The column has to be selected by
        # position first: ``cmd_loc`` is an integer and has no ``fillna``,
        # and ``DataFrame.icol`` no longer exists, so ``iloc`` is used.
        col = df.iloc[:, cmd_loc].fillna('')
        for i in range(cmd_loc + 1, len(df.columns)):
            col = col + df.iloc[:, i].fillna('')
        df['CMD'] = col
        return df
Example 12
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_BytesIO_input(self):
        """Parse UTF-8 encoded bytes supplied through a ``BytesIO`` buffer.

        The first line becomes the header and the second the single data
        row, each split into two fields of width two.
        """
        if not compat.PY3:
            pytest.skip(
                "Bytes-related test - only needs to work on Python 3")

        raw = "שלום\nשלום".encode('utf8')
        parsed = read_fwf(BytesIO(raw), widths=[2, 2], encoding='utf8')
        wanted = DataFrame([["של", "ום"]], columns=["של", "ום"])
        tm.assert_frame_equal(parsed, wanted)
Example 13
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(self):
        """Expect a ``TypeError`` for the column specification ``('a', 1)``."""
        text = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
        bad_colspecs = [('a', 1)]
        message = 'Each column specification must be.+'

        with tm.assert_raises_regex(TypeError, message):
            read_fwf(StringIO(text), bad_colspecs)
Example 14
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_fwf_regression(self):
        # GH 3594
        # turns out 'T060' is parsable as a datetime slice!

        tzlist = [1, 10, 20, 30, 60, 80, 100]
        ntz = len(tzlist)
        tcolspecs = [16] + [8] * ntz
        tcolnames = ['SST'] + ["T%03d" % z for z in tzlist[1:]]
        data = """  2009164202000   9.5403  9.4105  8.6571  7.8372  6.0612  5.8843  5.5192
  2009164203000   9.5435  9.2010  8.6167  7.8176  6.0804  5.8728  5.4869
  2009164204000   9.5873  9.1326  8.4694  7.5889  6.0422  5.8526  5.4657
  2009164205000   9.5810  9.0896  8.4009  7.4652  6.0322  5.8189  5.4379
  2009164210000   9.6034  9.0897  8.3822  7.4905  6.0908  5.7904  5.4039
"""

        df = read_fwf(StringIO(data),
                      index_col=0,
                      header=None,
                      names=tcolnames,
                      widths=tcolspecs,
                      parse_dates=True,
                      date_parser=lambda s: datetime.strptime(s, '%Y%j%H%M%S'))

        for c in df.columns:
            res = df.loc[:, c]
            assert len(res) 
Example 15
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_comment_fwf(self):
        """Parse fixed-width data with ``comment='#'`` and check the values."""
        text = """
  1   2.   4  #hello world
  5  NaN  10.0
"""
        spans = [(0, 3), (4, 9), (9, 25)]
        frame = read_fwf(StringIO(text), colspecs=spans, comment='#')

        wanted = np.array([[1, 2., 4],
                           [5, np.nan, 10.]])
        tm.assert_almost_equal(frame.values, wanted)
Example 16
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_1000_fwf(self):
        """Parse fixed-width data with ``thousands=','`` and check the values."""
        text = """
 1 2,334.0    5
10   13     10.
"""
        spans = [(0, 3), (3, 11), (12, 16)]
        frame = read_fwf(StringIO(text), colspecs=spans, thousands=',')

        wanted = np.array([[1, 2334., 5],
                           [10, 13, 10]])
        tm.assert_almost_equal(frame.values, wanted)
Example 17
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_bool_header_arg(self):
        """Expect a ``TypeError`` when ``header`` is a boolean (see gh-6114)."""
        text = """\
MyColumn
   a
   b
   a
   b"""
        for flag in (True, False):
            with pytest.raises(TypeError):
                read_fwf(StringIO(text), header=flag)
Example 18
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_full_file(self):
        # File with all values
        test = """index                             A    B    C
2000-01-03T00:00:00  0.980268513777    3  foo
2000-01-04T00:00:00  1.04791624281    -4  bar
2000-01-05T00:00:00  0.498580885705   73  baz
2000-01-06T00:00:00  1.12020151869     1  foo
2000-01-07T00:00:00  0.487094399463    0  bar
2000-01-10T00:00:00  0.836648671666    2  baz
2000-01-11T00:00:00  0.157160753327   34  foo"""
        colspecs = ((0, 19), (21, 35), (38, 40), (42, 45))
        expected = read_fwf(StringIO(test), colspecs=colspecs)
        tm.assert_frame_equal(expected, read_fwf(StringIO(test))) 
Example 19
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_full_file_with_spaces(self):
        # File with spaces in columns
        test = """
Account                 Name  Balance     CreditLimit   AccountCreated
101     Keanu Reeves          9315.45     10000.00           1/17/1998
312     Gerard Butler         90.00       1000.00             8/6/2003
868     Jennifer Love Hewitt  0           17000.00           5/25/1985
761     Jada Pinkett-Smith    49654.87    100000.00          12/5/2006
317     Bill Murray           789.65      5000.00             2/5/2007
""".strip('\r\n')
        colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70))
        expected = read_fwf(StringIO(test), colspecs=colspecs)
        tm.assert_frame_equal(expected, read_fwf(StringIO(test))) 
Example 20
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_full_file_with_spaces_and_missing(self):
        # File with spaces and missing values in columns
        test = """
Account               Name    Balance     CreditLimit   AccountCreated
101                           10000.00                       1/17/1998
312     Gerard Butler         90.00       1000.00             8/6/2003
868                                                          5/25/1985
761     Jada Pinkett-Smith    49654.87    100000.00          12/5/2006
317     Bill Murray           789.65
""".strip('\r\n')
        colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70))
        expected = read_fwf(StringIO(test), colspecs=colspecs)
        tm.assert_frame_equal(expected, read_fwf(StringIO(test))) 
Example 21
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_messed_up_data(self):
        # Completely messed up file
        test = """
   Account          Name             Balance     Credit Limit   Account Created
       101                           10000.00                       1/17/1998
       312     Gerard Butler         90.00       1000.00

       761     Jada Pinkett-Smith    49654.87    100000.00          12/5/2006
  317          Bill Murray           789.65
""".strip('\r\n')
        colspecs = ((2, 10), (15, 33), (37, 45), (49, 61), (64, 79))
        expected = read_fwf(StringIO(test), colspecs=colspecs)
        tm.assert_frame_equal(expected, read_fwf(StringIO(test))) 
Example 22
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_multiple_delimiters(self):
        test = r"""
col1~~~~~col2  col3++++++++++++++++++col4
~~22.....11.0+++foo~~~~~~~~~~Keanu Reeves
  33+++122.33\\\bar.........Gerard Butler
++44~~~~12.01   baz~~Jennifer Love Hewitt
~~55       11+++foo++++Jada Pinkett-Smith
..66++++++.03~~~bar           Bill Murray
""".strip('\r\n')
        colspecs = ((0, 4), (7, 13), (15, 19), (21, 41))
        expected = read_fwf(StringIO(test), colspecs=colspecs,
                            delimiter=' +~.\\')
        tm.assert_frame_equal(expected, read_fwf(StringIO(test),
                                                 delimiter=' +~.\\')) 
Example 23
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_variable_width_unicode(self):
        """Compare inferred and explicit column spans on UTF-8 encoded bytes."""
        if not compat.PY3:
            pytest.skip(
                'Bytes-related test - only needs to work on Python 3')
        text = """
שלום שלום
ום   שלל
של   ום
""".strip('\r\n')
        encoded = text.encode('utf8')
        explicit = read_fwf(BytesIO(encoded),
                            colspecs=[(0, 4), (5, 9)],
                            header=None, encoding='utf8')
        inferred = read_fwf(BytesIO(encoded), header=None, encoding='utf8')
        tm.assert_frame_equal(explicit, inferred)
Example 24
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_skiprows_inference(self):
        # GH11256
        test = """
Text contained in the file header

DataCol1   DataCol2
     0.0        1.0
   101.6      956.1
""".strip()
        expected = read_csv(StringIO(test), skiprows=2,
                            delim_whitespace=True)
        tm.assert_frame_equal(expected, read_fwf(
            StringIO(test), skiprows=2)) 
Example 25
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_skiprows_by_index_inference(self):
        test = """
To be skipped
Not  To  Be  Skipped
Once more to be skipped
123  34   8      123
456  78   9      456
""".strip()

        expected = read_csv(StringIO(test), skiprows=[0, 2],
                            delim_whitespace=True)
        tm.assert_frame_equal(expected, read_fwf(
            StringIO(test), skiprows=[0, 2])) 
Example 26
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_skiprows_inference_empty(self):
        """Expect ``EmptyDataError`` when ``skiprows`` covers every line."""
        text = """
AA   BBB  C
12   345  6
78   901  2
""".strip()
        buffer = StringIO(text)

        with pytest.raises(EmptyDataError):
            read_fwf(buffer, skiprows=3)
Example 27
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_whitespace_preservation(self):
        # Addresses Issue #16772
        data_expected = """
 a ,bbb
 cc,dd """
        expected = read_csv(StringIO(data_expected), header=None)

        test_data = """
 a bbb
 ccdd """
        result = read_fwf(StringIO(test_data), widths=[3, 3],
                          header=None, skiprows=[0], delimiter="\n\t")

        tm.assert_frame_equal(result, expected) 
Example 28
Project: vnpy_crypto   Author: birforce   File: test_read_fwf.py    License: MIT License 5 votes vote down vote up
def test_default_delimiter(self):
        data_expected = """
a,bbb
cc,dd"""
        expected = read_csv(StringIO(data_expected), header=None)

        test_data = """
a \tbbb
cc\tdd """
        result = read_fwf(StringIO(test_data), widths=[3, 3],
                          header=None, skiprows=[0])

        tm.assert_frame_equal(result, expected) 
Example 29
Project: Computable   Author: ktraunmueller   File: test_parsers.py    License: MIT License 5 votes vote down vote up
def test_1000_fwf(self):
        """Parse fixed-width data with ``thousands=','`` and check the values."""
        text = """
 1 2,334.0    5
10   13     10.
"""
        spans = [(0, 3), (3, 11), (12, 16)]
        frame = read_fwf(StringIO(text), colspecs=spans, thousands=',')

        wanted = [[1, 2334., 5],
                  [10, 13, 10]]
        tm.assert_almost_equal(frame.values, wanted)
Example 30
Project: Computable   Author: ktraunmueller   File: test_parsers.py    License: MIT License 5 votes vote down vote up
def test_comment_fwf(self):
        """Parse fixed-width data with ``comment='#'`` and check the values."""
        text = """
  1   2.   4  #hello world
  5  NaN  10.0
"""
        spans = [(0, 3), (4, 9), (9, 25)]
        frame = read_fwf(StringIO(text), colspecs=spans, comment='#')

        wanted = [[1, 2., 4],
                  [5, np.nan, 10.]]
        tm.assert_almost_equal(frame.values, wanted)