Python pandas.io.parsers.read_table() Examples

The following are 10 code examples of pandas.io.parsers.read_table(), collected from open-source projects; the source file and project for each example are noted above it. You may also want to check out all available functions/classes of the module pandas.io.parsers, or try the search function.
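Before the project examples, here is a minimal, self-contained sketch of read_table itself; the data and column names are invented purely for illustration. read_table parses delimited text into a DataFrame and behaves like read_csv with a tab as the default separator.

# Minimal sketch: parse tab-delimited text into a DataFrame.
# The data below is invented for illustration only.
from io import StringIO

from pandas.io.parsers import read_table

data = "city\tpopulation\nOslo\t700000\nBergen\t290000\n"
df = read_table(StringIO(data))   # default separator is '\t'
print(df.shape)                   # (2, 2)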
Example #1
Source File: test_network.py    From vnpy_crypto with MIT License
# Imports needed by this excerpt:
from pandas.io.parsers import read_table
import pandas.util.testing as tm


def check_compressed_urls(salaries_table, compression, extension, mode,
                          engine):
    # test reading compressed urls with various engines and
    # extension inference
    base_url = ('https://github.com/pandas-dev/pandas/raw/master/'
                'pandas/tests/io/parser/data/salaries.csv')

    url = base_url + extension

    if mode != 'explicit':
        compression = mode

    url_table = read_table(url, compression=compression, engine=engine)
    tm.assert_frame_equal(url_table, salaries_table) 
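The test above exercises explicit versus inferred compression against URLs. The same behaviour can be checked locally; a sketch, assuming a gzip-compressed copy of the dataset exists under the hypothetical name salaries.csv.gz:

# Sketch: explicit vs. inferred compression on a local file.
# 'salaries.csv.gz' is a hypothetical local copy of the dataset.
from pandas.io.parsers import read_table
import pandas.util.testing as tm

explicit = read_table('salaries.csv.gz', compression='gzip')
inferred = read_table('salaries.csv.gz', compression='infer')  # inferred from the .gz suffix
tm.assert_frame_equal(explicit, inferred)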
Example #2
Source File: conftest.py    From vnpy_crypto with MIT License
# Assumes `from pandas.io.parsers import read_table` at module level.
def salaries_table(datapath):
    """DataFrame with the salaries dataset"""
    return read_table(datapath('io', 'parser', 'data', 'salaries.csv')) 
Example #3
Source File: frame.py    From Computable with MIT License
def from_csv(cls, path, header=0, sep=',', index_col=0,
                 parse_dates=True, encoding=None, tupleize_cols=False,
                 infer_datetime_format=False):
        """
        Read delimited file into DataFrame

        Parameters
        ----------
        path : string file path or file handle / StringIO
        header : int, default 0
            Row to use as the header (prior rows are skipped)
        sep : string, default ','
            Field delimiter
        index_col : int or sequence, default 0
            Column to use for index. If a sequence is given, a MultiIndex
            is used. Different default from read_table
        parse_dates : boolean, default True
            Parse dates. Different default from read_table
        tupleize_cols : boolean, default False
            If True, write MultiIndex columns as a list of tuples;
            if False, write them in the new (expanded) format
        infer_datetime_format : boolean, default False
            If True and `parse_dates` is True for a column, try to infer the
            datetime format based on the first datetime string. If the format
            can be inferred, there often will be a large parsing speed-up.

        Notes
        -----
        read_table is preferable for most general purposes, but from_csv
        makes for an easy round trip to and from a file, especially with a
        DataFrame of time series data

        Returns
        -------
        y : DataFrame
        """
        from pandas.io.parsers import read_table
        return read_table(path, header=header, sep=sep,
                          parse_dates=parse_dates, index_col=index_col,
                          encoding=encoding, tupleize_cols=tupleize_cols,
                          infer_datetime_format=infer_datetime_format) 
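As the Notes section says, from_csv pairs naturally with DataFrame.to_csv for a round trip. A minimal sketch, assuming a pandas version that still ships DataFrame.from_csv (it was deprecated in later releases in favour of read_csv); the file name is hypothetical:

# Round-trip sketch with the API shown above; 'prices.csv' is a
# hypothetical file name.
import pandas as pd

df = pd.DataFrame({'price': [1.0, 2.5]},
                  index=pd.date_range('2014-01-01', periods=2, name='date'))
df.to_csv('prices.csv')
roundtripped = pd.DataFrame.from_csv('prices.csv')  # index and dates parsed back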
Example #4
Source File: clipboard.py    From Computable with MIT License
# Imports needed by this excerpt:
from pandas import compat, get_option
from pandas.compat import StringIO


def read_clipboard(**kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    If unspecified, `sep` defaults to '\s+'

    Returns
    -------
    parsed : DataFrame
    """
    if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
        kwargs['sep'] = '\s+'
    from pandas.util.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding'))
            )
        except:
            pass
    return read_table(StringIO(text), **kwargs) 
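Since read_clipboard only funnels the clipboard text into read_table, the parsing step can be reproduced without a clipboard by passing the text directly; a sketch with invented input:

# Sketch of the final step of read_clipboard: hand the text to read_table
# with a whitespace separator. The text below stands in for clipboard_get().
from io import StringIO

from pandas.io.parsers import read_table

text = "a  b\n1  2\n3  4\n"
df = read_table(StringIO(text), sep=r'\s+')   # two columns, two rows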
Example #5
Source File: test_network.py    From elasticintel with GNU General Public License v3.0
# Assumes `import os`, `import pandas.util.testing as tm`, and
# `from pandas.io.parsers import read_table` at module level.
def salaries_table():
    path = os.path.join(tm.get_data_path(), 'salaries.csv')
    return read_table(path) 
Example #6
Source File: test_network.py    From elasticintel with GNU General Public License v3.0
def check_compressed_urls(salaries_table, compression, extension, mode,
                          engine):
    # test reading compressed urls with various engines and
    # extension inference
    base_url = ('https://github.com/pandas-dev/pandas/raw/master/'
                'pandas/tests/io/parser/data/salaries.csv')

    url = base_url + extension

    if mode != 'explicit':
        compression = mode

    url_table = read_table(url, compression=compression, engine=engine)
    tm.assert_frame_equal(url_table, salaries_table) 
Example #7
Source File: test_network.py    From twitter-stock-recommendation with MIT License
def check_compressed_urls(salaries_table, compression, extension, mode,
                          engine):
    # test reading compressed urls with various engines and
    # extension inference
    base_url = ('https://github.com/pandas-dev/pandas/raw/master/'
                'pandas/tests/io/parser/data/salaries.csv')

    url = base_url + extension

    if mode != 'explicit':
        compression = mode

    url_table = read_table(url, compression=compression, engine=engine)
    tm.assert_frame_equal(url_table, salaries_table) 
Example #8
Source File: conftest.py    From twitter-stock-recommendation with MIT License
def salaries_table(datapath):
    """DataFrame with the salaries dataset"""
    return read_table(datapath('io', 'parser', 'data', 'salaries.csv')) 
Example #9
Source File: clipboards.py    From Splunking-Crime with GNU Affero General Public License v3.0
# Imports needed by this excerpt:
from pandas import compat, get_option
from pandas.compat import StringIO


def read_clipboard(sep='\s+', **kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    Parameters
    ----------
    sep : str, default '\s+'.
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    Returns
    -------
    parsed : DataFrame
    """
    encoding = kwargs.pop('encoding', 'utf-8')

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
        raise NotImplementedError(
            'reading from clipboard only supports utf-8 encoding')

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding'))
            )
        except:
            pass

    # Excel copies into the clipboard with \t separation.
    # Inspect no more than the first 10 lines; if they all contain an
    # equal (non-zero) number of tabs, infer that the text came from
    # Excel and set 'sep' accordingly.
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_table
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = set([x.lstrip().count('\t') for x in lines])
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = '\t'

    if sep is None and kwargs.get('delim_whitespace') is None:
        sep = '\s+'

    return read_table(StringIO(text), sep=sep, **kwargs) 
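The tab-sniffing heuristic in the middle of this function can be illustrated on its own; a sketch with invented clipboard text that mimics a spreadsheet copy:

# Isolated sketch of the tab-sniffing heuristic above: if every inspected
# line contains the same non-zero number of tabs, treat the text as
# tab-separated (typical of a copy from a spreadsheet).
text = "a\tb\n1\t2\n3\t4\n"   # stands in for clipboard_get()
lines = text[:10000].split('\n')[:-1][:10]
counts = {line.lstrip().count('\t') for line in lines}
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
    sep = '\t'          # looks like a spreadsheet copy
else:
    sep = r'\s+'        # fall back to whitespace splitting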
Example #10
Source File: clipboards.py    From elasticintel with GNU General Public License v3.0
def read_clipboard(sep='\s+', **kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_table. See read_table for the
    full argument list

    Parameters
    ----------
    sep : str, default '\s+'.
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    Returns
    -------
    parsed : DataFrame
    """
    encoding = kwargs.pop('encoding', 'utf-8')

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
        raise NotImplementedError(
            'reading from clipboard only supports utf-8 encoding')

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_table
    text = clipboard_get()

    # try to decode (if needed on PY3)
    # Strange. linux py33 doesn't complain, win py33 does
    if compat.PY3:
        try:
            text = compat.bytes_to_str(
                text, encoding=(kwargs.get('encoding') or
                                get_option('display.encoding'))
            )
        except:
            pass

    # Excel copies into the clipboard with \t separation.
    # Inspect no more than the first 10 lines; if they all contain an
    # equal (non-zero) number of tabs, infer that the text came from
    # Excel and set 'sep' accordingly.
    lines = text[:10000].split('\n')[:-1][:10]

    # Need to remove leading white space, since read_table
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = set([x.lstrip().count('\t') for x in lines])
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = '\t'

    if sep is None and kwargs.get('delim_whitespace') is None:
        sep = '\s+'

    return read_table(StringIO(text), sep=sep, **kwargs)