Python pandas.errors.ParserError() Examples

The following are 30 code examples of pandas.errors.ParserError(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.errors, or try the search function.
Example #1
Source File: test_errors.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_error_rename():
    """Check that ``ParserError`` and ``CParserError`` catch one another.

    Each exception is raised under one name and caught under the other;
    an exception that is not caught propagates and fails the test.
    """
    # see gh-12665
    from pandas.errors import ParserError
    from pandas.io.common import CParserError

    # Raise under one name, catch under the other, in both directions.
    for raised, caught in ((CParserError, ParserError),
                           (ParserError, CParserError)):
        try:
            raise raised()
        except caught:
            pass

    # Repeat the check through the ``pd.parser`` attribute path, inside
    # ``catch_warnings(record=True)``.
    with catch_warnings(record=True):
        try:
            raise ParserError()
        except pd.parser.CParserError:
            pass
Example #2
Source File: test_common.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_malformed_chunks(all_parsers, nrows):
    """Expect ``ParserError`` when ``read`` reaches the five-field row."""
    expected_msg = 'Expected 3 fields in line 6, saw 5'
    csv_text = """ignore
A,B,C
skip
1,2,3
3,5,10 # comment
1,2,3,4,5
2,3,4
"""
    # The reader is built outside the ``raises`` block, so only the
    # ``read`` call itself is expected to fail.
    chunk_reader = all_parsers.read_csv(
        StringIO(csv_text), header=1, comment="#",
        iterator=True, chunksize=1, skiprows=[2])

    with pytest.raises(ParserError, match=expected_msg):
        chunk_reader.read(nrows)
Example #3
Source File: test_common.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_uneven_lines_with_usecols(all_parsers, usecols):
    """Ragged rows raise without ``usecols`` and parse with it."""
    # see gh-12203
    csv_text = r"""a,b,c
0,1,2
3,4,5,6,7
8,9,10"""

    if usecols is not None:
        expected = DataFrame({
            "a": [0, 3, 8],
            "b": [1, 4, 9]
        })
        result = all_parsers.read_csv(StringIO(csv_text), usecols=usecols)
        tm.assert_frame_equal(result, expected)
        return

    # Without "usecols" the over-long row must still raise an error.
    pattern = r"Expected \d+ fields in line \d+, saw \d+"
    with pytest.raises(ParserError, match=pattern):
        all_parsers.read_csv(StringIO(csv_text))
Example #4
Source File: backend.py    From beat with GNU General Public License v3.0 6 votes vote down vote up
def _load_df(self):
        """
        Populate ``self._df`` from ``self.filename`` if it is not loaded yet.

        When reading raises ``EmptyDataError`` or ``CParserError`` a warning
        is logged, the file is removed and ``self.corrupted_flag`` is set.
        Otherwise, if ``self.flat_names`` is still empty, the variable names
        and shapes are extracted from the loaded frame.
        """
        # Nothing to do when a frame has already been loaded.
        if self._df is not None:
            return

        try:
            self._df = pd.read_csv(self.filename)
        except pd.errors.EmptyDataError:
            logger.warning(
                'Trace %s is empty and needs to be resampled!' %
                self.filename)
            os.remove(self.filename)
            self.corrupted_flag = True
        except CParserError:
            logger.warning(
                'Trace %s has wrong size!' % self.filename)
            self.corrupted_flag = True
            os.remove(self.filename)

        # Skip the extraction when names are already known or the trace
        # file was flagged as corrupted above.
        if len(self.flat_names) != 0 or self.corrupted_flag:
            return

        self.flat_names, self.var_shapes = extract_variables_from_df(
            self._df)
        self.varnames = list(self.var_shapes.keys())
Example #5
Source File: parsers.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def _alert_malformed(self, msg, row_num):
        """
        Report a malformed row by raising or by writing to stderr.

        `self.error_bad_lines` takes precedence: when it is truthy a
        `ParserError` carrying `msg` is raised. Otherwise, when
        `self.warn_bad_lines` is truthy, a "Skipping line" notice is
        written to `sys.stderr`. When neither is set nothing happens.

        Parameters
        ----------
        msg : The error message to display.
        row_num : The row number where the parsing error occurred.
                  It is shown as given, so callers pass the 1-indexed
                  value even though rows are 0-indexed internally.
        """
        if self.error_bad_lines:
            raise ParserError(msg)

        if not self.warn_bad_lines:
            return

        prefix = 'Skipping line {row_num}: '.format(row_num=row_num)
        sys.stderr.write(prefix + msg + '\n')
Example #6
Source File: test_python_parser_only.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_multi_char_sep_quotes(python_parser_only, quoting):
    """Quoted multi-char separators raise unless quoting is disabled."""
    # see gh-13374
    msg = "ignored when a multi-char delimiter is used"
    data = 'a,,b\n1,,a\n2,,"2,,b"'
    read_kwargs = {"sep": ",,"}

    def expect_parser_error():
        with pytest.raises(ParserError, match=msg):
            python_parser_only.read_csv(
                StringIO(data), quoting=quoting, **read_kwargs)

    if quoting != csv.QUOTE_NONE:
        expect_parser_error()
        return

    # No matching error is expected here, so the inner context manager
    # should itself fail with an AssertionError.
    with pytest.raises(AssertionError):
        expect_parser_error()
Example #7
Source File: test_python_parser_only.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_skipfooter_bad_row(python_parser_only, data, skipfooter):
    """Footer parsing errors are reported only when ``skipfooter`` is set."""
    # see gh-13879 and gh-15910
    expected_msg = "parsing errors in the skipped footer rows"

    def expect_footer_error():
        with pytest.raises(ParserError, match=expected_msg):
            python_parser_only.read_csv(
                StringIO(data), skipfooter=skipfooter)

    if not skipfooter:
        # No matching error is expected here, so the inner context
        # manager should itself fail with an AssertionError.
        with pytest.raises(AssertionError):
            expect_footer_error()
        return

    expect_footer_error()
Example #8
Source File: common.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_bad_stream_exception(self):
        """Expect ``UnicodeDecodeError`` when reading the recoded stream."""
        # Issue 13652:
        # Both the python engine and the C engine should raise
        # UnicodeDecodeError, rather than the c engine raising
        # ParserError and swallowing the exception that caused
        # the read to fail.
        codec = codecs.lookup("utf-8")
        utf8 = codecs.lookup('utf-8')

        msg = ("'utf-8' codec can't decode byte" if compat.PY3
               else "'utf8' codec can't decode byte")

        # stream must be binary UTF8
        with open(self.csv_shiftjs, "rb") as handle:
            recoder = codecs.StreamRecoder(
                handle, utf8.encode, utf8.decode, codec.streamreader,
                codec.streamwriter)
            with recoder as stream:
                with tm.assert_raises_regex(UnicodeDecodeError, msg):
                    self.read_csv(stream)
Example #9
Source File: python_parser_only.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_skipfooter_bad_row(self):
        """Footer parsing errors are reported only with ``skipfooter=1``."""
        # see gh-13879
        # see gh-15910
        msg = 'parsing errors in the skipped footer rows'
        bad_inputs = (
            'a\n1\n"b"a',
            'a,b,c\ncat,foo,bar\ndog,foo,"baz',
        )

        for text in bad_inputs:
            with tm.assert_raises_regex(ParserError, msg):
                self.read_csv(StringIO(text), skipfooter=1)

            # No matching error is expected without ``skipfooter``, so
            # the inner context manager should itself fail with an
            # AssertionError.
            with pytest.raises(AssertionError):
                with tm.assert_raises_regex(ParserError, msg):
                    self.read_csv(StringIO(text))
Example #10
Source File: parsers.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def _alert_malformed(self, msg, row_num):
        """
        Report a malformed row by raising or by writing to stderr.

        `self.error_bad_lines` takes precedence: when it is truthy a
        `ParserError` carrying `msg` is raised. Otherwise, when
        `self.warn_bad_lines` is truthy, a "Skipping line" notice is
        written to `sys.stderr`. When neither is set nothing happens.

        Parameters
        ----------
        msg : The error message to display.
        row_num : The row number where the parsing error occurred.
                  It is shown as given, so callers pass the 1-indexed
                  value even though rows are 0-indexed internally.
        """
        if self.error_bad_lines:
            raise ParserError(msg)

        if not self.warn_bad_lines:
            return

        prefix = 'Skipping line {row_num}: '.format(row_num=row_num)
        sys.stderr.write(prefix + msg + '\n')
Example #11
Source File: test_errors.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_error_rename():
    """Check that ``ParserError`` and ``CParserError`` catch one another.

    Each exception is raised under one name and caught under the other;
    an exception that is not caught propagates and fails the test.
    """
    # see gh-12665
    from pandas.errors import ParserError
    from pandas.io.common import CParserError

    # Raise under one name, catch under the other, in both directions.
    for raised, caught in ((CParserError, ParserError),
                           (ParserError, CParserError)):
        try:
            raise raised()
        except caught:
            pass

    # Repeat the check through the ``pd.parser`` attribute path, inside
    # ``catch_warnings(record=True)``.
    with catch_warnings(record=True):
        try:
            raise ParserError()
        except pd.parser.CParserError:
            pass
Example #12
Source File: python_parser_only.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_skipfooter_bad_row(self):
        """Footer parsing errors are reported only with ``skipfooter=1``."""
        # see gh-13879
        # see gh-15910
        msg = 'parsing errors in the skipped footer rows'
        bad_inputs = (
            'a\n1\n"b"a',
            'a,b,c\ncat,foo,bar\ndog,foo,"baz',
        )

        for text in bad_inputs:
            with tm.assert_raises_regex(ParserError, msg):
                self.read_csv(StringIO(text), skipfooter=1)

            # No matching error is expected without ``skipfooter``, so
            # the inner context manager should itself fail with an
            # AssertionError.
            with pytest.raises(AssertionError):
                with tm.assert_raises_regex(ParserError, msg):
                    self.read_csv(StringIO(text))
Example #13
Source File: test_python_parser_only.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_multi_char_sep_quotes(python_parser_only, quoting):
    """Quoted multi-char separators raise unless quoting is disabled."""
    # see gh-13374
    msg = "ignored when a multi-char delimiter is used"
    data = 'a,,b\n1,,a\n2,,"2,,b"'
    read_kwargs = {"sep": ",,"}

    def expect_parser_error():
        with pytest.raises(ParserError, match=msg):
            python_parser_only.read_csv(
                StringIO(data), quoting=quoting, **read_kwargs)

    if quoting != csv.QUOTE_NONE:
        expect_parser_error()
        return

    # No matching error is expected here, so the inner context manager
    # should itself fail with an AssertionError.
    with pytest.raises(AssertionError):
        expect_parser_error()
Example #14
Source File: common.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_bad_stream_exception(self):
        """Expect ``UnicodeDecodeError`` when reading the recoded stream."""
        # Issue 13652:
        # Both the python engine and the C engine should raise
        # UnicodeDecodeError, rather than the c engine raising
        # ParserError and swallowing the exception that caused
        # the read to fail.
        codec = codecs.lookup("utf-8")
        utf8 = codecs.lookup('utf-8')

        msg = ("'utf-8' codec can't decode byte" if compat.PY3
               else "'utf8' codec can't decode byte")

        # stream must be binary UTF8
        with open(self.csv_shiftjs, "rb") as handle:
            recoder = codecs.StreamRecoder(
                handle, utf8.encode, utf8.decode, codec.streamreader,
                codec.streamwriter)
            with recoder as stream:
                with tm.assert_raises_regex(UnicodeDecodeError, msg):
                    self.read_csv(stream)
Example #15
Source File: python_parser_only.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_skipfooter_bad_row(self):
        """Footer parsing errors are reported only with ``skipfooter=1``."""
        # see gh-13879
        # see gh-15910
        msg = 'parsing errors in the skipped footer rows'
        bad_inputs = (
            'a\n1\n"b"a',
            'a,b,c\ncat,foo,bar\ndog,foo,"baz',
        )

        for text in bad_inputs:
            with tm.assert_raises_regex(ParserError, msg):
                self.read_csv(StringIO(text), skipfooter=1)

            # No matching error is expected without ``skipfooter``, so
            # the inner context manager should itself fail with an
            # AssertionError.
            with pytest.raises(AssertionError):
                with tm.assert_raises_regex(ParserError, msg):
                    self.read_csv(StringIO(text))
Example #16
Source File: test_errors.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_error_rename():
    """Check that ``ParserError`` and ``CParserError`` catch one another.

    Each exception is raised under one name and caught under the other;
    an exception that is not caught propagates and fails the test.
    """
    # see gh-12665
    from pandas.errors import ParserError
    from pandas.io.common import CParserError

    # Raise under one name, catch under the other, in both directions.
    for raised, caught in ((CParserError, ParserError),
                           (ParserError, CParserError)):
        try:
            raise raised()
        except caught:
            pass

    # Repeat the check through the ``pd.parser`` attribute path, inside
    # ``catch_warnings(record=True)``.
    with catch_warnings(record=True):
        try:
            raise ParserError()
        except pd.parser.CParserError:
            pass
Example #17
Source File: test_common.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_malformed_chunks(all_parsers, nrows):
    """Expect ``ParserError`` when ``read`` reaches the five-field row."""
    expected_msg = 'Expected 3 fields in line 6, saw 5'
    csv_text = """ignore
A,B,C
skip
1,2,3
3,5,10 # comment
1,2,3,4,5
2,3,4
"""
    # The reader is built outside the ``raises`` block, so only the
    # ``read`` call itself is expected to fail.
    chunk_reader = all_parsers.read_csv(
        StringIO(csv_text), header=1, comment="#",
        iterator=True, chunksize=1, skiprows=[2])

    with pytest.raises(ParserError, match=expected_msg):
        chunk_reader.read(nrows)
Example #18
Source File: parsers.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def _alert_malformed(self, msg, row_num):
        """
        Report a malformed row by raising or by writing to stderr.

        `self.error_bad_lines` takes precedence: when it is truthy a
        `ParserError` carrying `msg` is raised. Otherwise, when
        `self.warn_bad_lines` is truthy, a "Skipping line" notice is
        written to `sys.stderr`. When neither is set nothing happens.

        Parameters
        ----------
        msg : The error message to display.
        row_num : The row number where the parsing error occurred.
                  It is shown as given, so callers pass the 1-indexed
                  value even though rows are 0-indexed internally.
        """
        if self.error_bad_lines:
            raise ParserError(msg)

        if not self.warn_bad_lines:
            return

        prefix = 'Skipping line {row_num}: '.format(row_num=row_num)
        sys.stderr.write(prefix + msg + '\n')
Example #19
Source File: test_common.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_uneven_lines_with_usecols(all_parsers, usecols):
    """Ragged rows raise without ``usecols`` and parse with it."""
    # see gh-12203
    csv_text = r"""a,b,c
0,1,2
3,4,5,6,7
8,9,10"""

    if usecols is not None:
        expected = DataFrame({
            "a": [0, 3, 8],
            "b": [1, 4, 9]
        })
        result = all_parsers.read_csv(StringIO(csv_text), usecols=usecols)
        tm.assert_frame_equal(result, expected)
        return

    # Without "usecols" the over-long row must still raise an error.
    pattern = r"Expected \d+ fields in line \d+, saw \d+"
    with pytest.raises(ParserError, match=pattern):
        all_parsers.read_csv(StringIO(csv_text))
Example #20
Source File: common.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_bad_stream_exception(self):
        """Expect ``UnicodeDecodeError`` when reading the recoded stream.

        The file at ``self.csv_shiftjs`` is opened in binary mode and
        wrapped in a UTF-8 ``codecs.StreamRecoder`` before being passed to
        ``self.read_csv``.
        """
        # Issue 13652:
        # This test validates that both python engine
        # and C engine will raise UnicodeDecodeError instead of
        # c engine raising ParserError and swallowing exception
        # that caused read to fail.
        codec = codecs.lookup("utf-8")
        utf8 = codecs.lookup('utf-8')

        if compat.PY3:
            msg = "'utf-8' codec can't decode byte"
        else:
            msg = "'utf8' codec can't decode byte"

        # stream must be binary UTF8
        # Context managers close the file and the recoder even when the
        # assertion below fails; a trailing ``stream.close()`` would be
        # skipped in that case and leak the open handle.
        with open(self.csv_shiftjs, "rb") as handle, codecs.StreamRecoder(
                handle, utf8.encode, utf8.decode, codec.streamreader,
                codec.streamwriter) as stream:

            with tm.assert_raises_regex(UnicodeDecodeError, msg):
                self.read_csv(stream)
Example #21
Source File: test_python_parser_only.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_skipfooter_bad_row(python_parser_only, data, skipfooter):
    """Footer parsing errors are reported only when ``skipfooter`` is set."""
    # see gh-13879 and gh-15910
    expected_msg = "parsing errors in the skipped footer rows"

    def expect_footer_error():
        with pytest.raises(ParserError, match=expected_msg):
            python_parser_only.read_csv(
                StringIO(data), skipfooter=skipfooter)

    if not skipfooter:
        # No matching error is expected here, so the inner context
        # manager should itself fail with an AssertionError.
        with pytest.raises(AssertionError):
            expect_footer_error()
        return

    expect_footer_error()
Example #22
Source File: common.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_null_byte_char(self):
        """A leading NULL byte parses on the C engine and raises otherwise."""
        # see gh-2741
        cols = ['a', 'b']
        data = '\x00,foo'
        expected = DataFrame([[np.nan, 'foo']], columns=cols)

        if self.engine != 'c':
            msg = "NULL byte detected"
            with tm.assert_raises_regex(ParserError, msg):
                self.read_csv(StringIO(data), names=cols)
            return

        out = self.read_csv(StringIO(data), names=cols)
        tm.assert_frame_equal(out, expected)
Example #23
Source File: test_python_parser_only.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_malformed_skipfooter(python_parser_only):
    """The five-field row must raise even when a footer is skipped."""
    expected_msg = "Expected 3 fields in line 4, saw 5"
    csv_text = """ignore
A,B,C
1,2,3 # comment
1,2,3,4,5
2,3,4
footer
"""
    with pytest.raises(ParserError, match=expected_msg):
        python_parser_only.read_csv(
            StringIO(csv_text), header=1, comment="#", skipfooter=1)
Example #24
Source File: test_html.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_computer_sales_page(self, datapath):
        """``header=[0, 1]`` must raise; ``header=[1, 2]`` must give a result."""
        html_path = datapath('io', 'data', 'computer_sales_page.html')
        expected_msg = (r"Passed header=\[0,1\] are too many "
                        r"rows for this multi_index of columns")
        with pytest.raises(ParserError, match=expected_msg):
            self.read_html(html_path, header=[0, 1])

        # The path is resolved again before the successful read.
        html_path = datapath('io', 'data', 'computer_sales_page.html')
        tables = self.read_html(html_path, header=[1, 2])
        assert tables
Example #25
Source File: python_parser_only.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_multi_char_sep_quotes(self):
        """Quoted multi-char separators raise unless quoting is disabled."""
        # see gh-13374
        msg = 'ignored when a multi-char delimiter is used'
        data = 'a,,b\n1,,a\n2,,"2,,b"'

        with tm.assert_raises_regex(ParserError, msg):
            self.read_csv(StringIO(data), sep=',,')

        # No matching error is expected with ``QUOTE_NONE``, so the inner
        # context manager should itself fail with an AssertionError.
        no_quote_kwargs = dict(sep=',,', quoting=csv.QUOTE_NONE)
        with pytest.raises(AssertionError):
            with tm.assert_raises_regex(ParserError, msg):
                self.read_csv(StringIO(data), **no_quote_kwargs)
Example #26
Source File: python_parser_only.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_multi_char_sep_quotes(self):
        """Quoted multi-char separators raise unless quoting is disabled."""
        # see gh-13374
        msg = 'ignored when a multi-char delimiter is used'
        data = 'a,,b\n1,,a\n2,,"2,,b"'

        with tm.assert_raises_regex(ParserError, msg):
            self.read_csv(StringIO(data), sep=',,')

        # No matching error is expected with ``QUOTE_NONE``, so the inner
        # context manager should itself fail with an AssertionError.
        no_quote_kwargs = dict(sep=',,', quoting=csv.QUOTE_NONE)
        with pytest.raises(AssertionError):
            with tm.assert_raises_regex(ParserError, msg):
                self.read_csv(StringIO(data), **no_quote_kwargs)
Example #27
Source File: test_common.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_error_bad_lines(all_parsers, kwargs, warn_kwargs):
    """Expect ``ParserError`` for the three-field row in one-column data."""
    # see gh-15925
    expected_msg = "Expected 1 fields in line 3, saw 3"
    csv_text = "a\n1\n1,2,3\n4\n5,6,7"

    # Merge the warning options into the read options in place.
    kwargs.update(**warn_kwargs)

    with pytest.raises(ParserError, match=expected_msg):
        all_parsers.read_csv(StringIO(csv_text), **kwargs)
Example #28
Source File: test_common.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_eof_states(all_parsers, data, kwargs, expected, msg):
    """Compare the parsed frame to ``expected``, or expect ``ParserError``.

    When ``expected`` is None the read must raise ``ParserError`` matching
    ``msg``; otherwise the result must equal ``expected``.
    """
    # see gh-10728, gh-10548
    if expected is not None:
        result = all_parsers.read_csv(StringIO(data), **kwargs)
        tm.assert_frame_equal(result, expected)
        return

    with pytest.raises(ParserError, match=msg):
        all_parsers.read_csv(StringIO(data), **kwargs)
Example #29
Source File: test_common.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_read_csv_wrong_num_columns(all_parsers):
    """Expect ``ParserError`` when a row has more fields than the header."""
    expected_msg = "Expected 6 fields in line 3, saw 7"
    # The header declares six columns but line 3 carries seven fields.
    csv_text = """A,B,C,D,E,F
1,2,3,4,5,6
6,7,8,9,10,11,12
11,12,13,14,15,16
"""

    with pytest.raises(ParserError, match=expected_msg):
        all_parsers.read_csv(StringIO(csv_text))
Example #30
Source File: common.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_null_byte_char(self):
        """A leading NULL byte parses on the C engine and raises otherwise."""
        # see gh-2741
        cols = ['a', 'b']
        data = '\x00,foo'
        expected = DataFrame([[np.nan, 'foo']], columns=cols)

        if self.engine != 'c':
            msg = "NULL byte detected"
            with tm.assert_raises_regex(ParserError, msg):
                self.read_csv(StringIO(data), names=cols)
            return

        out = self.read_csv(StringIO(data), names=cols)
        tm.assert_frame_equal(out, expected)