Python pandas.core.api.DataFrame() Examples

The following are 30 code examples of pandas.core.api.DataFrame(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.api, or try the search function.
Example #1
Source File: moments.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def expanding_count(arg, freq=None):
    """
    Count the non-NaN observations seen so far (expanding window).

    Thin wrapper that forwards to ``ensure_compat`` with the
    ``'expanding'`` dispatch and the ``'count'`` method name.

    Parameters
    ----------
    arg : DataFrame or numpy ndarray-like
        Data to count over.
    freq : string or DateOffset object, optional (default None)
        Frequency the data is conformed to before the statistic is
        computed, given as a frequency string or DateOffset object.

    Returns
    -------
    expanding_count : type of caller

    Notes
    -----
    When `freq` is given, time series data is first resampled to that
    frequency using the default parameters of
    :meth:`~pandas.Series.resample` (i.e. using the `mean`).

    Frequency strings are described in the `offset aliases documentation
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
    """
    counted = ensure_compat('expanding', 'count', arg, freq=freq)
    return counted
Example #2
Source File: resample.py    From Computable with MIT License 6 votes vote down vote up
def asfreq(obj, freq, method=None, how=None, normalize=False):
    """
    Convert a Series/DataFrame to the requested frequency.

    Parameters
    ----------
    obj : Series or DataFrame
        Object whose index is converted.
    freq : frequency string or offset
        Passed to ``PeriodIndex.asfreq`` for period-indexed input and to
        ``date_range`` otherwise.
    method : optional
        Fill method forwarded to ``reindex``. Must be None when the index
        is a PeriodIndex.
    how : optional
        Forwarded to ``PeriodIndex.asfreq``; 'E' is used when None.
    normalize : bool, default False
        If True, the rebuilt (non-period) index is normalized.

    Returns
    -------
    A new object of the same kind as `obj` with the converted index.

    Raises
    ------
    NotImplementedError
        If `method` is given together with a PeriodIndex.
    """
    index = obj.index

    if not isinstance(index, PeriodIndex):
        # An empty index has no endpoints to span; return an untouched copy.
        if len(index) == 0:
            return obj.copy()
        target = date_range(index[0], index[-1], freq=freq)
        converted = obj.reindex(target, method=method)
        if normalize:
            converted.index = converted.index.normalize()
        return converted

    # Period-indexed input: only a relabelling of the index is supported.
    if method is not None:
        raise NotImplementedError

    relabelled = index.asfreq(freq, how='E' if how is None else how)
    converted = obj.copy()
    converted.index = relabelled
    return converted
Example #3
Source File: moments.py    From Computable with MIT License 6 votes vote down vote up
def _process_data_structure(arg, kill_inf=True):
    if isinstance(arg, DataFrame):
        return_hook = lambda v: type(arg)(v, index=arg.index,
                                          columns=arg.columns)
        values = arg.values
    elif isinstance(arg, Series):
        values = arg.values
        return_hook = lambda v: Series(v, arg.index)
    else:
        return_hook = lambda v: v
        values = arg

    if not issubclass(values.dtype.type, float):
        values = values.astype(float)

    if kill_inf:
        values = values.copy()
        values[np.isinf(values)] = np.NaN

    return return_hook, values

#------------------------------------------------------------------------------
# Exponential moving moments 
Example #4
Source File: test_filters.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_filter_multiple_timestamp(self):
        # GH 10114
        df = DataFrame({'A': np.arange(5, dtype='int64'),
                        'B': ['foo', 'bar', 'foo', 'bar', 'bar'],
                        'C': Timestamp('20130101')})

        grouped = df.groupby(['B', 'C'])

        result = grouped['A'].filter(lambda x: True)
        assert_series_equal(df['A'], result)

        result = grouped['A'].transform(len)
        expected = Series([2, 3, 2, 3, 3], name='A')
        assert_series_equal(result, expected)

        result = grouped.filter(lambda x: True)
        assert_frame_equal(df, result)

        result = grouped.transform('sum')
        expected = DataFrame({'A': [2, 8, 2, 8, 8]})
        assert_frame_equal(result, expected)

        result = grouped.transform(len)
        expected = DataFrame({'A': [2, 3, 2, 3, 3]})
        assert_frame_equal(result, expected) 
Example #5
Source File: test_filters.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_filter_bad_shapes(self):
        """filter() raises TypeError for callbacks that return array-likes."""
        frame = DataFrame({'A': np.arange(8),
                           'B': list('aabbbbcc'),
                           'C': np.arange(8)})
        column = frame['B']
        frame_groups = frame.groupby('B')
        series_groups = column.groupby(column)

        def identity(x):
            return x

        def equals_one(x):
            return x == 1

        def outer_product(x):
            return np.outer(x, x)

        # Each callback hands back the group itself, an element-wise
        # comparison, or an outer product rather than a single value.
        for bad_func in (identity, equals_one, outer_product):
            with pytest.raises(TypeError):
                frame_groups.filter(bad_func)
            with pytest.raises(TypeError):
                series_groups.filter(bad_func)
Example #6
Source File: moments.py    From Computable with MIT License 6 votes vote down vote up
def _center_window(rs, window, axis):
    if axis > rs.ndim-1:
        raise ValueError("Requested axis is larger then no. of argument dimensions")

    offset = int((window - 1) / 2.)
    if isinstance(rs, (Series, DataFrame, Panel)):
        rs = rs.shift(-offset, axis=axis)
    else:
        rs_indexer = [slice(None)] * rs.ndim
        rs_indexer[axis] = slice(None, -offset)

        lead_indexer = [slice(None)] * rs.ndim
        lead_indexer[axis] = slice(offset, None)

        na_indexer = [slice(None)] * rs.ndim
        na_indexer[axis] = slice(-offset, None)

        rs[tuple(rs_indexer)] = np.copy(rs[tuple(lead_indexer)])
        rs[tuple(na_indexer)] = np.nan
    return rs 
Example #7
Source File: moments.py    From Computable with MIT License 6 votes vote down vote up
def expanding_count(arg, freq=None, center=False, time_rule=None):
    """
    Expanding count of the number of non-NaN observations.

    Implemented as ``rolling_count`` with the window set to ``len(arg)``.

    Parameters
    ----------
    arg : DataFrame or numpy ndarray-like
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing the statistic
    center : boolean, default False
        Whether the label should correspond with the center of the window
    time_rule : Legacy alias for freq

    Returns
    -------
    expanding_count : type of caller
    """
    window = len(arg)
    return rolling_count(arg, window, freq=freq, center=center,
                         time_rule=time_rule)
Example #8
Source File: moments.py    From Computable with MIT License 6 votes vote down vote up
def expanding_quantile(arg, quantile, min_periods=1, freq=None,
                       center=False, time_rule=None):
    """Expanding quantile

    Implemented as ``rolling_quantile`` with the window set to ``len(arg)``.

    Parameters
    ----------
    arg : Series, DataFrame
    quantile : 0 <= quantile <= 1
    min_periods : int, default 1
        Minimum number of observations in window required to have a value
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing the statistic
    center : boolean, default False
        Whether the label should correspond with the center of the window
    time_rule : Legacy alias for freq

    Returns
    -------
    y : type of input argument
    """
    window = len(arg)
    return rolling_quantile(arg, window, quantile,
                            min_periods=min_periods, freq=freq,
                            center=center, time_rule=time_rule)
Example #9
Source File: moments.py    From Computable with MIT License 6 votes vote down vote up
def expanding_corr_pairwise(df, min_periods=1):
    """
    Pairwise expanding correlation matrices, returned as a Panel whose
    items are dates.

    Implemented as ``rolling_corr_pairwise`` with the window set to
    ``len(df)``.

    Parameters
    ----------
    df : DataFrame
    min_periods : int, default 1

    Returns
    -------
    correls : Panel
    """
    return rolling_corr_pairwise(df, len(df), min_periods=min_periods)
Example #10
Source File: moments.py    From Computable with MIT License 6 votes vote down vote up
def expanding_apply(arg, func, min_periods=1, freq=None, center=False,
                    time_rule=None):
    """Generic expanding function application

    Implemented as ``rolling_apply`` with the window set to ``len(arg)``.

    Parameters
    ----------
    arg : Series, DataFrame
    func : function
        Must produce a single value from an ndarray input
    min_periods : int, default 1
        Minimum number of observations in window required to have a value
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing the statistic
    center : boolean, default False
        Whether the label should correspond with the center of the window
    time_rule : Legacy alias for freq

    Returns
    -------
    y : type of input argument
    """
    return rolling_apply(arg, len(arg), func,
                         min_periods=min_periods, freq=freq,
                         center=center, time_rule=time_rule)
Example #11
Source File: test_filters.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_filter_single_column_df(self):
        df = pd.DataFrame([1, 3, 20, 5, 22, 24, 7])
        expected_odd = pd.DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6])
        expected_even = pd.DataFrame([20, 22, 24], index=[2, 4, 5])
        grouper = df[0].apply(lambda x: x % 2)
        grouped = df.groupby(grouper)
        assert_frame_equal(
            grouped.filter(lambda x: x.mean() < 10), expected_odd)
        assert_frame_equal(
            grouped.filter(lambda x: x.mean() > 10), expected_even)
        # Test dropna=False.
        assert_frame_equal(
            grouped.filter(lambda x: x.mean() < 10, dropna=False),
            expected_odd.reindex(df.index))
        assert_frame_equal(
            grouped.filter(lambda x: x.mean() > 10, dropna=False),
            expected_even.reindex(df.index)) 
Example #12
Source File: moments.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def expanding_count(arg, freq=None):
    """
    Count the non-NaN observations seen so far (expanding window).

    Thin wrapper that forwards to ``ensure_compat`` with the
    ``'expanding'`` dispatch and the ``'count'`` method name.

    Parameters
    ----------
    arg : DataFrame or numpy ndarray-like
        Data to count over.
    freq : string or DateOffset object, optional (default None)
        Frequency the data is conformed to before the statistic is
        computed, given as a frequency string or DateOffset object.

    Returns
    -------
    expanding_count : type of caller

    Notes
    -----
    When `freq` is given, time series data is first resampled to that
    frequency using the default parameters of
    :meth:`~pandas.Series.resample` (i.e. using the `mean`).

    Frequency strings are described in the `offset aliases documentation
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
    """
    counted = ensure_compat('expanding', 'count', arg, freq=freq)
    return counted
Example #13
Source File: pandas_groupby_tests.py    From sparklingpandas with Apache License 2.0 6 votes vote down vote up
def test_agg_api(self):
        """agg() with a custom function on a mixed frame matches pandas."""
        # Note: needs a very recent version of pandas to pass
        # TODO(holden): Pass this test if local fails
        # GH 6337
        # http://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error
        # different api for agg when passed custom function with mixed frame

        local_frame = DataFrame({'data1': np.random.randn(5),
                                 'data2': np.random.randn(5),
                                 'key1': ['a', 'a', 'b', 'b', 'a'],
                                 'key2': ['one', 'two', 'one', 'two', 'one']})
        distributed_groups = self.psc.from_pd_data_frame(
            local_frame).groupby('key1')
        local_groups = local_frame.groupby('key1')

        def peak_to_peak(arr):
            return arr.max() - arr.min()

        # The list form of agg is used for the reference result; its column
        # labels are then overwritten with the plain data column names.
        expected = local_groups.agg([peak_to_peak])
        expected.columns = ['data1', 'data2']

        collected = distributed_groups.agg(peak_to_peak).collect()
        assert_frame_equal(collected, expected)
Example #14
Source File: test_filters.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_filter_with_axis_in_groupby(self):
        # issue 11041
        index = pd.MultiIndex.from_product([range(10), [0, 1]])
        data = pd.DataFrame(
            np.arange(100).reshape(-1, 20), columns=index, dtype='int64')
        result = data.groupby(level=0,
                              axis=1).filter(lambda x: x.iloc[0, 0] > 10)
        expected = data.iloc[:, 12:20]
        assert_frame_equal(result, expected) 
Example #15
Source File: test_expressions.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_boolean_ops(self):
        """
        Compare ``expr.evaluate`` results with numexpr enabled and disabled
        for the six comparison operators.

        The inner ``testit`` is executed three times under different global
        numexpr settings (disabled, enabled with one thread, enabled with
        the default thread setting).
        """
        def testit():
            # Each pair comes from fixture attributes on ``self``; the first
            # member is evaluated, the second is only probed with
            # ``_can_use_numexpr``.
            for f, f2 in [(self.frame, self.frame2),
                          (self.mixed, self.mixed2)]:

                f11 = f
                f12 = f + 1

                f21 = f2
                f22 = f2 + 1

                for op, op_str in [('gt', '>'), ('lt', '<'), ('ge', '>='),
                                   ('le', '<='), ('eq', '=='), ('ne', '!=')]:

                    # Replace the operator name with the callable from the
                    # ``operator`` module.
                    op = getattr(operator, op)

                    # numexpr eligibility is expected to be the opposite of
                    # the frame's ``_is_mixed_type`` flag.
                    result = expr._can_use_numexpr(op, op_str, f11, f12,
                                                   'evaluate')
                    assert result != f11._is_mixed_type

                    # Both evaluation paths must agree.
                    result = expr.evaluate(op, op_str, f11, f12,
                                           use_numexpr=True)
                    expected = expr.evaluate(op, op_str, f11, f12,
                                             use_numexpr=False)
                    if isinstance(result, DataFrame):
                        tm.assert_frame_equal(result, expected)
                    else:
                        tm.assert_numpy_array_equal(result, expected.values)

                    # The second frame of each pair must never qualify for
                    # numexpr (presumably because of how the fixture builds
                    # it -- TODO confirm against the setup code).
                    result = expr._can_use_numexpr(op, op_str, f21, f22,
                                                   'evaluate')
                    assert not result

        # Order matters: each call changes global numexpr configuration
        # before the checks are repeated.
        expr.set_use_numexpr(False)
        testit()
        expr.set_use_numexpr(True)
        expr.set_numexpr_threads(1)
        testit()
        expr.set_numexpr_threads()
        testit()
Example #16
Source File: test_expressions.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_bool_ops_raise_on_arithmetic(self):
        """Division, floor division and power on boolean data must raise."""
        frame = DataFrame({'a': np.random.rand(10) > 0.5,
                           'b': np.random.rand(10) > 0.5})
        msg = 'operator %r not implemented for bool dtypes'
        symbols = ('/', '/', '//', '**')
        function_names = ('div', 'truediv', 'floordiv', 'pow')

        for op, name in zip(symbols, function_names):
            # 'div' is only exercised when not running on Python 3
            # (presumably because ``operator.div`` is absent there).
            if compat.PY3 and name == 'div':
                continue

            f = getattr(operator, name)
            err_msg = re.escape(msg % op)

            # Frame/frame, series/series and series/scalar combinations are
            # expected to raise NotImplementedError ...
            not_implemented_cases = [(frame, frame),
                                     (frame.a, frame.b),
                                     (frame.a, True),
                                     (False, frame.a)]
            for left, right in not_implemented_cases:
                with tm.assert_raises_regex(NotImplementedError, err_msg):
                    f(left, right)

            # ... while frame/scalar combinations are expected to raise
            # TypeError with the same message.
            type_error_cases = [(False, frame), (frame, True)]
            for left, right in type_error_cases:
                with tm.assert_raises_regex(TypeError, err_msg):
                    f(left, right)
Example #17
Source File: test_filters.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def setup_method(self, method):
        """
        Build the fixture objects stored on ``self`` before each test.

        `method` is accepted but not used in this body. The statements draw
        from ``np.random`` in a fixed order, so they should not be
        reordered if reproducibility under a seed matters.
        """
        # Time series and dict-of-series fixtures from the ``tm`` helpers.
        self.ts = tm.makeTimeSeries()

        self.seriesd = tm.getSeriesData()
        self.tsd = tm.getTimeSeriesData()
        self.frame = DataFrame(self.seriesd)
        self.tsframe = DataFrame(self.tsd)

        # Two string key columns plus two random float columns (8 rows).
        self.df = DataFrame(
            {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
             'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
             'C': np.random.randn(8),
             'D': np.random.randn(8)})

        # Same layout as ``self.df`` but column 'D' is stored as float32.
        self.df_mixed_floats = DataFrame(
            {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
             'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
             'C': np.random.randn(8),
             'D': np.array(
                 np.random.randn(8), dtype='float32')})

        # Two-level index ('first', 'second') with 10 entries, used as the
        # row index of a 10x3 random frame.
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
                                                                  'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['first', 'second'])
        self.mframe = DataFrame(np.random.randn(10, 3), index=index,
                                columns=['A', 'B', 'C'])

        # Three string key columns plus three random float columns (11 rows).
        self.three_group = DataFrame(
            {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
                   'foo', 'foo', 'foo'],
             'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
                   'two', 'two', 'one'],
             'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
                   'dull', 'shiny', 'shiny', 'shiny'],
             'D': np.random.randn(11),
             'E': np.random.randn(11),
             'F': np.random.randn(11)})
Example #18
Source File: test_filters.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_filter_nan_is_false(self):
        df = DataFrame({'A': np.arange(8),
                        'B': list('aabbbbcc'),
                        'C': np.arange(8)})
        s = df['B']
        g_df = df.groupby(df['B'])
        g_s = s.groupby(s)

        f = lambda x: np.nan
        assert_frame_equal(g_df.filter(f), df.loc[[]])
        assert_series_equal(g_s.filter(f), s[[]]) 
Example #19
Source File: test_indexing_slow.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_large_dataframe_indexing(self):
        # GH10692
        result = DataFrame({'x': range(10 ** 6)}, dtype='int64')
        result.loc[len(result)] = len(result) + 1
        expected = DataFrame({'x': range(10 ** 6 + 1)}, dtype='int64')
        tm.assert_frame_equal(result, expected) 
Example #20
Source File: moments.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def expanding_quantile(arg, quantile, min_periods=1, freq=None):
    """Expanding quantile.

    Thin wrapper that forwards to ``ensure_compat`` with the
    ``'expanding'`` dispatch and the ``'quantile'`` method name.

    Parameters
    ----------
    arg : Series, DataFrame
    quantile : float
        0 <= quantile <= 1
    min_periods : int, default 1
        Minimum number of observations in window required to have a value
        (otherwise result is NA).
    freq : string or DateOffset object, optional (default None)
        Frequency the data is conformed to before the statistic is
        computed, given as a frequency string or DateOffset object.

    Returns
    -------
    y : type of input argument

    Notes
    -----
    When `freq` is given, time series data is first resampled to that
    frequency using the default parameters of
    :meth:`~pandas.Series.resample` (i.e. using the `mean`).

    Frequency strings are described in the `offset aliases documentation
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
    """
    options = dict(freq=freq,
                   min_periods=min_periods,
                   func_kw=['quantile'],
                   quantile=quantile)
    return ensure_compat('expanding', 'quantile', arg, **options)
Example #21
Source File: test_indexing_slow.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_large_dataframe_indexing(self):
        # GH10692
        result = DataFrame({'x': range(10 ** 6)}, dtype='int64')
        result.loc[len(result)] = len(result) + 1
        expected = DataFrame({'x': range(10 ** 6 + 1)}, dtype='int64')
        tm.assert_frame_equal(result, expected) 
Example #22
Source File: moments.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def rolling_count(arg, window, **kwargs):
    """
    Rolling count of the number of non-NaN observations inside the
    provided window.

    Thin wrapper that forwards to ``ensure_compat`` with the ``'rolling'``
    dispatch and the ``'count'`` method name; extra keyword arguments are
    passed through unchanged.

    Parameters
    ----------
    arg : DataFrame or numpy ndarray-like
    window : int
        Size of the moving window. This is the number of observations used
        for calculating the statistic.
    freq : string or DateOffset object, optional (default None)
        Frequency the data is conformed to before the statistic is
        computed, given as a frequency string or DateOffset object.
    center : boolean, default False
        Whether the label should correspond with the center of the window
    how : string, default 'mean'
        Method for down- or re-sampling

    Returns
    -------
    rolling_count : type of caller

    Notes
    -----
    When `freq` is given, time series data is first resampled to that
    frequency using the default parameters of
    :meth:`~pandas.Series.resample` (i.e. using the `mean`).

    Frequency strings are described in the `offset aliases documentation
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
    """
    options = dict(window=window, **kwargs)
    return ensure_compat('rolling', 'count', arg, **options)
Example #23
Source File: dataload_tests.py    From sparklingpandas with Apache License 2.0 5 votes vote down vote up
def test_load_from_data_frame(self):
        """A pandas frame loaded through ``self.psc`` collects back unchanged."""
        local_frame = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                                       'foo', 'bar', 'foo', 'foo'],
                                 'B': ['one', 'one', 'two', 'three',
                                       'two', 'two', 'one', 'three'],
                                 'C': np.random.randn(8),
                                 'D': np.random.randn(8)})
        distributed = self.psc.from_pd_data_frame(local_frame)
        round_tripped = distributed.collect()
        assert_frame_equal(round_tripped, local_frame)
Example #24
Source File: dataload_tests.py    From sparklingpandas with Apache License 2.0 5 votes vote down vote up
def test_from_tuples(self):
        """
        Test loading the data from a list of Python tuples.
        """
        rows = [("tea", "happy"), ("water", "sad"), ("coffee", "happiest")]
        names = ['magic', 'thing']
        distributed = self.psc.DataFrame(rows, columns=names)
        # Both sides are sorted by 'magic' before they are compared.
        data = distributed.collect().sort(['magic'])
        reference = pandas.DataFrame(rows, columns=['magic', 'thing'])
        expected = reference.sort(['magic'])
        assert_frame_equal(data, expected)
Example #25
Source File: test_expressions.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_boolean_ops(self):
        """
        Compare ``expr.evaluate`` results with numexpr enabled and disabled
        for the six comparison operators.

        The inner ``testit`` is executed three times under different global
        numexpr settings (disabled, enabled with one thread, enabled with
        the default thread setting).
        """
        def testit():
            # Each pair comes from fixture attributes on ``self``; the first
            # member is evaluated, the second is only probed with
            # ``_can_use_numexpr``.
            for f, f2 in [(self.frame, self.frame2),
                          (self.mixed, self.mixed2)]:

                f11 = f
                f12 = f + 1

                f21 = f2
                f22 = f2 + 1

                for op, op_str in [('gt', '>'), ('lt', '<'), ('ge', '>='),
                                   ('le', '<='), ('eq', '=='), ('ne', '!=')]:

                    # Replace the operator name with the callable from the
                    # ``operator`` module.
                    op = getattr(operator, op)

                    # numexpr eligibility is expected to be the opposite of
                    # the frame's ``_is_mixed_type`` flag.
                    result = expr._can_use_numexpr(op, op_str, f11, f12,
                                                   'evaluate')
                    assert result != f11._is_mixed_type

                    # Both evaluation paths must agree.
                    result = expr.evaluate(op, op_str, f11, f12,
                                           use_numexpr=True)
                    expected = expr.evaluate(op, op_str, f11, f12,
                                             use_numexpr=False)
                    if isinstance(result, DataFrame):
                        tm.assert_frame_equal(result, expected)
                    else:
                        tm.assert_numpy_array_equal(result, expected.values)

                    # The second frame of each pair must never qualify for
                    # numexpr (presumably because of how the fixture builds
                    # it -- TODO confirm against the setup code).
                    result = expr._can_use_numexpr(op, op_str, f21, f22,
                                                   'evaluate')
                    assert not result

        # Order matters: each call changes global numexpr configuration
        # before the checks are repeated.
        expr.set_use_numexpr(False)
        testit()
        expr.set_use_numexpr(True)
        expr.set_numexpr_threads(1)
        testit()
        expr.set_numexpr_threads()
        testit()
Example #26
Source File: pandas_groupby_tests.py    From sparklingpandas with Apache License 2.0 5 votes vote down vote up
def test_var_on_multiplegroups(self):
        """var() over a two-key groupby matches the plain pandas result."""
        local_frame = DataFrame({'data1': np.random.randn(5),
                                 'data2': np.random.randn(5),
                                 'data3': np.random.randn(5),
                                 'key1': ['a', 'a', 'b', 'b', 'a'],
                                 'key2': ['one', 'two', 'one', 'two', 'one']})
        distributed = self.psc.from_pd_data_frame(local_frame)
        distributed_groups = distributed.groupby(['key1', 'key2'])
        local_groups = local_frame.groupby(['key1', 'key2'])

        actual_var = distributed_groups.var().collect()
        assert_frame_equal(actual_var, local_groups.var())
Example #27
Source File: pandas_groupby_tests.py    From sparklingpandas with Apache License 2.0 5 votes vote down vote up
def test_new_in0140(self):
        """
        Exercise groupby functionality that was new in 0.14.0.

        Per the original author's note, this currently doesn't work.
        """
        # v0.14.0 whatsnew
        source = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
        groups = self.psc.from_pd_data_frame(source).groupby('A')

        # first(): compared against rows 1 and 2 of the input, indexed by 'A'.
        first_rows = groups.first().collect()
        assert_frame_equal(first_rows, source.iloc[[1, 2]].set_index('A'))

        # nth(0, dropna='any') is compared against the same expectation.
        wanted = source.iloc[[1, 2]].set_index('A')
        assert_frame_equal(groups.nth(0, dropna='any').collect(), wanted)
Example #28
Source File: moments.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def expanding_quantile(arg, quantile, min_periods=1, freq=None):
    """Expanding quantile.

    Thin wrapper that forwards to ``ensure_compat`` with the
    ``'expanding'`` dispatch and the ``'quantile'`` method name.

    Parameters
    ----------
    arg : Series, DataFrame
    quantile : float
        0 <= quantile <= 1
    min_periods : int, default 1
        Minimum number of observations in window required to have a value
        (otherwise result is NA).
    freq : string or DateOffset object, optional (default None)
        Frequency the data is conformed to before the statistic is
        computed, given as a frequency string or DateOffset object.

    Returns
    -------
    y : type of input argument

    Notes
    -----
    When `freq` is given, time series data is first resampled to that
    frequency using the default parameters of
    :meth:`~pandas.Series.resample` (i.e. using the `mean`).

    Frequency strings are described in the `offset aliases documentation
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
    """
    options = dict(freq=freq,
                   min_periods=min_periods,
                   func_kw=['quantile'],
                   quantile=quantile)
    return ensure_compat('expanding', 'quantile', arg, **options)
Example #29
Source File: test_expressions.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_bool_ops_raise_on_arithmetic(self):
        """Division, floor division and power on boolean data must raise."""
        frame = DataFrame({'a': np.random.rand(10) > 0.5,
                           'b': np.random.rand(10) > 0.5})
        msg = 'operator %r not implemented for bool dtypes'
        symbols = ('/', '/', '//', '**')
        function_names = ('div', 'truediv', 'floordiv', 'pow')

        for op, name in zip(symbols, function_names):
            # 'div' is only exercised when not running on Python 3
            # (presumably because ``operator.div`` is absent there).
            if compat.PY3 and name == 'div':
                continue

            f = getattr(operator, name)
            err_msg = re.escape(msg % op)

            # Frame/frame, series/series and series/scalar combinations are
            # expected to raise NotImplementedError ...
            not_implemented_cases = [(frame, frame),
                                     (frame.a, frame.b),
                                     (frame.a, True),
                                     (False, frame.a)]
            for left, right in not_implemented_cases:
                with tm.assert_raises_regex(NotImplementedError, err_msg):
                    f(left, right)

            # ... while frame/scalar combinations are expected to raise
            # TypeError with the same message.
            type_error_cases = [(False, frame), (frame, True)]
            for left, right in type_error_cases:
                with tm.assert_raises_regex(TypeError, err_msg):
                    f(left, right)
Example #30
Source File: test_filters.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_filter_non_bool_raises(self):
        """filter() raises TypeError when the callback returns a non-boolean."""
        rows = [
            ['best', 'a', 1],
            ['worst', 'b', 1],
            ['best', 'c', 1],
            ['best', 'd', 1],
            ['worst', 'd', 1],
            ['worst', 'd', 1],
            ['best', 'd', 1],
        ]
        frame = pd.DataFrame(rows, columns=['a', 'b', 'c'])

        def mean_of_c(g):
            # Returns the numeric mean of column 'c', not a boolean.
            return g.c.mean()

        with tm.assert_raises_regex(TypeError,
                                    'filter function returned a.*'):
            frame.groupby('a').filter(mean_of_c)