Python pandas.core.api.DataFrame() Examples
The following are 30 code examples of pandas.core.api.DataFrame().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.core.api, or try the search function.
Example #1
Source File: moments.py From Splunking-Crime with GNU Affero General Public License v3.0 | 6 votes |
def expanding_count(arg, freq=None):
    """
    Count the non-NaN observations over an expanding window.

    Parameters
    ----------
    arg : DataFrame or numpy ndarray-like
    freq : string or DateOffset object, optional (default None)
        Frequency the data is conformed to before the statistic is
        computed, given as a frequency string or a DateOffset object.

    Returns
    -------
    expanding_count : type of caller

    Notes
    -----
    When ``freq`` is supplied, the time series data is resampled to that
    frequency using the default parameters of
    :meth:`~pandas.Series.resample` (i.e. using the `mean`).

    The frequency strings are described at `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
    """
    # Forward to ensure_compat with the 'expanding' / 'count' selectors.
    options = {'freq': freq}
    return ensure_compat('expanding', 'count', arg, **options)
Example #2
Source File: resample.py From Computable with MIT License | 6 votes |
def asfreq(obj, freq, method=None, how=None, normalize=False):
    """
    Convert a Series/DataFrame to a new frequency.

    Parameters
    ----------
    obj : Series or DataFrame
        Object whose index is converted.
    freq : frequency string or offset
        Target frequency.
    method : optional
        Fill method handed to ``reindex``. Only supported when the index is
        not a PeriodIndex.
    how : optional
        Passed to ``PeriodIndex.asfreq``; ``'E'`` is used when omitted.
    normalize : bool, default False
        When true, ``normalize()`` is applied to the resulting index
        (non-period path only).

    Returns
    -------
    A new object of the same kind as `obj`.

    Raises
    ------
    NotImplementedError
        If `method` is given together with a PeriodIndex.
    """
    current_index = obj.index

    # Datetime-like path: build the full target range and reindex onto it.
    if not isinstance(current_index, PeriodIndex):
        if len(current_index) == 0:
            return obj.copy()
        target = date_range(current_index[0], current_index[-1], freq=freq)
        converted = obj.reindex(target, method=method)
        if normalize:
            converted.index = converted.index.normalize()
        return converted

    # Period path: only the index changes, the data is copied as is.
    if method is not None:
        raise NotImplementedError
    period_how = 'E' if how is None else how
    converted_index = current_index.asfreq(freq, how=period_how)
    converted = obj.copy()
    converted.index = converted_index
    return converted
Example #3
Source File: moments.py From Computable with MIT License | 6 votes |
def _process_data_structure(arg, kill_inf=True): if isinstance(arg, DataFrame): return_hook = lambda v: type(arg)(v, index=arg.index, columns=arg.columns) values = arg.values elif isinstance(arg, Series): values = arg.values return_hook = lambda v: Series(v, arg.index) else: return_hook = lambda v: v values = arg if not issubclass(values.dtype.type, float): values = values.astype(float) if kill_inf: values = values.copy() values[np.isinf(values)] = np.NaN return return_hook, values #------------------------------------------------------------------------------ # Exponential moving moments
Example #4
Source File: test_filters.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_filter_multiple_timestamp(self):
    # GH 10114
    frame = DataFrame({
        'A': np.arange(5, dtype='int64'),
        'B': ['foo', 'bar', 'foo', 'bar', 'bar'],
        'C': Timestamp('20130101'),
    })
    by_b_and_c = frame.groupby(['B', 'C'])

    # Column 'A' selected from the grouping: filter, then transform.
    kept = by_b_and_c['A'].filter(lambda x: True)
    assert_series_equal(frame['A'], kept)

    sizes = by_b_and_c['A'].transform(len)
    assert_series_equal(sizes, Series([2, 3, 2, 3, 3], name='A'))

    # The same operations on the whole grouped frame.
    kept = by_b_and_c.filter(lambda x: True)
    assert_frame_equal(frame, kept)

    totals = by_b_and_c.transform('sum')
    assert_frame_equal(totals, DataFrame({'A': [2, 8, 2, 8, 8]}))

    sizes = by_b_and_c.transform(len)
    assert_frame_equal(sizes, DataFrame({'A': [2, 3, 2, 3, 3]}))
Example #5
Source File: test_filters.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_filter_bad_shapes(self):
    frame = DataFrame({'A': np.arange(8),
                       'B': list('aabbbbcc'),
                       'C': np.arange(8)})
    labels = frame['B']
    frame_groups = frame.groupby('B')
    label_groups = labels.groupby(labels)

    # Each callable returns something other than a single boolean per
    # group; filter is expected to reject all of them with a TypeError.
    bad_filters = [
        lambda x: x,
        lambda x: x == 1,
        lambda x: np.outer(x, x),
    ]
    for bad in bad_filters:
        pytest.raises(TypeError, lambda: frame_groups.filter(bad))
        pytest.raises(TypeError, lambda: label_groups.filter(bad))
Example #6
Source File: moments.py From Computable with MIT License | 6 votes |
def _center_window(rs, window, axis): if axis > rs.ndim-1: raise ValueError("Requested axis is larger then no. of argument dimensions") offset = int((window - 1) / 2.) if isinstance(rs, (Series, DataFrame, Panel)): rs = rs.shift(-offset, axis=axis) else: rs_indexer = [slice(None)] * rs.ndim rs_indexer[axis] = slice(None, -offset) lead_indexer = [slice(None)] * rs.ndim lead_indexer[axis] = slice(offset, None) na_indexer = [slice(None)] * rs.ndim na_indexer[axis] = slice(-offset, None) rs[tuple(rs_indexer)] = np.copy(rs[tuple(lead_indexer)]) rs[tuple(na_indexer)] = np.nan return rs
Example #7
Source File: moments.py From Computable with MIT License | 6 votes |
def expanding_count(arg, freq=None, center=False, time_rule=None):
    """
    Expanding count of the non-NaN observations.

    Implemented as a rolling count whose window is the full length of
    `arg`.

    Parameters
    ----------
    arg : DataFrame or numpy ndarray-like
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing statistic
    center : boolean, default False
        Whether the label should correspond with center of window
    time_rule : Legacy alias for freq

    Returns
    -------
    expanding_count : type of caller
    """
    full_window = len(arg)
    return rolling_count(arg, full_window, freq=freq, center=center,
                         time_rule=time_rule)
Example #8
Source File: moments.py From Computable with MIT License | 6 votes |
def expanding_quantile(arg, quantile, min_periods=1, freq=None,
                       center=False, time_rule=None):
    """Expanding quantile.

    Implemented as a rolling quantile whose window is the full length of
    `arg`.

    Parameters
    ----------
    arg : Series, DataFrame
    quantile : 0 <= quantile <= 1
    min_periods : int
        Minimum number of observations in window required to have a value
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing statistic
    center : boolean, default False
        Whether the label should correspond with center of window
    time_rule : Legacy alias for freq

    Returns
    -------
    y : type of input argument
    """
    full_window = len(arg)
    return rolling_quantile(arg, full_window, quantile,
                            min_periods=min_periods, freq=freq,
                            center=center, time_rule=time_rule)
Example #9
Source File: moments.py From Computable with MIT License | 6 votes |
def expanding_corr_pairwise(df, min_periods=1):
    """
    Pairwise expanding correlation matrices, returned as a Panel whose
    items are dates.

    Implemented as the pairwise rolling correlation with the window set to
    the full length of `df`.

    Parameters
    ----------
    df : DataFrame
    min_periods : int, default 1

    Returns
    -------
    correls : Panel
    """
    return rolling_corr_pairwise(df, len(df), min_periods=min_periods)
Example #10
Source File: moments.py From Computable with MIT License | 6 votes |
def expanding_apply(arg, func, min_periods=1, freq=None, center=False,
                    time_rule=None):
    """Generic expanding function application.

    Implemented as ``rolling_apply`` with the window set to the full length
    of `arg`.

    Parameters
    ----------
    arg : Series, DataFrame
    func : function
        Must produce a single value from an ndarray input
    min_periods : int
        Minimum number of observations in window required to have a value
    freq : None or string alias / date offset object, default=None
        Frequency to conform to before computing statistic
    center : boolean, default False
        Whether the label should correspond with center of window
    time_rule : Legacy alias for freq

    Returns
    -------
    y : type of input argument
    """
    return rolling_apply(arg, len(arg), func, min_periods=min_periods,
                         freq=freq, center=center, time_rule=time_rule)
Example #11
Source File: test_filters.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_filter_single_column_df(self):
    frame = pd.DataFrame([1, 3, 20, 5, 22, 24, 7])
    odd_rows = pd.DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6])
    even_rows = pd.DataFrame([20, 22, 24], index=[2, 4, 5])

    # Group the single column by parity of its values.
    parity = frame[0].apply(lambda x: x % 2)
    by_parity = frame.groupby(parity)

    kept = by_parity.filter(lambda x: x.mean() < 10)
    assert_frame_equal(kept, odd_rows)
    kept = by_parity.filter(lambda x: x.mean() > 10)
    assert_frame_equal(kept, even_rows)

    # Test dropna=False.
    kept = by_parity.filter(lambda x: x.mean() < 10, dropna=False)
    assert_frame_equal(kept, odd_rows.reindex(frame.index))
    kept = by_parity.filter(lambda x: x.mean() > 10, dropna=False)
    assert_frame_equal(kept, even_rows.reindex(frame.index))
Example #12
Source File: moments.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def expanding_count(arg, freq=None):
    """
    Count the non-NaN observations over an expanding window.

    Parameters
    ----------
    arg : DataFrame or numpy ndarray-like
    freq : string or DateOffset object, optional (default None)
        Frequency the data is conformed to before the statistic is
        computed, given as a frequency string or a DateOffset object.

    Returns
    -------
    expanding_count : type of caller

    Notes
    -----
    When ``freq`` is supplied, the time series data is resampled to that
    frequency using the default parameters of
    :meth:`~pandas.Series.resample` (i.e. using the `mean`).

    The frequency strings are described at `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
    """
    # Forward to ensure_compat with the 'expanding' / 'count' selectors.
    options = {'freq': freq}
    return ensure_compat('expanding', 'count', arg, **options)
Example #13
Source File: pandas_groupby_tests.py From sparklingpandas with Apache License 2.0 | 6 votes |
def test_agg_api(self):
    # Note: needs a very recent version of pandas to pass
    # TODO(holden): Pass this test if local fails
    # GH 6337
    # http://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error
    # different api for agg when passed custom function with mixed frame
    def peak_to_peak(arr):
        return arr.max() - arr.min()

    local_frame = DataFrame({
        'data1': np.random.randn(5),
        'data2': np.random.randn(5),
        'key1': ['a', 'a', 'b', 'b', 'a'],
        'key2': ['one', 'two', 'one', 'two', 'one'],
    })
    distributed_frame = self.psc.from_pd_data_frame(local_frame)
    distributed_groups = distributed_frame.groupby('key1')
    local_groups = local_frame.groupby('key1')

    # Reference result from plain pandas, with the column labels replaced
    # by the two data column names.
    expected = local_groups.agg([peak_to_peak])
    expected.columns = ['data1', 'data2']

    result = distributed_groups.agg(peak_to_peak).collect()
    assert_frame_equal(result, expected)
Example #14
Source File: test_filters.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_filter_with_axis_in_groupby(self):
    # issue 11041
    columns = pd.MultiIndex.from_product([range(10), [0, 1]])
    values = np.arange(100).reshape(-1, 20)
    frame = pd.DataFrame(values, columns=columns, dtype='int64')

    # Group the columns by their first level and keep the groups whose
    # top-left value exceeds 10.
    by_first_level = frame.groupby(level=0, axis=1)
    observed = by_first_level.filter(lambda x: x.iloc[0, 0] > 10)

    wanted = frame.iloc[:, 12:20]
    assert_frame_equal(observed, wanted)
Example #15
Source File: test_expressions.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_boolean_ops(self):
    comparisons = [('gt', '>'), ('lt', '<'), ('ge', '>='),
                   ('le', '<='), ('eq', '=='), ('ne', '!=')]

    def testit():
        frame_pairs = [(self.frame, self.frame2),
                       (self.mixed, self.mixed2)]
        for f, f2 in frame_pairs:
            f11, f12 = f, f + 1
            f21, f22 = f2, f2 + 1

            for op_name, op_str in comparisons:
                op = getattr(operator, op_name)

                usable = expr._can_use_numexpr(op, op_str, f11, f12,
                                               'evaluate')
                assert usable != f11._is_mixed_type

                # The result must be the same with and without numexpr.
                result = expr.evaluate(op, op_str, f11, f12,
                                       use_numexpr=True)
                expected = expr.evaluate(op, op_str, f11, f12,
                                         use_numexpr=False)
                if isinstance(result, DataFrame):
                    tm.assert_frame_equal(result, expected)
                else:
                    tm.assert_numpy_array_equal(result, expected.values)

                usable = expr._can_use_numexpr(op, op_str, f21, f22,
                                               'evaluate')
                assert not usable

    # Run the checks with numexpr off, then on with one thread, then on
    # with the thread count reset via set_numexpr_threads().
    expr.set_use_numexpr(False)
    testit()
    expr.set_use_numexpr(True)
    expr.set_numexpr_threads(1)
    testit()
    expr.set_numexpr_threads()
    testit()
Example #16
Source File: test_expressions.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_bool_ops_raise_on_arithmetic(self):
    df = DataFrame({'a': np.random.rand(10) > 0.5,
                    'b': np.random.rand(10) > 0.5})
    template = 'operator %r not implemented for bool dtypes'
    symbol_and_name = [('/', 'div'), ('/', 'truediv'),
                       ('//', 'floordiv'), ('**', 'pow')]

    for symbol, name in symbol_and_name:
        # 'div' is skipped when compat.PY3 is true.
        if compat.PY3 and name == 'div':
            continue

        f = getattr(operator, name)
        err_msg = re.escape(template % symbol)

        with tm.assert_raises_regex(NotImplementedError, err_msg):
            f(df, df)

        with tm.assert_raises_regex(NotImplementedError, err_msg):
            f(df.a, df.b)

        with tm.assert_raises_regex(NotImplementedError, err_msg):
            f(df.a, True)

        with tm.assert_raises_regex(NotImplementedError, err_msg):
            f(False, df.a)

        # Frame-with-scalar combinations are expected to raise TypeError.
        with tm.assert_raises_regex(TypeError, err_msg):
            f(False, df)

        with tm.assert_raises_regex(TypeError, err_msg):
            f(df, True)
Example #17
Source File: test_filters.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def setup_method(self, method):
    # Fixtures built by the ``tm`` helpers.
    self.ts = tm.makeTimeSeries()
    self.seriesd = tm.getSeriesData()
    self.tsd = tm.getTimeSeriesData()
    self.frame = DataFrame(self.seriesd)
    self.tsframe = DataFrame(self.tsd)

    # Two string key columns shared by ``df`` and ``df_mixed_floats``.
    key_a = ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo']
    key_b = ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three']

    self.df = DataFrame({
        'A': list(key_a),
        'B': list(key_b),
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    })

    # Same layout as ``df`` but column 'D' is stored as float32.
    self.df_mixed_floats = DataFrame({
        'A': list(key_a),
        'B': list(key_b),
        'C': np.random.randn(8),
        'D': np.array(np.random.randn(8), dtype='float32'),
    })

    # Two-level row index for ``mframe``.
    level_values = [['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']]
    level_labels = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                    [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
    index = MultiIndex(levels=level_values, labels=level_labels,
                       names=['first', 'second'])
    self.mframe = DataFrame(np.random.randn(10, 3), index=index,
                            columns=['A', 'B', 'C'])

    # Three string key columns plus three random numeric columns.
    self.three_group = DataFrame({
        'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
              'foo', 'foo', 'foo'],
        'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
              'two', 'two', 'one'],
        'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
              'dull', 'shiny', 'shiny', 'shiny'],
        'D': np.random.randn(11),
        'E': np.random.randn(11),
        'F': np.random.randn(11),
    })
Example #18
Source File: test_filters.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_filter_nan_is_false(self):
    frame = DataFrame({'A': np.arange(8),
                       'B': list('aabbbbcc'),
                       'C': np.arange(8)})
    labels = frame['B']
    frame_groups = frame.groupby(frame['B'])
    label_groups = labels.groupby(labels)

    def always_nan(x):
        return np.nan

    # A NaN verdict counts as False, so every group is dropped and the
    # results are empty.
    assert_frame_equal(frame_groups.filter(always_nan), frame.loc[[]])
    assert_series_equal(label_groups.filter(always_nan), labels[[]])
Example #19
Source File: test_indexing_slow.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_large_dataframe_indexing(self):
    # GH10692
    # Append one row to a large frame through ``.loc`` and check that the
    # enlarged frame equals one built directly with the extra row.
    result = DataFrame({'x': range(10 ** 6)}, dtype='int64')

    # Append ``len(result)`` (1000000) so the new row matches the last
    # value of ``range(10 ** 6 + 1)`` exactly. Appending ``len(result) + 1``
    # only passed because the frame comparison tolerates small relative
    # differences when it is not run in exact mode.
    result.loc[len(result)] = len(result)

    expected = DataFrame({'x': range(10 ** 6 + 1)}, dtype='int64')
    tm.assert_frame_equal(result, expected)
Example #20
Source File: moments.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def expanding_quantile(arg, quantile, min_periods=1, freq=None):
    """Expanding quantile.

    Parameters
    ----------
    arg : Series, DataFrame
    quantile : float
        0 <= quantile <= 1
    min_periods : int, default 1
        Minimum number of observations in window required to have a value
        (otherwise result is NA).
    freq : string or DateOffset object, optional (default None)
        Frequency the data is conformed to before the statistic is
        computed, given as a frequency string or a DateOffset object.

    Returns
    -------
    y : type of input argument

    Notes
    -----
    When ``freq`` is supplied, the time series data is resampled to that
    frequency using the default parameters of
    :meth:`~pandas.Series.resample` (i.e. using the `mean`).

    The frequency strings are described at `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
    """
    # Keyword order is kept as freq, min_periods, func_kw, quantile.
    options = {
        'freq': freq,
        'min_periods': min_periods,
        'func_kw': ['quantile'],
        'quantile': quantile,
    }
    return ensure_compat('expanding', 'quantile', arg, **options)
Example #21
Source File: test_indexing_slow.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_large_dataframe_indexing(self):
    # GH10692
    # Append one row to a large frame through ``.loc`` and check that the
    # enlarged frame equals one built directly with the extra row.
    result = DataFrame({'x': range(10 ** 6)}, dtype='int64')

    # Append ``len(result)`` (1000000) so the new row matches the last
    # value of ``range(10 ** 6 + 1)`` exactly. Appending ``len(result) + 1``
    # only passed because the frame comparison tolerates small relative
    # differences when it is not run in exact mode.
    result.loc[len(result)] = len(result)

    expected = DataFrame({'x': range(10 ** 6 + 1)}, dtype='int64')
    tm.assert_frame_equal(result, expected)
Example #22
Source File: moments.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def rolling_count(arg, window, **kwargs):
    """
    Rolling count of the non-NaN observations inside the given window.

    Parameters
    ----------
    arg : DataFrame or numpy ndarray-like
    window : int
        Size of the moving window, i.e. the number of observations used
        for calculating the statistic.
    freq : string or DateOffset object, optional (default None)
        Frequency the data is conformed to before the statistic is
        computed, given as a frequency string or a DateOffset object.
    center : boolean, default False
        Whether the label should correspond with center of window
    how : string, default 'mean'
        Method for down- or re-sampling

    Returns
    -------
    rolling_count : type of caller

    Notes
    -----
    ``freq``, ``center`` and ``how`` are not named parameters of this
    function; every extra keyword argument is forwarded unchanged.

    When ``freq`` is supplied, the time series data is resampled to that
    frequency using the default parameters of
    :meth:`~pandas.Series.resample` (i.e. using the `mean`).

    The frequency strings are described at `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
    """
    # ``window`` goes first, followed by the caller's extra keywords.
    options = {'window': window}
    options.update(kwargs)
    return ensure_compat('rolling', 'count', arg, **options)
Example #23
Source File: dataload_tests.py From sparklingpandas with Apache License 2.0 | 5 votes |
def test_load_from_data_frame(self):
    # Hand a local frame to the context and collect it back; the result
    # must equal the frame that went in.
    local_frame = DataFrame({
        'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    })
    distributed = self.psc.from_pd_data_frame(local_frame)
    collected = distributed.collect()
    assert_frame_equal(collected, local_frame)
Example #24
Source File: dataload_tests.py From sparklingpandas with Apache License 2.0 | 5 votes |
def test_from_tuples(self):
    """
    Check that a frame built from a list of Python tuples matches the
    pandas frame built from the same tuples.
    """
    rows = [("tea", "happy"), ("water", "sad"), ("coffee", "happiest")]
    column_names = ['magic', 'thing']

    pframe = self.psc.DataFrame(rows, columns=column_names)
    collected = pframe.collect()
    data = collected.sort(['magic'])

    # Both sides are sorted on 'magic' before comparing.
    reference = pandas.DataFrame(rows, columns=['magic', 'thing'])
    expected = reference.sort(['magic'])
    assert_frame_equal(data, expected)
Example #25
Source File: test_expressions.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_boolean_ops(self):
    comparisons = [('gt', '>'), ('lt', '<'), ('ge', '>='),
                   ('le', '<='), ('eq', '=='), ('ne', '!=')]

    def testit():
        frame_pairs = [(self.frame, self.frame2),
                       (self.mixed, self.mixed2)]
        for f, f2 in frame_pairs:
            f11, f12 = f, f + 1
            f21, f22 = f2, f2 + 1

            for op_name, op_str in comparisons:
                op = getattr(operator, op_name)

                usable = expr._can_use_numexpr(op, op_str, f11, f12,
                                               'evaluate')
                assert usable != f11._is_mixed_type

                # The result must be the same with and without numexpr.
                result = expr.evaluate(op, op_str, f11, f12,
                                       use_numexpr=True)
                expected = expr.evaluate(op, op_str, f11, f12,
                                         use_numexpr=False)
                if isinstance(result, DataFrame):
                    tm.assert_frame_equal(result, expected)
                else:
                    tm.assert_numpy_array_equal(result, expected.values)

                usable = expr._can_use_numexpr(op, op_str, f21, f22,
                                               'evaluate')
                assert not usable

    # Run the checks with numexpr off, then on with one thread, then on
    # with the thread count reset via set_numexpr_threads().
    expr.set_use_numexpr(False)
    testit()
    expr.set_use_numexpr(True)
    expr.set_numexpr_threads(1)
    testit()
    expr.set_numexpr_threads()
    testit()
Example #26
Source File: pandas_groupby_tests.py From sparklingpandas with Apache License 2.0 | 5 votes |
def test_var_on_multiplegroups(self):
    # Variance per (key1, key2) group must agree between the distributed
    # frame and plain pandas.
    group_keys = ['key1', 'key2']
    local_frame = DataFrame({
        'data1': np.random.randn(5),
        'data2': np.random.randn(5),
        'data3': np.random.randn(5),
        'key1': ['a', 'a', 'b', 'b', 'a'],
        'key2': ['one', 'two', 'one', 'two', 'one'],
    })
    distributed_frame = self.psc.from_pd_data_frame(local_frame)

    actual_grouped = distributed_frame.groupby(list(group_keys))
    expected_grouped = local_frame.groupby(list(group_keys))

    actual = actual_grouped.var().collect()
    assert_frame_equal(actual, expected_grouped.var())
Example #27
Source File: pandas_groupby_tests.py From sparklingpandas with Apache License 2.0 | 5 votes |
def test_new_in0140(self):
    """
    Exercise groupby behaviour introduced in pandas 0.14.0.
    Marked by the original author as currently not working.
    """
    # v0.14.0 whatsnew
    source = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
    distributed = self.psc.from_pd_data_frame(source)
    by_a = distributed.groupby('A')

    # first(): compared against rows 1 and 2 of the source, indexed by 'A'.
    observed = by_a.first().collect()
    wanted = source.iloc[[1, 2]].set_index('A')
    assert_frame_equal(observed, wanted)

    # nth(0, dropna='any') is compared against the same two rows.
    wanted = source.iloc[[1, 2]].set_index('A')
    observed = by_a.nth(0, dropna='any').collect()
    assert_frame_equal(observed, wanted)
Example #28
Source File: moments.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def expanding_quantile(arg, quantile, min_periods=1, freq=None):
    """Expanding quantile.

    Parameters
    ----------
    arg : Series, DataFrame
    quantile : float
        0 <= quantile <= 1
    min_periods : int, default 1
        Minimum number of observations in window required to have a value
        (otherwise result is NA).
    freq : string or DateOffset object, optional (default None)
        Frequency the data is conformed to before the statistic is
        computed, given as a frequency string or a DateOffset object.

    Returns
    -------
    y : type of input argument

    Notes
    -----
    When ``freq`` is supplied, the time series data is resampled to that
    frequency using the default parameters of
    :meth:`~pandas.Series.resample` (i.e. using the `mean`).

    The frequency strings are described at `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
    """
    # Keyword order is kept as freq, min_periods, func_kw, quantile.
    options = {
        'freq': freq,
        'min_periods': min_periods,
        'func_kw': ['quantile'],
        'quantile': quantile,
    }
    return ensure_compat('expanding', 'quantile', arg, **options)
Example #29
Source File: test_expressions.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_bool_ops_raise_on_arithmetic(self):
    df = DataFrame({'a': np.random.rand(10) > 0.5,
                    'b': np.random.rand(10) > 0.5})
    template = 'operator %r not implemented for bool dtypes'
    symbol_and_name = [('/', 'div'), ('/', 'truediv'),
                       ('//', 'floordiv'), ('**', 'pow')]

    for symbol, name in symbol_and_name:
        # 'div' is skipped when compat.PY3 is true.
        if compat.PY3 and name == 'div':
            continue

        f = getattr(operator, name)
        err_msg = re.escape(template % symbol)

        with tm.assert_raises_regex(NotImplementedError, err_msg):
            f(df, df)

        with tm.assert_raises_regex(NotImplementedError, err_msg):
            f(df.a, df.b)

        with tm.assert_raises_regex(NotImplementedError, err_msg):
            f(df.a, True)

        with tm.assert_raises_regex(NotImplementedError, err_msg):
            f(False, df.a)

        # Frame-with-scalar combinations are expected to raise TypeError.
        with tm.assert_raises_regex(TypeError, err_msg):
            f(False, df)

        with tm.assert_raises_regex(TypeError, err_msg):
            f(df, True)
Example #30
Source File: test_filters.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_filter_non_bool_raises(self):
    rows = [
        ['best', 'a', 1],
        ['worst', 'b', 1],
        ['best', 'c', 1],
        ['best', 'd', 1],
        ['worst', 'd', 1],
        ['worst', 'd', 1],
        ['best', 'd', 1],
    ]
    df = pd.DataFrame(rows, columns=['a', 'b', 'c'])

    # The filter function returns a group mean, not a boolean; that is
    # expected to be rejected with a TypeError.
    expected_message = 'filter function returned a.*'
    with tm.assert_raises_regex(TypeError, expected_message):
        df.groupby('a').filter(lambda g: g.c.mean())