Python pandas.cut() Examples
The following are 30 code examples of pandas.cut().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: timeplots.py From NanoPlot with GNU General Public License v3.0 | 7 votes |
def quality_over_time(dfs, path, figformat, title, plot_settings={}):
    """Save a violin plot of basecall quality per time bin.

    Args:
        dfs: Data handed to seaborn as ``data``; read through its
            ``"timebin"`` and ``"quals"`` columns.
        path: Prefix prepended to the output file name.
        figformat: File extension, also passed to ``Plot.save`` as ``format``.
        title: Plot title; when falsy the title of the ``Plot`` object is used.
        plot_settings: Keyword arguments forwarded to ``sns.set``.

    Returns:
        The ``Plot`` object whose ``fig`` attribute holds the saved figure.
    """
    out_file = path + "TimeQualityViolinPlot." + figformat
    time_qual = Plot(path=out_file, title="Violin plot of quality over time")
    sns.set(style="white", **plot_settings)

    axes = sns.violinplot(x="timebin",
                          y="quals",
                          data=dfs,
                          inner=None,
                          cut=0,
                          linewidth=0)
    plot_title = title or time_qual.title
    axes.set(xlabel='Interval (hours)',
             ylabel="Basecall quality",
             title=plot_title)
    plt.xticks(rotation=45, ha='center', fontsize=8)

    time_qual.fig = axes.get_figure()
    time_qual.save(format=figformat)
    # Close every open figure once the plot has been written out.
    plt.close("all")
    return time_qual
Example #2
Source File: timeplots.py From NanoPlot with GNU General Public License v3.0 | 7 votes |
def sequencing_speed_over_time(dfs, path, figformat, title, plot_settings={}):
    """Save a violin plot of sequencing speed per time bin.

    Speed is computed as ``lengths / duration`` for the rows whose
    ``duration`` is non-zero. When ``dfs`` has no ``"timebin"`` entry, one is
    added in place using ``add_time_bins``.

    Args:
        dfs: Data with ``"duration"`` and ``"lengths"`` columns.
        path: Prefix prepended to the output file name.
        figformat: File extension, also passed to ``Plot.save`` as ``format``.
        title: Plot title; when falsy the title of the ``Plot`` object is used.
        plot_settings: Keyword arguments forwarded to ``sns.set``.

    Returns:
        The ``Plot`` object whose ``fig`` attribute holds the saved figure.
    """
    out_file = path + "TimeSequencingSpeed_ViolinPlot." + figformat
    time_duration = Plot(path=out_file,
                         title="Violin plot of sequencing speed over time")
    sns.set(style="white", **plot_settings)

    if "timebin" not in dfs:
        dfs['timebin'] = add_time_bins(dfs)

    # Rows with zero duration are left out to avoid dividing by zero.
    nonzero = dfs['duration'] != 0
    timebins = dfs.loc[nonzero, "timebin"]
    speed = dfs.loc[nonzero, "lengths"] / dfs.loc[nonzero, "duration"]

    axes = sns.violinplot(x=timebins,
                          y=speed,
                          inner=None,
                          cut=0,
                          linewidth=0)
    plot_title = title or time_duration.title
    axes.set(xlabel='Interval (hours)',
             ylabel="Sequencing speed (nucleotides/second)",
             title=plot_title)
    plt.xticks(rotation=45, ha='center', fontsize=8)

    time_duration.fig = axes.get_figure()
    time_duration.save(format=figformat)
    plt.close("all")
    return time_duration
Example #3
Source File: monotonic_woe_binning.py From Monotonic-WOE-Binning-Algorithm with MIT License | 6 votes |
def generate_final_dataset(self):
    """Assign every row of ``self.dataset`` to a bin built from ``self.woe_summary``.

    Nothing is returned; the method works through side effects:

    * ``self.bucket`` becomes ``True`` when ``self.sign`` equals ``False`` and
      ``False`` otherwise. It is passed to ``pd.cut`` as ``right``.
    * ``self.woe_summary`` gains a ``<column>_shift`` column (the bin column
      shifted by one row, the vacated cell set to -inf or +inf) and a
      ``labels`` column produced row-wise by ``self.generate_bin_labels``.
    * ``self.bins`` holds the sorted bin edges padded with -inf and +inf.
    * ``self.dataset["bins"]`` holds each row's interval as a string with any
      leading ``[`` and trailing ``)`` stripped.
    """
    shift_col = self.column + "_shift"
    # The ``== False`` comparison is kept on purpose: unlike ``not self.sign``
    # it does not treat ``None`` as False.
    sign_is_false = self.sign == False  # noqa: E712

    if sign_is_false:
        shift_var = 1
        self.bucket = True
    else:
        shift_var = -1
        self.bucket = False

    self.woe_summary[shift_col] = self.woe_summary[self.column].shift(shift_var)

    # Shifting leaves one empty cell at the open end; fill it with the
    # matching infinity.
    if sign_is_false:
        self.woe_summary.loc[0, shift_col] = -np.inf
    else:
        self.woe_summary.loc[len(self.woe_summary) - 1, shift_col] = np.inf

    # ``np.inf`` is used rather than the ``np.Inf`` alias, which NumPy 2.0
    # removed. The edges do not depend on the branch taken above.
    self.bins = np.sort(list(self.woe_summary[self.column]) + [np.inf, -np.inf])

    self.woe_summary["labels"] = self.woe_summary.apply(self.generate_bin_labels, axis=1)

    binned = pd.cut(self.dataset[self.column], self.bins,
                    right=self.bucket, precision=0)
    self.dataset["bins"] = binned
    self.dataset["bins"] = self.dataset["bins"].astype(str)
    self.dataset['bins'] = self.dataset['bins'].map(lambda x: x.lstrip('[').rstrip(')'))
Example #4
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_to_excel_interval_no_labels(self, *_):
    # see gh-19242
    #
    # Test writing Interval without labels.
    #
    # The frame written holds the raw ``pd.cut`` result; the frame expected
    # back holds the same column converted with ``astype(str)``.
    frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)),
                      dtype=np.int64)
    expected = frame.copy()

    frame["new"] = pd.cut(frame[0], 10)
    expected["new"] = pd.cut(expected[0], 10).astype(str)

    # ``sheet_name`` is given by keyword because newer pandas releases
    # deprecate passing it positionally to ``to_excel``.
    frame.to_excel(self.path, sheet_name="test1")
    # The context manager closes the workbook handle even when reading fails.
    with ExcelFile(self.path) as reader:
        recons = read_excel(reader, sheet_name="test1", index_col=0)
    tm.assert_frame_equal(expected, recons)
Example #5
Source File: test_categorical.py From recruit with Apache License 2.0 | 6 votes |
def test_sort():
    # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby  # noqa: flake8
    # The grouped counts must come back ordered by bin, so that a bar plot
    # of them has a sorted x axis:
    # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar')
    df = DataFrame({'value': np.random.randint(0, 10000, 100)})
    labels = ["{0} - {1}".format(lo, lo + 499) for lo in range(0, 10000, 500)]
    cat_labels = Categorical(labels, labels)
    bin_edges = range(0, 10500, 500)

    df = df.sort_values(by=['value'], ascending=True)
    df['value_group'] = pd.cut(df.value, bin_edges,
                               right=False, labels=cat_labels)

    grouped = df.groupby(['value_group'], observed=False)
    res = grouped['value_group'].count()

    def _lower_bound(label):
        # Each label reads "<low> - <high>"; order by the numeric low end.
        return float(label.split()[0])

    ordered_labels = sorted(res.index, key=_lower_bound)
    exp = res[ordered_labels]
    exp.index = CategoricalIndex(exp.index, name=exp.index.name)
    tm.assert_series_equal(res, exp)
Example #6
Source File: stat_sina.py From plotnine with GNU General Public License v2.0 | 6 votes |
def setup_params(self, data):
    """Return a copy of ``self.params`` with defaults and fixed settings filled in.

    * ``maxwidth`` defaults to 0.9 times ``resolution(data['x'], False)``.
    * ``bins`` defaults to 50 when neither ``binwidth`` nor ``bins`` is set.
    * ``random_state`` becomes ``np.random`` when ``None`` and a
      ``np.random.RandomState`` when given as an ``int``; any other value is
      kept unchanged.
    * ``kernel``, ``cut``, ``gridsize``, ``clip`` and ``n`` are always
      overwritten with fixed values.
    """
    params = self.params.copy()
    seed = params['random_state']

    if params['maxwidth'] is None:
        params['maxwidth'] = resolution(data['x'], False) * 0.9

    no_binning_given = (params['binwidth'] is None
                        and self.params['bins'] is None)
    if no_binning_given:
        params['bins'] = 50

    if seed is None:
        params['random_state'] = np.random
    elif isinstance(seed, int):
        params['random_state'] = np.random.RandomState(seed)

    # Required by compute_density
    params.update(
        kernel='gau',  # It has to be a gaussian kernel
        cut=0,
        gridsize=None,
        clip=(-np.inf, np.inf),
        n=512,
    )
    return params
Example #7
Source File: test_sorting.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_sort_index_intervalindex(self): # this is a de-facto sort via unstack # confirming that we sort in the order of the bins y = Series(np.random.randn(100)) x1 = Series(np.sign(np.random.randn(100))) x2 = pd.cut(Series(np.random.randn(100)), bins=[-3, -0.5, 0, 0.5, 3]) model = pd.concat([y, x1, x2], axis=1, keys=['Y', 'X1', 'X2']) result = model.groupby(['X1', 'X2'], observed=True).mean().unstack() expected = IntervalIndex.from_tuples( [(-3.0, -0.5), (-0.5, 0.0), (0.0, 0.5), (0.5, 3.0)], closed='right') result = result.columns.levels[1].categories tm.assert_index_equal(result, expected)
Example #8
Source File: test_categorical.py From recruit with Apache License 2.0 | 6 votes |
def test_observed_codes_remap(observed):
    # Group by a binned column together with a plain column and compare the
    # per-group mean with a hand-built frame.
    c1 = (3, 3, 4, 5)
    c2 = (1, 2, 3, 4)
    c3 = (10, 100, 200, 34)

    df = pd.DataFrame({'C1': list(c1), 'C2': list(c2), 'C3': list(c3)})
    values = pd.cut(df['C1'], [1, 2, 3, 6])
    values.name = "cat"
    groups_double_key = df.groupby([values, 'C2'], observed=observed)

    idx = MultiIndex.from_arrays([values, list(c2)], names=["cat", "C2"])
    expected = DataFrame({"C1": list(c1), "C3": list(c3)}, index=idx)
    if not observed:
        # With unobserved categories included, the expected frame is expanded
        # by the helper to the groupers' cartesian product.
        expected = cartesian_product_for_groupers(
            expected, [values.values, list(c2)], ['cat', 'C2'])

    result = groups_double_key.agg('mean')
    tm.assert_frame_equal(result, expected)
Example #9
Source File: test_excel.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_to_excel_interval_no_labels(self, *_):
    # see gh-19242
    #
    # Test writing Interval without labels.
    #
    # The frame written holds the raw ``pd.cut`` result; the frame expected
    # back holds the same column converted with ``astype(str)``.
    frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)),
                      dtype=np.int64)
    expected = frame.copy()

    frame["new"] = pd.cut(frame[0], 10)
    expected["new"] = pd.cut(expected[0], 10).astype(str)

    # ``sheet_name`` is given by keyword because newer pandas releases
    # deprecate passing it positionally to ``to_excel``.
    frame.to_excel(self.path, sheet_name="test1")
    # The context manager closes the workbook handle even when reading fails.
    with ExcelFile(self.path) as reader:
        recons = read_excel(reader, sheet_name="test1", index_col=0)
    tm.assert_frame_equal(expected, recons)
Example #10
Source File: test_excel.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_to_excel_interval_labels(self, *_):
    # see gh-19242
    #
    # Test writing Interval with labels.
    #
    # The frame written holds the labelled ``pd.cut`` result; the frame
    # expected back holds the same labels as a plain Series.
    frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)),
                      dtype=np.int64)
    expected = frame.copy()

    intervals = pd.cut(frame[0], 10, labels=["A", "B", "C", "D", "E",
                                             "F", "G", "H", "I", "J"])
    frame["new"] = intervals
    expected["new"] = pd.Series(list(intervals))

    # ``sheet_name`` is given by keyword because newer pandas releases
    # deprecate passing it positionally to ``to_excel``.
    frame.to_excel(self.path, sheet_name="test1")
    # The context manager closes the workbook handle even when reading fails.
    with ExcelFile(self.path) as reader:
        recons = read_excel(reader, sheet_name="test1", index_col=0)
    tm.assert_frame_equal(expected, recons)
Example #11
Source File: test_categorical.py From recruit with Apache License 2.0 | 6 votes |
def test_slicing(self): cat = Series(Categorical([1, 2, 3, 4])) reversed = cat[::-1] exp = np.array([4, 3, 2, 1], dtype=np.int64) tm.assert_numpy_array_equal(reversed.__array__(), exp) df = DataFrame({'value': (np.arange(100) + 1).astype('int64')}) df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100]) expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10) result = df.iloc[10] tm.assert_series_equal(result, expected) expected = DataFrame({'value': np.arange(11, 21).astype('int64')}, index=np.arange(10, 20).astype('int64')) expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100]) result = df.iloc[10:20] tm.assert_frame_equal(result, expected) expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8) result = df.loc[8] tm.assert_series_equal(result, expected)
Example #12
Source File: test_sorting.py From recruit with Apache License 2.0 | 6 votes |
def test_sort_index_intervalindex(self): # this is a de-facto sort via unstack # confirming that we sort in the order of the bins y = Series(np.random.randn(100)) x1 = Series(np.sign(np.random.randn(100))) x2 = pd.cut(Series(np.random.randn(100)), bins=[-3, -0.5, 0, 0.5, 3]) model = pd.concat([y, x1, x2], axis=1, keys=['Y', 'X1', 'X2']) result = model.groupby(['X1', 'X2'], observed=True).mean().unstack() expected = IntervalIndex.from_tuples( [(-3.0, -0.5), (-0.5, 0.0), (0.0, 0.5), (0.5, 3.0)], closed='right') result = result.columns.levels[1].categories tm.assert_index_equal(result, expected)
Example #13
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_to_excel_interval_labels(self, *_):
    # see gh-19242
    #
    # Test writing Interval with labels.
    #
    # The frame written holds the labelled ``pd.cut`` result; the frame
    # expected back holds the same labels as a plain Series.
    frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)),
                      dtype=np.int64)
    expected = frame.copy()

    intervals = pd.cut(frame[0], 10, labels=["A", "B", "C", "D", "E",
                                             "F", "G", "H", "I", "J"])
    frame["new"] = intervals
    expected["new"] = pd.Series(list(intervals))

    # ``sheet_name`` is given by keyword because newer pandas releases
    # deprecate passing it positionally to ``to_excel``.
    frame.to_excel(self.path, sheet_name="test1")
    # The context manager closes the workbook handle even when reading fails.
    with ExcelFile(self.path) as reader:
        recons = read_excel(reader, sheet_name="test1", index_col=0)
    tm.assert_frame_equal(expected, recons)
Example #14
Source File: test_functions.py From ibis with Apache License 2.0 | 6 votes |
def test_category_label(alltypes, df):
    # Labelling a bucketed column through the expression API must match
    # ``pd.cut`` applied to the same column with the same bins and labels.
    bins = [0, 10, 25, 50, 100]
    labels = ['a', 'b', 'c', 'd']

    expr = alltypes.double_col.bucket(bins).label(labels)
    raw = expr.execute()

    # Any warning raised while building the ordered categorical is ignored.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        result = pd.Series(pd.Categorical(raw, ordered=True))
    result.name = 'double_col'

    expected = pd.cut(df.double_col, bins, labels=labels, right=False)
    tm.assert_series_equal(result, expected)
Example #15
Source File: test_categorical.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_sort():
    # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby  # noqa: flake8
    # The grouped counts must come back ordered by bin, so that a bar plot
    # of them has a sorted x axis:
    # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar')
    df = DataFrame({'value': np.random.randint(0, 10000, 100)})
    labels = ["{0} - {1}".format(lo, lo + 499) for lo in range(0, 10000, 500)]
    cat_labels = Categorical(labels, labels)
    bin_edges = range(0, 10500, 500)

    df = df.sort_values(by=['value'], ascending=True)
    df['value_group'] = pd.cut(df.value, bin_edges,
                               right=False, labels=cat_labels)

    grouped = df.groupby(['value_group'], observed=False)
    res = grouped['value_group'].count()

    def _lower_bound(label):
        # Each label reads "<low> - <high>"; order by the numeric low end.
        return float(label.split()[0])

    ordered_labels = sorted(res.index, key=_lower_bound)
    exp = res[ordered_labels]
    exp.index = CategoricalIndex(exp.index, name=exp.index.name)
    tm.assert_series_equal(res, exp)
Example #16
Source File: monotonic_woe_binning.py From Monotonic-WOE-Binning-Algorithm with MIT License | 6 votes |
def generate_final_dataset(self):
    """Assign every row of ``self.dataset`` to a bin built from ``self.woe_summary``.

    Nothing is returned; the method works through side effects:

    * ``self.bucket`` becomes ``True`` when ``self.sign`` equals ``False`` and
      ``False`` otherwise. It is passed to ``pd.cut`` as ``right``.
    * ``self.woe_summary`` gains a ``<column>_shift`` column (the bin column
      shifted by one row, the vacated cell set to -inf or +inf) and a
      ``labels`` column produced row-wise by ``self.generate_bin_labels``.
    * ``self.bins`` holds the sorted bin edges padded with -inf and +inf.
    * ``self.dataset["bins"]`` holds each row's interval as a string with any
      leading ``[`` and trailing ``)`` stripped.
    """
    shift_col = self.column + "_shift"
    # The ``== False`` comparison is kept on purpose: unlike ``not self.sign``
    # it does not treat ``None`` as False.
    sign_is_false = self.sign == False  # noqa: E712

    if sign_is_false:
        shift_var = 1
        self.bucket = True
    else:
        shift_var = -1
        self.bucket = False

    self.woe_summary[shift_col] = self.woe_summary[self.column].shift(shift_var)

    # Shifting leaves one empty cell at the open end; fill it with the
    # matching infinity.
    if sign_is_false:
        self.woe_summary.loc[0, shift_col] = -np.inf
    else:
        self.woe_summary.loc[len(self.woe_summary) - 1, shift_col] = np.inf

    # ``np.inf`` is used rather than the ``np.Inf`` alias, which NumPy 2.0
    # removed. The edges do not depend on the branch taken above.
    self.bins = np.sort(list(self.woe_summary[self.column]) + [np.inf, -np.inf])

    self.woe_summary["labels"] = self.woe_summary.apply(self.generate_bin_labels, axis=1)

    binned = pd.cut(self.dataset[self.column], self.bins,
                    right=self.bucket, precision=0)
    self.dataset["bins"] = binned
    self.dataset["bins"] = self.dataset["bins"].astype(str)
    self.dataset['bins'] = self.dataset['bins'].map(lambda x: x.lstrip('[').rstrip(')'))
Example #17
Source File: test_categorical.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_observed_codes_remap(observed):
    # Group by a binned column together with a plain column and compare the
    # per-group mean with a hand-built frame.
    c1 = (3, 3, 4, 5)
    c2 = (1, 2, 3, 4)
    c3 = (10, 100, 200, 34)

    df = pd.DataFrame({'C1': list(c1), 'C2': list(c2), 'C3': list(c3)})
    values = pd.cut(df['C1'], [1, 2, 3, 6])
    values.name = "cat"
    groups_double_key = df.groupby([values, 'C2'], observed=observed)

    idx = MultiIndex.from_arrays([values, list(c2)], names=["cat", "C2"])
    expected = DataFrame({"C1": list(c1), "C3": list(c3)}, index=idx)
    if not observed:
        # With unobserved categories included, the expected frame is expanded
        # by the helper to the groupers' cartesian product.
        expected = cartesian_product_for_groupers(
            expected, [values.values, list(c2)], ['cat', 'C2'])

    result = groups_double_key.agg('mean')
    tm.assert_frame_equal(result, expected)
Example #18
Source File: value_counts.py From mars with Apache License 2.0 | 6 votes |
def __call__(self, inp):
    """Build the output series for a value-counts operand.

    When ``self._bins`` is set, ``inp`` is first binned with ``cut`` (with
    ``include_lowest=True``), ``self._bins`` is cleared and
    ``self._convert_index_to_interval`` is switched on. A ``TypeError`` from
    ``cut`` is re-raised with a message about numeric data.

    The dtype of the result is taken from a ``value_counts`` call on
    ``build_series(inp)``.
    """
    test_series = build_series(inp).value_counts(normalize=self.normalize)

    if self._bins is None:
        index_value = parse_index(test_series.index, store_data=False)
    else:
        from .cut import cut

        # cut
        try:
            inp = cut(inp, self._bins, include_lowest=True)
        except TypeError:  # pragma: no cover
            raise TypeError("bins argument only works with numeric data.")

        self._bins = None
        self._convert_index_to_interval = True
        index_value = parse_index(pd.CategoricalIndex([]), inp,
                                  store_data=False)

    return self.new_series([inp],
                           shape=(np.nan,),
                           index_value=index_value,
                           name=inp.name,
                           dtype=test_series.dtype)
Example #19
Source File: value_counts.py From mars with Apache License 2.0 | 6 votes |
def execute(cls, ctx, op: "DataFrameValueCounts"):
    """Run value counting for ``op`` and store the result in ``ctx``.

    In the map stage the input is passed through unchanged. In every other
    stage ``value_counts`` is applied to the input; when
    ``op.convert_index_to_interval`` is set, counting is done without
    normalisation and the counts are divided by the number of input rows
    afterwards if ``op.normalize`` is set.
    """
    data = ctx[op.input.key]

    if op.stage == OperandStage.map:
        result = data
    elif op.convert_index_to_interval:
        result = data.value_counts(normalize=False,
                                   sort=op.sort,
                                   ascending=op.ascending,
                                   bins=op.bins,
                                   dropna=op.dropna)
        if op.normalize:
            result /= data.shape[0]
    else:
        result = data.value_counts(normalize=op.normalize,
                                   sort=op.sort,
                                   ascending=op.ascending,
                                   bins=op.bins,
                                   dropna=op.dropna)

    if op.convert_index_to_interval:
        # convert CategoricalDtype which generated in `cut`
        # to IntervalDtype
        result.index = result.index.astype('interval')

    ctx[op.outputs[0].key] = result
Example #20
Source File: cut.py From mars with Apache License 2.0 | 6 votes |
def execute(cls, ctx, op):
    """Apply ``pd.cut`` to the operand's input and store the result in ``ctx``.

    ``op.bins`` and ``op.labels`` are looked up in ``ctx`` when they are
    ``Base`` or ``Entity`` instances and used as given otherwise. With
    ``op.retbins`` set, both elements of the ``pd.cut`` result are stored in
    the first two outputs.
    """
    def _resolve(arg):
        # Swap a graph object for its computed value; pass anything else on.
        return ctx[arg.key] if isinstance(arg, (Base, Entity)) else arg

    x = ctx[op.input.key]
    bins = _resolve(op.bins)
    labels = _resolve(op.labels)

    cut_kwargs = dict(right=op.right,
                      retbins=op.retbins,
                      precision=op.precision,
                      include_lowest=op.include_lowest,
                      duplicates=op.duplicates)
    try:
        ret = pd.cut(x, bins, labels=labels, **cut_kwargs)
    except ValueError:
        # fail due to buffer source array is read-only
        ret = pd.cut(x.copy(), bins, labels=labels, **cut_kwargs)

    if not op.retbins:
        ctx[op.outputs[0].key] = ret
    else:  # pragma: no cover
        ctx[op.outputs[0].key] = ret[0]
        ctx[op.outputs[1].key] = ret[1]
Example #21
Source File: model_train.py From 4thdownbot-model with MIT License | 6 votes |
def calibration_plot(preds, truth):
    """Show a calibration plot for the win probability model.

    The predictions are cut into 100 equal-width bins and the mean of
    ``truth`` is computed per bin. That mean, times 100, is plotted against
    the bin number, next to a dashed diagonal. On the diagonal, plays with a
    win probability of n% win about n% of the time.
    """
    cal_df = pd.DataFrame({'pred': preds, 'win': truth})
    cal_df['pred_bin'] = pd.cut(cal_df.pred, 100, labels=False)
    win_means = cal_df.groupby('pred_bin')['win'].mean()

    bin_numbers = win_means.index.values
    win_percentages = [100 * mean for mean in win_means.values]
    diagonal = np.arange(0, 100)

    plt.figure()
    plt.plot(bin_numbers, win_percentages, color='SteelBlue')
    # Reference line of a perfectly calibrated model.
    plt.plot(diagonal, diagonal, 'k--', alpha=0.3)
    plt.xlim([0.0, 100])
    plt.ylim([0.0, 100])
    plt.xlabel('Estimated win probability')
    plt.ylabel('True win percentage')
    plt.title('Win probability calibration, binned by percent')
    plt.show()
    return None
Example #22
Source File: test_categorical.py From vnpy_crypto with MIT License | 6 votes |
def test_slicing(self): cat = Series(Categorical([1, 2, 3, 4])) reversed = cat[::-1] exp = np.array([4, 3, 2, 1], dtype=np.int64) tm.assert_numpy_array_equal(reversed.__array__(), exp) df = DataFrame({'value': (np.arange(100) + 1).astype('int64')}) df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100]) expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10) result = df.iloc[10] tm.assert_series_equal(result, expected) expected = DataFrame({'value': np.arange(11, 21).astype('int64')}, index=np.arange(10, 20).astype('int64')) expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100]) result = df.iloc[10:20] tm.assert_frame_equal(result, expected) expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8) result = df.loc[8] tm.assert_series_equal(result, expected)
Example #23
Source File: test_categorical.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_slicing(self): cat = Series(Categorical([1, 2, 3, 4])) reversed = cat[::-1] exp = np.array([4, 3, 2, 1], dtype=np.int64) tm.assert_numpy_array_equal(reversed.__array__(), exp) df = DataFrame({'value': (np.arange(100) + 1).astype('int64')}) df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100]) expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10) result = df.iloc[10] tm.assert_series_equal(result, expected) expected = DataFrame({'value': np.arange(11, 21).astype('int64')}, index=np.arange(10, 20).astype('int64')) expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100]) result = df.iloc[10:20] tm.assert_frame_equal(result, expected) expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8) result = df.loc[8] tm.assert_series_equal(result, expected)
Example #24
Source File: test_categorical.py From vnpy_crypto with MIT License | 6 votes |
def test_observed_codes_remap(observed):
    # Group by a binned column together with a plain column and compare the
    # per-group mean with a hand-built frame.
    c1 = (3, 3, 4, 5)
    c2 = (1, 2, 3, 4)
    c3 = (10, 100, 200, 34)

    df = pd.DataFrame({'C1': list(c1), 'C2': list(c2), 'C3': list(c3)})
    values = pd.cut(df['C1'], [1, 2, 3, 6])
    values.name = "cat"
    groups_double_key = df.groupby([values, 'C2'], observed=observed)

    idx = MultiIndex.from_arrays([values, list(c2)], names=["cat", "C2"])
    expected = DataFrame({"C1": list(c1), "C3": list(c3)}, index=idx)
    if not observed:
        # With unobserved categories included, the expected frame is expanded
        # by the helper to the groupers' cartesian product.
        expected = cartesian_product_for_groupers(
            expected, [values.values, list(c2)], ['cat', 'C2'])

    result = groups_double_key.agg('mean')
    tm.assert_frame_equal(result, expected)
Example #25
Source File: test_categorical.py From vnpy_crypto with MIT License | 6 votes |
def test_sort():
    # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby  # noqa: flake8
    # The grouped counts must come back ordered by bin, so that a bar plot
    # of them has a sorted x axis:
    # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar')
    df = DataFrame({'value': np.random.randint(0, 10000, 100)})
    labels = ["{0} - {1}".format(lo, lo + 499) for lo in range(0, 10000, 500)]
    cat_labels = Categorical(labels, labels)
    bin_edges = range(0, 10500, 500)

    df = df.sort_values(by=['value'], ascending=True)
    df['value_group'] = pd.cut(df.value, bin_edges,
                               right=False, labels=cat_labels)

    grouped = df.groupby(['value_group'], observed=False)
    res = grouped['value_group'].count()

    def _lower_bound(label):
        # Each label reads "<low> - <high>"; order by the numeric low end.
        return float(label.split()[0])

    ordered_labels = sorted(res.index, key=_lower_bound)
    exp = res[ordered_labels]
    exp.index = CategoricalIndex(exp.index, name=exp.index.name)
    tm.assert_series_equal(res, exp)
Example #26
Source File: test_sorting.py From vnpy_crypto with MIT License | 6 votes |
def test_sort_index_intervalindex(self): # this is a de-facto sort via unstack # confirming that we sort in the order of the bins y = Series(np.random.randn(100)) x1 = Series(np.sign(np.random.randn(100))) x2 = pd.cut(Series(np.random.randn(100)), bins=[-3, -0.5, 0, 0.5, 3]) model = pd.concat([y, x1, x2], axis=1, keys=['Y', 'X1', 'X2']) result = model.groupby(['X1', 'X2'], observed=True).mean().unstack() expected = IntervalIndex.from_tuples( [(-3.0, -0.5), (-0.5, 0.0), (0.0, 0.5), (0.5, 3.0)], closed='right') result = result.columns.levels[1].categories tm.assert_index_equal(result, expected)
Example #27
Source File: classification_metric.py From FATE with Apache License 2.0 | 5 votes |
def quantile_binning_and_count(scores, quantile_points):
    """Count how many scores fall into each interval between quantile points.

    Every interval except the last is closed on the left only. The last
    interval is closed on both edges, so a score equal to the final quantile
    point is counted.

    Args:
        scores: 1-d values handed to ``pd.cut``.
        quantile_points: At least two interval edges. Any sliceable sequence
            works (list, tuple, numpy array); it is not modified.

    Returns:
        DataFrame with the columns ``interval`` and ``count``. The rows for
        the left-closed intervals come first, followed by one row for the
        last interval.

    Raises:
        AssertionError: If fewer than two quantile points are given.
    """
    assert len(quantile_points) >= 2

    # Plain slicing gives the same edges as deep-copying the two slices and
    # popping their last elements, without requiring a list.
    inner_left = quantile_points[:-2]
    inner_right = quantile_points[1:-1]
    last_left, last_right = quantile_points[-2], quantile_points[-1]

    counts = []
    if len(inner_left) != 0:
        inner_bins = pd.IntervalIndex.from_arrays(inner_left, inner_right,
                                                  closed='left')
        counts.append(pd.cut(scores, inner_bins).value_counts().reset_index())

    last_bin = pd.IntervalIndex.from_arrays([last_left], [last_right],
                                            closed='both')
    counts.append(pd.cut(scores, last_bin).value_counts().reset_index())

    rs = pd.concat(counts, axis=0)
    rs.columns = ['interval', 'count']
    return rs
Example #28
Source File: test_indexing.py From vnpy_crypto with MIT License | 5 votes |
def test_functions_no_warnings(self): df = DataFrame({'value': np.random.randint(0, 100, 20)}) labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] with tm.assert_produces_warning(False): df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, labels=labels)
Example #29
Source File: comparison_plot_data_preparation.py From estimagic with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _replace_by_bin_midpoint(values, bins): midpoints = (bins + bins.shift(periods=-1))[:-1] / 2 sr = pd.cut(values, bins, labels=midpoints).astype(float) sr.fillna(midpoints[0], inplace=True) return sr
Example #30
Source File: stat_summary_bin.py From plotnine with GNU General Public License v2.0 | 5 votes |
def compute_group(cls, data, scales, **params):
    """Summarise ``data`` per bin along x.

    The breaks come from ``fuzzybreaks``. Every row is assigned a bin number
    with ``pd.cut`` (written to ``data['bin']``), and the summary function
    built by ``make_summary_fun`` is applied to each bin.

    Returns:
        The summary frame with ``x`` set to the bin centre, ``bin`` made
        one-based and a ``width`` column added.
    """
    bins = params['bins']
    breaks = params['breaks']
    binwidth = params['binwidth']
    boundary = params['boundary']

    summarise = make_summary_fun(params['fun_data'],
                                 params['fun_y'],
                                 params['fun_ymin'],
                                 params['fun_ymax'],
                                 params['fun_args'])

    breaks = fuzzybreaks(scales.x, breaks, boundary, binwidth, bins)
    data['bin'] = pd.cut(data['x'],
                         bins=breaks,
                         labels=False,
                         include_lowest=True)

    def _summarise_with_bin(group):
        """
        Add `bin` column to each summary result.
        """
        summary = summarise(group)
        summary['bin'] = group['bin'].iloc[0]
        return summary

    # This is a plyr::ddply
    out = groupby_apply(data, 'bin', _summarise_with_bin)

    centers = (breaks[:-1] + breaks[1:]) * 0.5
    out['x'] = centers[out['bin'].values]
    out['bin'] += 1

    discrete_x = isinstance(scales.x, scale_discrete)
    out['width'] = 0.9 if discrete_x else np.diff(breaks)[bins-1]
    return out