Python pandas.groupby() Examples

The following are 8 code examples of pandas.groupby(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: plotting.py    From fitbit-analyzer with Apache License 2.0 6 votes vote down vote up
def _plotWeekdayStats(stats, columns, groupBy=True):
    """
    Plot the given stats per weekday, one facet per stat.

    :param stats: data to plot; needs a datetime-like 'date' column when
        groupBy is True, otherwise an existing 'weekday' column
    :param columns: columns from stats to plot
    :param groupBy: when True, average the stats by weekday of 'date' first
    :return: the grid object returned by sns.factorplot
    """
    dataToPlot = stats.copy()
    # Group by weekday and rename date column
    if groupBy:
        dataToPlot = dataToPlot.groupby(stats['date'].dt.weekday).mean()
        dataToPlot = dataToPlot.reset_index().rename(columns={'date': 'weekday'})

    # change stats from columns to row attribute
    dataToPlot = pd.melt(dataToPlot, id_vars=['weekday'], value_vars=columns,
                         var_name='stats', value_name='val')
    # Rename stats and weekdays. Assign the result back instead of calling
    # replace(inplace=True) on the selected column: the chained in-place form
    # acts on an intermediate Series and is not guaranteed to update the frame
    # (it has no effect under pandas Copy-on-Write).
    dataToPlot['stats'] = dataToPlot['stats'].replace(NAMES)
    dataToPlot['weekday'] = dataToPlot['weekday'].replace(dayOfWeek)
    # Plot
    g = sns.factorplot(data=dataToPlot, x="weekday", y="val", col="stats",
                       order=dayOfWeekOrder, kind="point", sharey=False, col_wrap=3)
    g.set_xticklabels(rotation=45)
    g.set(xlabel='')
    return g
Example #2
Source File: test_api.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_groupby(self):
    """Check that calling the top-level pd.groupby emits a FutureWarning."""
    expectFutureWarning = tm.assert_produces_warning(FutureWarning,
                                                     check_stacklevel=False)
    with expectFutureWarning:
        values = pd.Series([1, 2, 3])
        keys = [1, 1, 1]
        pd.groupby(values, keys)
Example #3
Source File: test_api.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_groupby(self):
    """Check that calling the top-level pd.groupby emits a FutureWarning."""
    warningCheck = tm.assert_produces_warning(FutureWarning,
                                              check_stacklevel=False)
    with warningCheck:
        series = pd.Series([1, 2, 3])
        groupKeys = [1, 1, 1]
        pd.groupby(series, groupKeys)
Example #4
Source File: plotting.py    From fitbit-analyzer with Apache License 2.0 5 votes vote down vote up
def _prepareWeekdayByMonthStats(stats):
    """
    Average the stats for every (weekday, month) combination.

    :param stats: data to aggregate; needs a datetime-like 'date' column
    :return: a new frame with 'day' and 'month' columns (values mapped through
        dayOfWeek and months) plus the mean of the remaining columns
    """
    # Add day and month columns, and groupby (work on a copy so the caller's
    # frame does not gain the helper columns)
    stats = stats.copy()
    stats['day'] = stats['date'].dt.weekday
    stats['month'] = stats['date'].dt.month
    dataToPlot = stats.groupby(['day', 'month']).mean()

    dataToPlot = dataToPlot.reset_index()
    # Assign the result back instead of calling replace(inplace=True) on the
    # selected column: the chained in-place form acts on an intermediate Series
    # and is not guaranteed to update the frame (no effect under pandas
    # Copy-on-Write).
    dataToPlot['day'] = dataToPlot['day'].replace(dayOfWeek)
    dataToPlot['month'] = dataToPlot['month'].replace(months)

    return dataToPlot

# def plotWeekdayStats(stats, columns):
#     """
#     Plot aggregated (mean) stats by dayOfWeek
#     :param stats: data to plot
#     :param columns: columns from stats to plot
#     """
#     MEASURE_NAME = 'weekday'
#     dayOfWeek={0:'Mon', 1:'Tue', 2:'Wed', 3:'Thur', 4:'Fri', 5:'Sat', 6:'Sun'}
#     order = ['Mon','Tue','Wed','Thur','Fri','Sat','Sun']
#     stats[MEASURE_NAME] = stats[MEASURE_NAME].map(dayOfWeek)
#
#     f, axes = getAxes(2,2)
#     for i, c in enumerate(columns):
#         if c in NAMES:
#             c = NAMES[c]
#         g = sns.barplot(x=MEASURE_NAME, y=c, data=stats, order=order, ax=axes[i])
#         g.set_xlabel('')
#     sns.plt.show()
#     #plot(stats, columns, MEASURE_NAME, 2, 3, order=order) 
Example #5
Source File: plotting.py    From fitbit-analyzer with Apache License 2.0 5 votes vote down vote up
def _prepareYearAndMonthStats(stats, columns):
    """
    Average the stats per monthly period and reshape them to long format.

    :param stats: data to aggregate; needs a datetime-like 'date' column
    :param columns: columns from stats to keep as values
    :return: a frame with 'date' (monthly period), 'stats' and 'val' columns
    """
    # Group by month and change stats from columns to row attribute
    dataToPlot = stats.groupby(stats['date'].dt.to_period("M")).mean()
    dataToPlot = pd.melt(dataToPlot.reset_index(), id_vars=['date'], value_vars=columns,
                         var_name='stats', value_name='val')
    # Rename stats. Assign the result back instead of calling
    # replace(inplace=True) on the selected column: the chained in-place form
    # acts on an intermediate Series and is not guaranteed to update the frame
    # (no effect under pandas Copy-on-Write).
    dataToPlot['stats'] = dataToPlot['stats'].replace(NAMES)
    return dataToPlot
Example #6
Source File: plotting.py    From fitbit-analyzer with Apache License 2.0 5 votes vote down vote up
def plotDailyStatsHb(data):
    """
    Plot the per-day mean of data and show the figure.

    :param data: data to plot; grouped by the calendar date of its
        hbStats.NAME_DT_COL column
    """
    calendarDay = data[hbStats.NAME_DT_COL].dt.date
    dailyMeans = data.groupby(calendarDay).mean()
    ax = dailyMeans.plot()
    # Optional overlay (disabled): 30-day rolling mean on the same axes
    #dailyMeans.rolling(30).mean().plot(ax=ax)
    sns.plt.show()
Example #7
Source File: test_api.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_groupby(self):
    """Check that calling the top-level pd.groupby emits a FutureWarning."""
    futureWarningGuard = tm.assert_produces_warning(FutureWarning,
                                                    check_stacklevel=False)
    with futureWarningGuard:
        data = pd.Series([1, 2, 3])
        labels = [1, 1, 1]
        pd.groupby(data, labels)
Example #8
Source File: plotting.py    From fitbit-analyzer with Apache License 2.0 4 votes vote down vote up
def _plotMonthlyStats(stats, columns, groupBy=True):
    """
    Plot the given stats per month as bars, one facet per stat.

    :param stats: data to plot; needs a datetime-like 'date' column when
        groupBy is True, otherwise an existing 'month' column
    :param columns: columns from stats to plot
    :param groupBy: when True, average the stats by month of 'date' first
    :return: the grid object returned by sns.factorplot
    """
    dataToPlot = stats.copy()
    # Group by month and rename date column
    if groupBy:
        dataToPlot = dataToPlot.groupby(stats['date'].dt.month).mean()
        dataToPlot = dataToPlot.reset_index().rename(columns={'date': 'month'})

    # change stats from columns to row attribute
    dataToPlot = pd.melt(dataToPlot, id_vars=['month'], value_vars=columns,
                         var_name='stats', value_name='val')
    # Rename stats and months. Assign the result back instead of calling
    # replace(inplace=True) on the selected column: the chained in-place form
    # acts on an intermediate Series and is not guaranteed to update the frame
    # (no effect under pandas Copy-on-Write).
    dataToPlot['stats'] = dataToPlot['stats'].replace(NAMES)
    dataToPlot['month'] = dataToPlot['month'].replace(months)
    # Keep only the months present in the data, in monthsOrder order; compute
    # the set of present months once instead of once per candidate month.
    presentMonths = set(dataToPlot['month'].unique())
    order = [m for m in monthsOrder if m in presentMonths]
    # Plot
    g = sns.factorplot(data=dataToPlot, x="month", y="val", col="stats", order=order, kind="bar", sharey=False)
    g.set_xticklabels(rotation=45)
    g.set(xlabel='')
    return g

# def _plotMonthlyStats(stats, columns):
#     """
#     Plot aggregated (mean) stats by month
#     :param stats: data to plot
#     :param columns: columns from stats to plot
#     """
#     MEASURE_NAME = 'month'
#     months={1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', 8:'Aug',
#             9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
#     order = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
#     stats[MEASURE_NAME] = stats[MEASURE_NAME].map(months)
#
#     order = [m for m in order if m in stats[MEASURE_NAME].unique()]
#
#     f, axes = getAxes(2,2)
#     for i, c in enumerate(columns):
#         if c in NAMES:
#             c = NAMES[c]
#         g = sns.barplot(x=MEASURE_NAME, y=c, data=stats, order=order, ax=axes[i])
#         g.set_xlabel('')
#     sns.plt.show()