Python pandas.Grouper() Examples

The following are 30 code examples of pandas.Grouper(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_function.py    From recruit with Apache License 2.0 7 votes vote down vote up
def test_nunique_with_timegrouper():
    """Check ``nunique`` on ``freq='h'`` groups against ``apply(Series.nunique)``.

    Regression test for GH 13453.
    """
    stamps = [Timestamp('2016-06-28 09:35:35'),
              Timestamp('2016-06-28 16:09:30'),
              Timestamp('2016-06-28 16:46:28')]
    test = pd.DataFrame({'time': stamps, 'data': ['1', '2', '3']})
    test = test.set_index('time')

    # Reference value: run Series.nunique on every group through apply.
    expected = test.groupby(pd.Grouper(freq='h'))['data'].apply(
        pd.Series.nunique)
    # Value under test: the groupby object's own nunique.
    result = test.groupby(pd.Grouper(freq='h'))['data'].nunique()
    tm.assert_series_equal(result, expected)


# count
# -------------------------------- 
Example #2
Source File: test_timegrouper.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_scalar_call_versus_list_call(self):
        # Issue: 17530
        data_frame = {
            'location': ['shanghai', 'beijing', 'shanghai'],
            'time': pd.Series(['2017-08-09 13:32:23', '2017-08-11 23:23:15',
                               '2017-08-11 22:23:15'],
                              dtype='datetime64[ns]'),
            'value': [1, 2, 3]
        }
        data_frame = pd.DataFrame(data_frame).set_index('time')
        grouper = pd.Grouper(freq='D')

        grouped = data_frame.groupby(grouper)
        result = grouped.count()
        grouped = data_frame.groupby([grouper])
        expected = grouped.count()

        assert_frame_equal(result, expected) 
Example #3
Source File: test_grouping.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_groupby_grouper_f_sanity_checked(self):
        """A grouper callable producing mismatched labels must raise.

        GH3035: ``index.map`` is used to apply the grouper to the index. If
        it fails on the elements, ``map`` tries it on the entire index as a
        sequence, which can yield invalid results that cause trouble down
        the line. The surprise here comes from using ``key[0:6]`` rather
        than ``str(key)[0:6]`` when the elements are ``Timestamp``: the
        result is ``Index[0:6]``, which is very confusing.
        """
        month_starts = date_range('01-Jan-2013', periods=12, freq='MS')
        ts = Series(np.random.randn(12), index=month_starts)

        def first_six(key):
            return key[0:6]

        expected_msg = r"Grouper result violates len\(labels\) == len\(data\)"
        with pytest.raises(AssertionError, match=expected_msg):
            ts.groupby(first_six)
Example #4
Source File: test_grouping.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_grouper_multilevel_freq(self):

        # GH 7885
        # with level and freq specified in a pd.Grouper
        from datetime import date, timedelta
        d0 = date.today() - timedelta(days=14)
        dates = date_range(d0, date.today())
        date_index = pd.MultiIndex.from_product(
            [dates, dates], names=['foo', 'bar'])
        df = pd.DataFrame(np.random.randint(0, 100, 225), index=date_index)

        # Check string level
        expected = df.reset_index().groupby([pd.Grouper(
            key='foo', freq='W'), pd.Grouper(key='bar', freq='W')]).sum()
        # reset index changes columns dtype to object
        expected.columns = pd.Index([0], dtype='int64')

        result = df.groupby([pd.Grouper(level='foo', freq='W'), pd.Grouper(
            level='bar', freq='W')]).sum()
        assert_frame_equal(result, expected)

        # Check integer level
        result = df.groupby([pd.Grouper(level=0, freq='W'), pd.Grouper(
            level=1, freq='W')]).sum()
        assert_frame_equal(result, expected) 
Example #5
Source File: test_groupby.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_groupby_agg_ohlc_non_first():
    """Check the frame produced by ``agg(['sum', 'ohlc'])`` on daily groups.

    Regression test for GH 21716, where ``ohlc`` is not the first
    aggregation in the list.
    """
    df = pd.DataFrame(
        [[1], [1]],
        columns=['foo'],
        index=pd.date_range('2018-01-01', periods=2, freq='D'))

    expected_columns = pd.MultiIndex.from_tuples((
        ('foo', 'ohlc', 'open'), ('foo', 'ohlc', 'high'),
        ('foo', 'ohlc', 'low'), ('foo', 'ohlc', 'close'),
        ('foo', 'sum', 'foo')))
    expected_index = pd.date_range('2018-01-01', periods=2, freq='D')
    expected = pd.DataFrame(
        [[1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1]],
        columns=expected_columns,
        index=expected_index)

    result = df.groupby(pd.Grouper(freq='D')).agg(['sum', 'ohlc'])

    tm.assert_frame_equal(result, expected)
Example #6
Source File: test_resampler_grouper.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_apply_with_mutated_index():
    # GH 15169
    index = pd.date_range('1-1-2015', '12-31-15', freq='D')
    df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index)

    def f(x):
        s = Series([1, 2], index=['a', 'b'])
        return s

    expected = df.groupby(pd.Grouper(freq='M')).apply(f)

    result = df.resample('M').apply(f)
    assert_frame_equal(result, expected)

    # A case for series
    expected = df['col1'].groupby(pd.Grouper(freq='M')).apply(f)
    result = df['col1'].resample('M').apply(f)
    assert_series_equal(result, expected) 
Example #7
Source File: test_grouping.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_groupby_grouper_f_sanity_checked(self):
        """A grouper callable producing mismatched labels must raise.

        GH3035: ``index.map`` is used to apply the grouper to the index. If
        it fails on the elements, ``map`` tries it on the entire index as a
        sequence, which can yield invalid results that cause trouble down
        the line. The surprise here comes from using ``key[0:6]`` rather
        than ``str(key)[0:6]`` when the elements are ``Timestamp``: the
        result is ``Index[0:6]``, which is very confusing.
        """
        month_starts = date_range('01-Jan-2013', periods=12, freq='MS')
        ts = Series(np.random.randn(12), index=month_starts)

        def first_six(key):
            return key[0:6]

        expected_msg = r"Grouper result violates len\(labels\) == len\(data\)"
        with pytest.raises(AssertionError, match=expected_msg):
            ts.groupby(first_six)
Example #8
Source File: test_grouping.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_list_grouper_with_nat(self):
        # GH 14715
        df = pd.DataFrame({'date': pd.date_range('1/1/2011',
                                                 periods=365, freq='D')})
        df.iloc[-1] = pd.NaT
        grouper = pd.Grouper(key='date', freq='AS')

        # Grouper in a list grouping
        result = df.groupby([grouper])
        expected = {pd.Timestamp('2011-01-01'): pd.Index(list(range(364)))}
        tm.assert_dict_equal(result.groups, expected)

        # Test case without a list
        result = df.groupby(grouper)
        expected = {pd.Timestamp('2011-01-01'): 365}
        tm.assert_dict_equal(result.groups, expected)


# get_group
# -------------------------------- 
Example #9
Source File: test_datetime_index.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_resample_nunique():
    """``nunique`` must agree between resample and ``Grouper`` groupby.

    GH 12352. The reference value applies ``nunique`` to each ``freq='D'``
    group of the ``ID`` column. It is compared with ``nunique`` reached
    through the frame-level resampler, the frame-level groupby, the
    column-level resampler and the column-level groupby.
    """
    df = DataFrame({
        'ID': {Timestamp('2015-06-05 00:00:00'): '0010100903',
               Timestamp('2015-06-08 00:00:00'): '0010150847'},
        'DATE': {Timestamp('2015-06-05 00:00:00'): '2015-06-05',
                 Timestamp('2015-06-08 00:00:00'): '2015-06-08'}})
    r = df.resample('D')
    g = df.groupby(pd.Grouper(freq='D'))
    expected = df.groupby(pd.Grouper(freq='D')).ID.apply(lambda x:
                                                         x.nunique())
    assert expected.name == 'ID'

    # Use the loop variable so that both the resampler and the groupby
    # object are exercised; reading ``r`` here would leave ``g`` untested.
    for t in [r, g]:
        result = t.ID.nunique()
        assert_series_equal(result, expected)

    result = df.ID.resample('D').nunique()
    assert_series_equal(result, expected)

    result = df.ID.groupby(pd.Grouper(freq='D')).nunique()
    assert_series_equal(result, expected)
Example #10
Source File: test_timegrouper.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_timegrouper_apply_return_type_value(self):
        """``apply`` via ``TimeGrouper`` returns the same type as via ``Grouper``.

        Issue #11742. The applied function returns one scalar per group.
        The code path that uses ``pd.TimeGrouper`` must also produce a
        ``FutureWarning``.
        """
        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
                           'value': [10, 13]})
        # Copy of the frame with the 'date' column run through to_datetime.
        df_dt = df.copy()
        df_dt['date'] = pd.to_datetime(df_dt['date'])

        def sumfunc_value(x):
            return x.value.sum()

        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value)
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            time_grouper = pd.TimeGrouper(freq='M', key='date')
            result = df_dt.groupby(time_grouper).apply(sumfunc_value)
        # Group labels are dropped on both sides before comparing.
        assert_series_equal(result.reset_index(drop=True),
                            expected.reset_index(drop=True))
Example #11
Source File: test_timegrouper.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_timegrouper_apply_return_type_series(self):
        # Using `apply` with the `TimeGrouper` should give the
        # same return type as an `apply` with a `Grouper`.
        # Issue #11742
        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
                           'value': [10, 13]})
        df_dt = df.copy()
        df_dt['date'] = pd.to_datetime(df_dt['date'])

        def sumfunc_series(x):
            return pd.Series([x['value'].sum()], ('sum',))

        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series)
        result = (df_dt.groupby(pd.Grouper(freq='M', key='date'))
                  .apply(sumfunc_series))
        assert_frame_equal(result.reset_index(drop=True),
                           expected.reset_index(drop=True)) 
Example #12
Source File: test_period_index.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_resample_nonexistent_time_bin_edge(self):
        """Resample and group tz-aware data in two regression scenarios.

        GH 19375 resamples a US/Pacific series with ``'900S'`` and expects
        it back unchanged. GH 23742 groups America/Sao_Paulo data by
        ``'1D'`` and checks the resulting index.
        """
        # GH 19375
        index = date_range('2017-03-12', '2017-03-12 1:45:00', freq='15T')
        naive = Series(np.zeros(len(index)), index=index)
        expected = naive.tz_localize('US/Pacific')
        result = expected.resample('900S').mean()
        tm.assert_series_equal(result, expected)

        # GH 23742
        hourly = date_range(start='2017-10-10', end='2017-10-20', freq='1H')
        hourly = hourly.tz_localize('UTC').tz_convert('America/Sao_Paulo')
        df = DataFrame(data=list(range(len(hourly))), index=hourly)
        daily_counts = df.groupby(pd.Grouper(freq='1D')).count()
        expected = date_range(start='2017-10-09', end='2017-10-20', freq='D',
                              tz="America/Sao_Paulo",
                              nonexistent='shift_forward', closed='left')
        tm.assert_index_equal(daily_counts.index, expected)
Example #13
Source File: test_grouping.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_grouper_getting_correct_binner(self):

        # GH 10063
        # using a non-time-based grouper and a time-based grouper
        # and specifying levels
        df = DataFrame({'A': 1}, index=pd.MultiIndex.from_product(
            [list('ab'), date_range('20130101', periods=80)], names=['one',
                                                                     'two']))
        result = df.groupby([pd.Grouper(level='one'), pd.Grouper(
            level='two', freq='M')]).sum()
        expected = DataFrame({'A': [31, 28, 21, 31, 28, 21]},
                             index=MultiIndex.from_product(
                                 [list('ab'),
                                  date_range('20130101', freq='M', periods=3)],
                                 names=['one', 'two']))
        assert_frame_equal(result, expected) 
Example #14
Source File: test_grouping.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_grouper_getting_correct_binner(self):

        # GH 10063
        # using a non-time-based grouper and a time-based grouper
        # and specifying levels
        df = DataFrame({'A': 1}, index=pd.MultiIndex.from_product(
            [list('ab'), date_range('20130101', periods=80)], names=['one',
                                                                     'two']))
        result = df.groupby([pd.Grouper(level='one'), pd.Grouper(
            level='two', freq='M')]).sum()
        expected = DataFrame({'A': [31, 28, 21, 31, 28, 21]},
                             index=MultiIndex.from_product(
                                 [list('ab'),
                                  date_range('20130101', freq='M', periods=3)],
                                 names=['one', 'two']))
        assert_frame_equal(result, expected) 
Example #15
Source File: test_grouping.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_list_grouper_with_nat(self):
        # GH 14715
        df = pd.DataFrame({'date': pd.date_range('1/1/2011',
                                                 periods=365, freq='D')})
        df.iloc[-1] = pd.NaT
        grouper = pd.Grouper(key='date', freq='AS')

        # Grouper in a list grouping
        result = df.groupby([grouper])
        expected = {pd.Timestamp('2011-01-01'): pd.Index(list(range(364)))}
        tm.assert_dict_equal(result.groups, expected)

        # Test case without a list
        result = df.groupby(grouper)
        expected = {pd.Timestamp('2011-01-01'): 365}
        tm.assert_dict_equal(result.groups, expected)


# get_group
# -------------------------------- 
Example #16
Source File: test_grouping.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_grouper_multilevel_freq(self):

        # GH 7885
        # with level and freq specified in a pd.Grouper
        from datetime import date, timedelta
        d0 = date.today() - timedelta(days=14)
        dates = date_range(d0, date.today())
        date_index = pd.MultiIndex.from_product(
            [dates, dates], names=['foo', 'bar'])
        df = pd.DataFrame(np.random.randint(0, 100, 225), index=date_index)

        # Check string level
        expected = df.reset_index().groupby([pd.Grouper(
            key='foo', freq='W'), pd.Grouper(key='bar', freq='W')]).sum()
        # reset index changes columns dtype to object
        expected.columns = pd.Index([0], dtype='int64')

        result = df.groupby([pd.Grouper(level='foo', freq='W'), pd.Grouper(
            level='bar', freq='W')]).sum()
        assert_frame_equal(result, expected)

        # Check integer level
        result = df.groupby([pd.Grouper(level=0, freq='W'), pd.Grouper(
            level=1, freq='W')]).sum()
        assert_frame_equal(result, expected) 
Example #17
Source File: test_period_index.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_resample_nonexistent_time_bin_edge(self):
        """Resample and group tz-aware data in two regression scenarios.

        GH 19375 resamples a US/Pacific series with ``'900S'`` and expects
        it back unchanged. GH 23742 groups America/Sao_Paulo data by
        ``'1D'`` and checks the resulting index.
        """
        # GH 19375
        index = date_range('2017-03-12', '2017-03-12 1:45:00', freq='15T')
        naive = Series(np.zeros(len(index)), index=index)
        expected = naive.tz_localize('US/Pacific')
        result = expected.resample('900S').mean()
        tm.assert_series_equal(result, expected)

        # GH 23742
        hourly = date_range(start='2017-10-10', end='2017-10-20', freq='1H')
        hourly = hourly.tz_localize('UTC').tz_convert('America/Sao_Paulo')
        df = DataFrame(data=list(range(len(hourly))), index=hourly)
        daily_counts = df.groupby(pd.Grouper(freq='1D')).count()
        expected = date_range(start='2017-10-09', end='2017-10-20', freq='D',
                              tz="America/Sao_Paulo",
                              nonexistent='shift_forward', closed='left')
        tm.assert_index_equal(daily_counts.index, expected)
Example #18
Source File: test_groupby.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_groupby_agg_ohlc_non_first():
    """Check the frame produced by ``agg(['sum', 'ohlc'])`` on daily groups.

    Regression test for GH 21716, where ``ohlc`` is not the first
    aggregation in the list.
    """
    df = pd.DataFrame(
        [[1], [1]],
        columns=['foo'],
        index=pd.date_range('2018-01-01', periods=2, freq='D'))

    expected_columns = pd.MultiIndex.from_tuples((
        ('foo', 'ohlc', 'open'), ('foo', 'ohlc', 'high'),
        ('foo', 'ohlc', 'low'), ('foo', 'ohlc', 'close'),
        ('foo', 'sum', 'foo')))
    expected_index = pd.date_range('2018-01-01', periods=2, freq='D')
    expected = pd.DataFrame(
        [[1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1]],
        columns=expected_columns,
        index=expected_index)

    result = df.groupby(pd.Grouper(freq='D')).agg(['sum', 'ohlc'])

    tm.assert_frame_equal(result, expected)
Example #19
Source File: test_resampler_grouper.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_apply_with_mutated_index():
    # GH 15169
    index = pd.date_range('1-1-2015', '12-31-15', freq='D')
    df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index)

    def f(x):
        s = Series([1, 2], index=['a', 'b'])
        return s

    expected = df.groupby(pd.Grouper(freq='M')).apply(f)

    result = df.resample('M').apply(f)
    assert_frame_equal(result, expected)

    # A case for series
    expected = df['col1'].groupby(pd.Grouper(freq='M')).apply(f)
    result = df['col1'].resample('M').apply(f)
    assert_series_equal(result, expected) 
Example #20
Source File: test_timegrouper.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_timegrouper_apply_return_type_value(self):
        """``apply`` via ``TimeGrouper`` returns the same type as via ``Grouper``.

        Issue #11742. The applied function returns one scalar per group.
        The code path that uses ``pd.TimeGrouper`` must also produce a
        ``FutureWarning``.
        """
        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
                           'value': [10, 13]})
        # Copy of the frame with the 'date' column run through to_datetime.
        df_dt = df.copy()
        df_dt['date'] = pd.to_datetime(df_dt['date'])

        def sumfunc_value(x):
            return x.value.sum()

        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value)
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            time_grouper = pd.TimeGrouper(freq='M', key='date')
            result = df_dt.groupby(time_grouper).apply(sumfunc_value)
        # Group labels are dropped on both sides before comparing.
        assert_series_equal(result.reset_index(drop=True),
                            expected.reset_index(drop=True))
Example #21
Source File: test_timegrouper.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_timegrouper_apply_return_type_series(self):
        # Using `apply` with the `TimeGrouper` should give the
        # same return type as an `apply` with a `Grouper`.
        # Issue #11742
        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
                           'value': [10, 13]})
        df_dt = df.copy()
        df_dt['date'] = pd.to_datetime(df_dt['date'])

        def sumfunc_series(x):
            return pd.Series([x['value'].sum()], ('sum',))

        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series)
        result = (df_dt.groupby(pd.Grouper(freq='M', key='date'))
                  .apply(sumfunc_series))
        assert_frame_equal(result.reset_index(drop=True),
                           expected.reset_index(drop=True)) 
Example #22
Source File: test_function.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_nunique_with_timegrouper():
    """Check ``nunique`` on ``freq='h'`` groups against ``apply(Series.nunique)``.

    Regression test for GH 13453.
    """
    stamps = [Timestamp('2016-06-28 09:35:35'),
              Timestamp('2016-06-28 16:09:30'),
              Timestamp('2016-06-28 16:46:28')]
    test = pd.DataFrame({'time': stamps, 'data': ['1', '2', '3']})
    test = test.set_index('time')

    # Reference value: run Series.nunique on every group through apply.
    expected = test.groupby(pd.Grouper(freq='h'))['data'].apply(
        pd.Series.nunique)
    # Value under test: the groupby object's own nunique.
    result = test.groupby(pd.Grouper(freq='h'))['data'].nunique()
    tm.assert_series_equal(result, expected)


# count
# -------------------------------- 
Example #23
Source File: test_resample.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_resample_nunique(self):

        # GH 12352
        df = DataFrame({
            'ID': {Timestamp('2015-06-05 00:00:00'): '0010100903',
                   Timestamp('2015-06-08 00:00:00'): '0010150847'},
            'DATE': {Timestamp('2015-06-05 00:00:00'): '2015-06-05',
                     Timestamp('2015-06-08 00:00:00'): '2015-06-08'}})
        r = df.resample('D')
        g = df.groupby(pd.Grouper(freq='D'))
        expected = df.groupby(pd.Grouper(freq='D')).ID.apply(lambda x:
                                                             x.nunique())
        assert expected.name == 'ID'

        for t in [r, g]:
            result = r.ID.nunique()
            assert_series_equal(result, expected)

        result = df.ID.resample('D').nunique()
        assert_series_equal(result, expected)

        result = df.ID.groupby(pd.Grouper(freq='D')).nunique()
        assert_series_equal(result, expected) 
Example #24
Source File: test_function.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_nunique_with_timegrouper():
    """Check ``nunique`` on ``freq='h'`` groups against ``apply(Series.nunique)``.

    Regression test for GH 13453.
    """
    stamps = [Timestamp('2016-06-28 09:35:35'),
              Timestamp('2016-06-28 16:09:30'),
              Timestamp('2016-06-28 16:46:28')]
    test = pd.DataFrame({'time': stamps, 'data': ['1', '2', '3']})
    test = test.set_index('time')

    # Reference value: run Series.nunique on every group through apply.
    expected = test.groupby(pd.Grouper(freq='h'))['data'].apply(
        pd.Series.nunique)
    # Value under test: the groupby object's own nunique.
    result = test.groupby(pd.Grouper(freq='h'))['data'].nunique()
    tm.assert_series_equal(result, expected)


# count
# -------------------------------- 
Example #25
Source File: test_grouping.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_grouper_multilevel_freq(self):

        # GH 7885
        # with level and freq specified in a pd.Grouper
        from datetime import date, timedelta
        d0 = date.today() - timedelta(days=14)
        dates = date_range(d0, date.today())
        date_index = pd.MultiIndex.from_product(
            [dates, dates], names=['foo', 'bar'])
        df = pd.DataFrame(np.random.randint(0, 100, 225), index=date_index)

        # Check string level
        expected = df.reset_index().groupby([pd.Grouper(
            key='foo', freq='W'), pd.Grouper(key='bar', freq='W')]).sum()
        # reset index changes columns dtype to object
        expected.columns = pd.Index([0], dtype='int64')

        result = df.groupby([pd.Grouper(level='foo', freq='W'), pd.Grouper(
            level='bar', freq='W')]).sum()
        assert_frame_equal(result, expected)

        # Check integer level
        result = df.groupby([pd.Grouper(level=0, freq='W'), pd.Grouper(
            level=1, freq='W')]).sum()
        assert_frame_equal(result, expected) 
Example #26
Source File: test_timegrouper.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_timegrouper_apply_return_type_series(self):
        # Using `apply` with the `TimeGrouper` should give the
        # same return type as an `apply` with a `Grouper`.
        # Issue #11742
        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
                           'value': [10, 13]})
        df_dt = df.copy()
        df_dt['date'] = pd.to_datetime(df_dt['date'])

        def sumfunc_series(x):
            return pd.Series([x['value'].sum()], ('sum',))

        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series)
        result = (df_dt.groupby(pd.Grouper(freq='M', key='date'))
                  .apply(sumfunc_series))
        assert_frame_equal(result.reset_index(drop=True),
                           expected.reset_index(drop=True)) 
Example #27
Source File: test_timegrouper.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_timegrouper_apply_return_type_value(self):
        """``apply`` via ``TimeGrouper`` returns the same type as via ``Grouper``.

        Issue #11742. The applied function returns one scalar per group.
        The code path that uses ``pd.TimeGrouper`` must also produce a
        ``FutureWarning``.
        """
        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
                           'value': [10, 13]})
        # Copy of the frame with the 'date' column run through to_datetime.
        df_dt = df.copy()
        df_dt['date'] = pd.to_datetime(df_dt['date'])

        def sumfunc_value(x):
            return x.value.sum()

        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value)
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            time_grouper = pd.TimeGrouper(freq='M', key='date')
            result = df_dt.groupby(time_grouper).apply(sumfunc_value)
        # Group labels are dropped on both sides before comparing.
        assert_series_equal(result.reset_index(drop=True),
                            expected.reset_index(drop=True))
Example #28
Source File: test_datetime_index.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_resample_nunique():
    """``nunique`` must agree between resample and ``Grouper`` groupby.

    GH 12352. The reference value applies ``nunique`` to each ``freq='D'``
    group of the ``ID`` column. It is compared with ``nunique`` reached
    through the frame-level resampler, the frame-level groupby, the
    column-level resampler and the column-level groupby.
    """
    df = DataFrame({
        'ID': {Timestamp('2015-06-05 00:00:00'): '0010100903',
               Timestamp('2015-06-08 00:00:00'): '0010150847'},
        'DATE': {Timestamp('2015-06-05 00:00:00'): '2015-06-05',
                 Timestamp('2015-06-08 00:00:00'): '2015-06-08'}})
    r = df.resample('D')
    g = df.groupby(pd.Grouper(freq='D'))
    expected = df.groupby(pd.Grouper(freq='D')).ID.apply(lambda x:
                                                         x.nunique())
    assert expected.name == 'ID'

    # Use the loop variable so that both the resampler and the groupby
    # object are exercised; reading ``r`` here would leave ``g`` untested.
    for t in [r, g]:
        result = t.ID.nunique()
        assert_series_equal(result, expected)

    result = df.ID.resample('D').nunique()
    assert_series_equal(result, expected)

    result = df.ID.groupby(pd.Grouper(freq='D')).nunique()
    assert_series_equal(result, expected)
Example #29
Source File: test_grouping.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_groupby_with_empty(self):
        index = pd.DatetimeIndex(())
        data = ()
        series = pd.Series(data, index)
        grouper = pd.Grouper(freq='D')
        grouped = series.groupby(grouper)
        assert next(iter(grouped), None) is None 
Example #30
Source File: test_datetime_index.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_resample_timegrouper():
    """``count`` per ``'M'`` bin matches between ``resample`` and ``Grouper``.

    GH 7227. Runs over three date lists: one without ``NaT``, one with
    ``NaT`` inserted in the middle, and one with ``NaT`` at both ends.
    Each list is checked with one and with two value columns.
    """
    dates1 = [datetime(2014, 10, 1), datetime(2014, 9, 3),
              datetime(2014, 11, 5), datetime(2014, 9, 5),
              datetime(2014, 10, 8), datetime(2014, 7, 15)]
    dates2 = dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:]
    dates3 = [pd.NaT] + dates1 + [pd.NaT]

    for dates in [dates1, dates2, dates3]:
        exp_idx = pd.DatetimeIndex(['2014-07-31', '2014-08-31',
                                    '2014-09-30',
                                    '2014-10-31', '2014-11-30'],
                                   freq='M', name='A')

        # One value column.
        df = DataFrame({'A': dates, 'B': np.arange(len(dates))})
        expected = DataFrame({'B': [1, 0, 2, 2, 1]}, index=exp_idx)

        via_resample = df.set_index('A').resample('M').count()
        assert_frame_equal(via_resample, expected)

        via_grouper = df.groupby(pd.Grouper(freq='M', key='A')).count()
        assert_frame_equal(via_grouper, expected)

        # Two value columns.
        df = DataFrame({'A': dates,
                        'B': np.arange(len(dates)),
                        'C': np.arange(len(dates))})
        expected = DataFrame({'B': [1, 0, 2, 2, 1], 'C': [1, 0, 2, 2, 1]},
                             index=exp_idx, columns=['B', 'C'])

        via_resample = df.set_index('A').resample('M').count()
        assert_frame_equal(via_resample, expected)

        via_grouper = df.groupby(pd.Grouper(freq='M', key='A')).count()
        assert_frame_equal(via_grouper, expected)