Python pandas._libs.lib.generate_bins_dt64() Examples

The following are 10 code examples of pandas._libs.lib.generate_bins_dt64(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas._libs.lib, or try the search function.
Example #1
Source File: test_bin_groupby.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_generate_bins(self):
        """Exercise both bin generators on shared inputs and check errors.

        ``lib.generate_bins_dt64`` and ``generate_bins_generic`` are called
        with the same int64 ``values``/``binner`` arrays and must produce
        the same expected bins. ``generate_bins_generic`` is additionally
        expected to raise ``ValueError`` with a specific message for each
        invalid combination of inputs.
        """
        implementations = [lib.generate_bins_dt64, generate_bins_generic]

        values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
        binner = np.array([0, 3, 6, 9], dtype=np.int64)

        # Four bin edges: expected result for each ``closed`` side.
        expected_by_side = [('left', [2, 5, 6]), ('right', [3, 6, 6])]
        for func in implementations:
            for side, expected in expected_by_side:
                bins = func(values, binner, closed=side)
                assert ((bins == np.array(expected)).all())

        # Three bin edges, right-closed only. These arrays are also the
        # ones used by the error checks below.
        values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
        binner = np.array([0, 3, 6], dtype=np.int64)
        for func in implementations:
            bins = func(values, binner, closed='right')
            assert ((bins == np.array([3, 6])).all())

        # (values, binner, expected message) for every rejected call.
        length_msg = "Invalid length for values or for binner"
        rejected = [
            (values, [], length_msg),
            (values[:0], binner, length_msg),
            (values, [4], "Values falls before first bin"),
            (values, [-3, -1], "Values falls after last bin"),
        ]
        for bad_values, bad_binner, msg in rejected:
            with pytest.raises(ValueError, match=msg):
                generate_bins_generic(bad_values, bad_binner, 'right')
Example #2
Source File: test_bin_groupby.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_generate_bins(self):
        """Exercise both bin generators on shared inputs and check errors.

        ``lib.generate_bins_dt64`` and ``generate_bins_generic`` are called
        with the same int64 ``values``/``binner`` arrays and must produce
        the same expected bins. ``generate_bins_generic`` must also raise
        ``ValueError`` for each invalid input; the message is matched so
        that one failure mode cannot be mistaken for another.
        """
        from pandas.core.groupby.groupby import generate_bins_generic
        values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
        binner = np.array([0, 3, 6, 9], dtype=np.int64)

        for func in [lib.generate_bins_dt64, generate_bins_generic]:
            bins = func(values, binner, closed='left')
            assert ((bins == np.array([2, 5, 6])).all())

            bins = func(values, binner, closed='right')
            assert ((bins == np.array([3, 6, 6])).all())

        for func in [lib.generate_bins_dt64, generate_bins_generic]:
            values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
            binner = np.array([0, 3, 6], dtype=np.int64)

            bins = func(values, binner, closed='right')
            assert ((bins == np.array([3, 6])).all())

        # Empty binner or empty values.
        msg = "Invalid length for values or for binner"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [], 'right')
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values[:0], binner, 'right')

        # First value (1) is below the only bin edge (4).
        msg = "Values falls before first bin"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [4], 'right')

        # Last value (6) is above the last bin edge (-1).
        msg = "Values falls after last bin"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [-3, -1], 'right')
Example #3
Source File: test_bin_groupby.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_generate_bins(self):
        """Exercise both bin generators on shared inputs and check errors.

        ``lib.generate_bins_dt64`` and ``generate_bins_generic`` are called
        with the same int64 ``values``/``binner`` arrays and must produce
        the same expected bins. ``generate_bins_generic`` is additionally
        expected to raise ``ValueError`` with a specific message for each
        invalid combination of inputs.
        """
        implementations = [lib.generate_bins_dt64, generate_bins_generic]

        values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
        binner = np.array([0, 3, 6, 9], dtype=np.int64)

        # Four bin edges: expected result for each ``closed`` side.
        expected_by_side = [('left', [2, 5, 6]), ('right', [3, 6, 6])]
        for func in implementations:
            for side, expected in expected_by_side:
                bins = func(values, binner, closed=side)
                assert ((bins == np.array(expected)).all())

        # Three bin edges, right-closed only. These arrays are also the
        # ones used by the error checks below.
        values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
        binner = np.array([0, 3, 6], dtype=np.int64)
        for func in implementations:
            bins = func(values, binner, closed='right')
            assert ((bins == np.array([3, 6])).all())

        # (values, binner, expected message) for every rejected call.
        length_msg = "Invalid length for values or for binner"
        rejected = [
            (values, [], length_msg),
            (values[:0], binner, length_msg),
            (values, [4], "Values falls before first bin"),
            (values, [-3, -1], "Values falls after last bin"),
        ]
        for bad_values, bad_binner, msg in rejected:
            with pytest.raises(ValueError, match=msg):
                generate_bins_generic(bad_values, bad_binner, 'right')
Example #4
Source File: test_bin_groupby.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_generate_bins(self):
        """Exercise both bin generators on shared inputs and check errors.

        ``lib.generate_bins_dt64`` and ``generate_bins_generic`` are called
        with the same int64 ``values``/``binner`` arrays and must produce
        the same expected bins. ``generate_bins_generic`` must also raise
        ``ValueError`` for each invalid input; the message is matched so
        that one failure mode cannot be mistaken for another.
        """
        from pandas.core.groupby import generate_bins_generic
        values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
        binner = np.array([0, 3, 6, 9], dtype=np.int64)

        for func in [lib.generate_bins_dt64, generate_bins_generic]:
            bins = func(values, binner, closed='left')
            assert ((bins == np.array([2, 5, 6])).all())

            bins = func(values, binner, closed='right')
            assert ((bins == np.array([3, 6, 6])).all())

        for func in [lib.generate_bins_dt64, generate_bins_generic]:
            values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
            binner = np.array([0, 3, 6], dtype=np.int64)

            bins = func(values, binner, closed='right')
            assert ((bins == np.array([3, 6])).all())

        # Empty binner or empty values.
        msg = "Invalid length for values or for binner"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [], 'right')
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values[:0], binner, 'right')

        # First value (1) is below the only bin edge (4).
        msg = "Values falls before first bin"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [4], 'right')

        # Last value (6) is above the last bin edge (-1).
        msg = "Values falls after last bin"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [-3, -1], 'right')
Example #5
Source File: test_bin_groupby.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_generate_bins(self):
        """Exercise both bin generators on shared inputs and check errors.

        ``lib.generate_bins_dt64`` and ``generate_bins_generic`` are called
        with the same int64 ``values``/``binner`` arrays and must produce
        the same expected bins. ``generate_bins_generic`` must also raise
        ``ValueError`` for each invalid input; the message is matched so
        that one failure mode cannot be mistaken for another.
        """
        from pandas.core.groupby.groupby import generate_bins_generic
        values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
        binner = np.array([0, 3, 6, 9], dtype=np.int64)

        for func in [lib.generate_bins_dt64, generate_bins_generic]:
            bins = func(values, binner, closed='left')
            assert ((bins == np.array([2, 5, 6])).all())

            bins = func(values, binner, closed='right')
            assert ((bins == np.array([3, 6, 6])).all())

        for func in [lib.generate_bins_dt64, generate_bins_generic]:
            values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
            binner = np.array([0, 3, 6], dtype=np.int64)

            bins = func(values, binner, closed='right')
            assert ((bins == np.array([3, 6])).all())

        # Empty binner or empty values.
        msg = "Invalid length for values or for binner"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [], 'right')
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values[:0], binner, 'right')

        # First value (1) is below the only bin edge (4).
        msg = "Values falls before first bin"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [4], 'right')

        # Last value (6) is above the last bin edge (-1).
        msg = "Values falls after last bin"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [-3, -1], 'right')
Example #6
Source File: resample.py    From recruit with Apache License 2.0 4 votes vote down vote up
def _get_time_bins(self, ax):
        """Compute the binner, bins and labels for a DatetimeIndex axis.

        Parameters
        ----------
        ax : DatetimeIndex
            Axis to bin.

        Returns
        -------
        tuple
            ``(binner, bins, labels)``. For an empty ``ax`` the binner and
            labels are the same empty DatetimeIndex and ``bins`` is ``[]``.

        Raises
        ------
        TypeError
            If ``ax`` is not a DatetimeIndex.
        """
        if not isinstance(ax, DatetimeIndex):
            raise TypeError('axis must be a DatetimeIndex, but got '
                            'an instance of %r' % type(ax).__name__)

        if not len(ax):
            empty = DatetimeIndex(data=[], freq=self.freq, name=ax.name)
            return empty, [], empty

        first, last = _get_timestamp_range_edges(
            ax.min(), ax.max(), self.freq,
            closed=self.closed, base=self.base)

        # GH #12037
        # use first/last directly instead of call replace() on them
        # because replace() will swallow the nanosecond part
        # thus last bin maybe slightly before the end if the end contains
        # nanosecond part and lead to `Values falls after last bin` error
        full_range = date_range(
            freq=self.freq, start=first, end=last,
            tz=ax.tz, name=ax.name,
            ambiguous='infer', nonexistent='shift_forward')

        ax_values = ax.asi8
        binner, bin_edges = self._adjust_bin_edges(full_range, ax_values)

        # general version, knowing nothing about relative frequencies
        bins = lib.generate_bins_dt64(
            ax_values, bin_edges, self.closed, hasnans=ax.hasnans)

        # Right-closed bins take their labels from the adjusted binner;
        # otherwise the labels stay on the unadjusted range. A right-hand
        # label drops the first entry in either case.
        labels = binner if self.closed == 'right' else full_range
        if self.label == 'right':
            labels = labels[1:]

        if ax.hasnans:
            binner = binner.insert(0, NaT)
            labels = labels.insert(0, NaT)

        # if we end up with more labels than bins
        # adjust the labels
        # GH4076
        n_bins = len(bins)
        if n_bins < len(labels):
            labels = labels[:n_bins]

        return binner, bins, labels
Example #7
Source File: resample.py    From vnpy_crypto with MIT License 4 votes vote down vote up
def _get_period_bins(self, ax):
        """Compute the binner, bins and labels for a PeriodIndex axis.

        Parameters
        ----------
        ax : PeriodIndex
            Axis to bin.

        Returns
        -------
        tuple
            ``(binner, bins, labels)``. When ``ax`` holds no non-NaT
            values, the binner and labels are the same empty PeriodIndex
            and ``bins`` is ``[]``.

        Raises
        ------
        TypeError
            If ``ax`` is not a PeriodIndex.
        """
        if not isinstance(ax, PeriodIndex):
            raise TypeError('axis must be a PeriodIndex, but got '
                            'an instance of %r' % type(ax).__name__)

        memb = ax.asfreq(self.freq, how=self.convention)

        # NaT handling as in pandas._lib.lib.generate_bins_dt64()
        nat_count = 0
        if memb.hasnans:
            nat_mask = memb._isnan
            nat_count = np.sum(nat_mask)
            memb = memb[~nat_mask]

        # if index contains no valid (non-NaT) values, return empty index
        if len(memb) == 0:
            empty = PeriodIndex(data=[], freq=self.freq, name=ax.name)
            return empty, [], empty

        start = ax.min().asfreq(self.freq, how=self.convention)
        end = ax.max().asfreq(self.freq, how='end')

        binner = PeriodIndex(start=start, end=end,
                             freq=self.freq, name=ax.name)
        labels = binner

        i8 = memb.asi8
        first_i8, last_i8 = i8[0], i8[-1]
        freq_mult = self.freq.n

        # when upsampling to subperiods, we need to generate enough bins
        expected_bins_count = len(binner) * freq_mult
        i8_extend = expected_bins_count - (last_i8 - first_i8)
        rng = np.arange(first_i8, last_i8 + i8_extend, freq_mult)
        rng += freq_mult
        bins = memb.searchsorted(rng, side='left')

        if nat_count > 0:
            # NaT handling as in pandas._lib.lib.generate_bins_dt64()
            # shift bins by the number of NaT
            bins += nat_count
            bins = np.insert(bins, 0, nat_count)
            binner = binner.insert(0, tslib.NaT)
            labels = labels.insert(0, tslib.NaT)

        return binner, bins, labels
Example #8
Source File: resample.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 4 votes vote down vote up
def _get_time_bins(self, ax):
        """Compute the binner, bins and labels for a DatetimeIndex axis.

        Parameters
        ----------
        ax : DatetimeIndex
            Axis to bin.

        Returns
        -------
        tuple
            ``(binner, bins, labels)``. For an empty ``ax`` the binner and
            labels are the same empty DatetimeIndex and ``bins`` is ``[]``.

        Raises
        ------
        TypeError
            If ``ax`` is not a DatetimeIndex.
        """
        if not isinstance(ax, DatetimeIndex):
            raise TypeError('axis must be a DatetimeIndex, but got '
                            'an instance of %r' % type(ax).__name__)

        if not len(ax):
            empty = DatetimeIndex(data=[], freq=self.freq, name=ax.name)
            return empty, [], empty

        first, last = _get_timestamp_range_edges(
            ax.min(), ax.max(), self.freq,
            closed=self.closed, base=self.base)

        # GH #12037
        # use first/last directly instead of call replace() on them
        # because replace() will swallow the nanosecond part
        # thus last bin maybe slightly before the end if the end contains
        # nanosecond part and lead to `Values falls after last bin` error
        full_range = date_range(
            freq=self.freq, start=first, end=last,
            tz=ax.tz, name=ax.name,
            ambiguous='infer', nonexistent='shift_forward')

        ax_values = ax.asi8
        binner, bin_edges = self._adjust_bin_edges(full_range, ax_values)

        # general version, knowing nothing about relative frequencies
        bins = lib.generate_bins_dt64(
            ax_values, bin_edges, self.closed, hasnans=ax.hasnans)

        # Right-closed bins take their labels from the adjusted binner;
        # otherwise the labels stay on the unadjusted range. A right-hand
        # label drops the first entry in either case.
        labels = binner if self.closed == 'right' else full_range
        if self.label == 'right':
            labels = labels[1:]

        if ax.hasnans:
            binner = binner.insert(0, NaT)
            labels = labels.insert(0, NaT)

        # if we end up with more labels than bins
        # adjust the labels
        # GH4076
        n_bins = len(bins)
        if n_bins < len(labels):
            labels = labels[:n_bins]

        return binner, bins, labels
Example #9
Source File: resample.py    From Splunking-Crime with GNU Affero General Public License v3.0 4 votes vote down vote up
def _get_period_bins(self, ax):
        """Compute the binner, bins and labels for a PeriodIndex axis.

        Parameters
        ----------
        ax : PeriodIndex
            Axis to bin.

        Returns
        -------
        tuple
            ``(binner, bins, labels)``. When ``ax`` holds no non-NaT
            values, the binner and labels are the same empty PeriodIndex
            and ``bins`` is ``[]``.

        Raises
        ------
        TypeError
            If ``ax`` is not a PeriodIndex.
        """
        if not isinstance(ax, PeriodIndex):
            raise TypeError('axis must be a PeriodIndex, but got '
                            'an instance of %r' % type(ax).__name__)

        memb = ax.asfreq(self.freq, how=self.convention)

        # NaT handling as in pandas._lib.lib.generate_bins_dt64()
        nat_count = 0
        if memb.hasnans:
            nat_mask = memb._isnan
            nat_count = np.sum(nat_mask)
            memb = memb[~nat_mask]

        # if index contains no valid (non-NaT) values, return empty index
        if len(memb) == 0:
            empty = PeriodIndex(data=[], freq=self.freq, name=ax.name)
            return empty, [], empty

        start = ax.min().asfreq(self.freq, how=self.convention)
        end = ax.max().asfreq(self.freq, how='end')

        binner = PeriodIndex(start=start, end=end,
                             freq=self.freq, name=ax.name)
        labels = binner

        i8 = memb.asi8
        first_i8, last_i8 = i8[0], i8[-1]
        freq_mult = self.freq.n

        # when upsampling to subperiods, we need to generate enough bins
        expected_bins_count = len(binner) * freq_mult
        i8_extend = expected_bins_count - (last_i8 - first_i8)
        rng = np.arange(first_i8, last_i8 + i8_extend, freq_mult)
        rng += freq_mult
        bins = memb.searchsorted(rng, side='left')

        if nat_count > 0:
            # NaT handling as in pandas._lib.lib.generate_bins_dt64()
            # shift bins by the number of NaT
            bins += nat_count
            bins = np.insert(bins, 0, nat_count)
            binner = binner.insert(0, tslib.NaT)
            labels = labels.insert(0, tslib.NaT)

        return binner, bins, labels
Example #10
Source File: resample.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def _get_period_bins(self, ax):
        """Compute the binner, bins and labels for a PeriodIndex axis.

        Parameters
        ----------
        ax : PeriodIndex
            Axis to bin.

        Returns
        -------
        tuple
            ``(binner, bins, labels)``. When ``ax`` holds no non-NaT
            values, the binner and labels are the same empty PeriodIndex
            and ``bins`` is ``[]``.

        Raises
        ------
        TypeError
            If ``ax`` is not a PeriodIndex.
        """
        if not isinstance(ax, PeriodIndex):
            raise TypeError('axis must be a PeriodIndex, but got '
                            'an instance of %r' % type(ax).__name__)

        memb = ax.asfreq(self.freq, how=self.convention)

        # NaT handling as in pandas._lib.lib.generate_bins_dt64()
        nat_count = 0
        if memb.hasnans:
            nat_mask = memb._isnan
            nat_count = np.sum(nat_mask)
            memb = memb[~nat_mask]

        # if index contains no valid (non-NaT) values, return empty index
        if len(memb) == 0:
            empty = PeriodIndex(data=[], freq=self.freq, name=ax.name)
            return empty, [], empty

        start = ax.min().asfreq(self.freq, how=self.convention)
        end = ax.max().asfreq(self.freq, how='end')

        binner = PeriodIndex(start=start, end=end,
                             freq=self.freq, name=ax.name)
        labels = binner

        i8 = memb.asi8
        first_i8, last_i8 = i8[0], i8[-1]
        freq_mult = self.freq.n

        # when upsampling to subperiods, we need to generate enough bins
        expected_bins_count = len(binner) * freq_mult
        i8_extend = expected_bins_count - (last_i8 - first_i8)
        rng = np.arange(first_i8, last_i8 + i8_extend, freq_mult)
        rng += freq_mult
        bins = memb.searchsorted(rng, side='left')

        if nat_count > 0:
            # NaT handling as in pandas._lib.lib.generate_bins_dt64()
            # shift bins by the number of NaT
            bins += nat_count
            bins = np.insert(bins, 0, nat_count)
            binner = binner.insert(0, tslib.NaT)
            labels = labels.insert(0, tslib.NaT)

        return binner, bins, labels