Python pandas.DatetimeIndex() Examples

The following code examples show how to use pandas.DatetimeIndex(). They are taken from open-source Python projects. You can vote up the examples you find useful or vote down the ones you don't.

Example 1
Project: performance_tracker   Author: metro-ontime   File: analyze_estimates.py    GNU General Public License v3.0 6 votes vote down vote up
def match_times(stop_id, estimates, schedule):
    """Attach to each estimated arrival the nearest scheduled arrival time.

    :param stop_id: identifier of the stop being analyzed (kept for interface
        compatibility; not used in the matching itself)
    :param estimates: DataFrame with a ``datetime`` column of actual arrivals
    :param schedule: DataFrame with a ``datetime`` column of scheduled arrivals
    :return: ``estimates`` with a new ``closest_scheduled`` column, or ``None``
        if the matching fails (historical best-effort behavior, see below)
    """
    schedule = schedule.set_index(pd.DatetimeIndex(schedule["datetime"])).sort_index()
    # This technique finds the closest scheduled time to actual arrivals
    # This is flawed since it does not account for scheduled arrivals where
    # a train never arrived.
    # It's difficult to search the other way around however, since our estimated
    # arrival times are incomplete (see "select.dropna" in "estimate_arrivals").
    # If we search for the closest estimated arrival to each scheduled stop,
    # we will get some that are far apart because the actual train was likely associated
    # with a different scheduled stop time.
    # This way seems to be fairer on Metro, but we are open to improvements!

    # Try clause is here because there was an unexplained bug occurring on April 10 2019 with data inputs from around 1:35pm. There was an index (-1) out of range error.
    # Exact cause of the issue is still uncertain but there was a vehicle position observation out of range on the blue line at that time.
    try:
        # FIX: Index.get_loc(..., method="nearest") was deprecated in pandas
        # 1.4 and removed in 2.0; get_indexer also vectorizes the lookup
        # instead of one get_loc call per row.
        nearest = schedule.index.get_indexer(
            pd.DatetimeIndex(estimates["datetime"]), method="nearest"
        )
        estimates.loc[:, "closest_scheduled"] = schedule.index[nearest]
        return estimates
    except Exception:
        # FIX: narrowed the bare except so KeyboardInterrupt/SystemExit are
        # no longer swallowed; the best-effort None return is preserved.
        return None
Example 2
Project: pyplis   Author: jgliss   File: processing.py    GNU General Public License v3.0 6 votes vote down vote up
def get_poly_vals(self, time_stamps, ext_border_secs=0.0):
    """Evaluate the fitted polynomial at the given time stamps.

    :param time_stamps: a single datetime, a list of datetimes, or a
        DatetimeIndex at which the polynomial is evaluated
    :param float ext_border_secs: tolerance (in seconds) by which time stamps
        may lie outside this series' start / stop range, default 0.0
    :return: array of polynomial values, one per time stamp
    :raises AttributeError: if no polynomial has been fitted yet
    :raises ValueError: if ``time_stamps`` is not a supported type
    :raises IndexError: if any time stamp lies outside the series range
    """
    if not isinstance(self.poly_model, poly1d):
        # FIX: the two literals previously concatenated to "please
        # callfunction" (missing separating space).
        raise AttributeError("No polynomial available, please call "
                             "function fit_polynomial first")
    if isinstance(time_stamps, datetime):
        # normalize a scalar datetime to a one-element list
        time_stamps = [time_stamps, ]
    if not any(isinstance(time_stamps, x) for x in [list, DatetimeIndex]):
        raise ValueError("Invalid input for time stamps, need list")
    if not all(self.includes_timestamp(x, ext_border_secs)
               for x in time_stamps):
        raise IndexError("At least one of the time stamps is not included "
                         "in this series: %s - %s"
                         % (self.start, self.stop))
    # evaluate on matplotlib's numeric date representation (date2num)
    return asarray([self.poly_model(date2num(t)) for t in time_stamps])
Example 3
Project: arctic   Author: man-group   File: numpy_records.py    GNU Lesser General Public License v2.1 6 votes vote down vote up
def _multi_index_to_records(index, empty_index):
    # array of tuples to numpy cols. copy copy copy
    if not empty_index:
        ix_vals = list(map(np.array, [index.get_level_values(i) for i in range(index.nlevels)]))
    else:
        # empty multi index has no size, create empty arrays for recarry.
        ix_vals = [np.array([]) for n in index.names]
    index_names = list(index.names)
    count = 0
    for i, n in enumerate(index_names):
        if n is None:
            index_names[i] = 'level_%d' % count
            count += 1
            log.info("Level in MultiIndex has no name, defaulting to %s" % index_names[i])
    index_tz = [get_timezone(i.tz) if isinstance(i, DatetimeIndex) else None for i in index.levels]
    return ix_vals, index_names, index_tz 
Example 4
Project: arctic   Author: man-group   File: numpy_records.py    GNU Lesser General Public License v2.1 6 votes vote down vote up
def _index_to_records(self, df):
        metadata = {}
        index = df.index
        index_tz = None

        if isinstance(index, MultiIndex):
            ix_vals, index_names, index_tz = _multi_index_to_records(index, len(df) == 0)
        else:
            ix_vals = [index.values]
            index_names = list(index.names)
            if index_names[0] is None:
                index_names = ['index']
                log.info("Index has no name, defaulting to 'index'")
            if isinstance(index, DatetimeIndex) and index.tz is not None:
                index_tz = get_timezone(index.tz)

        if index_tz is not None:
            metadata['index_tz'] = index_tz
        metadata['index'] = index_names

        return index_names, ix_vals, metadata 
Example 5
Project: arctic   Author: man-group   File: numpy_records.py    GNU Lesser General Public License v2.1 6 votes vote down vote up
def _index_from_records(self, recarr):
        """Rebuild a pandas Index/MultiIndex from a recarray's dtype metadata.

        ``recarr.dtype.metadata['index']`` holds the ordered index level
        names; the optional ``'index_tz'`` entry holds the stored timezone
        (a single value for a flat index, a per-level list for a MultiIndex).
        """
        index = recarr.dtype.metadata['index']

        if len(index) == 1:
            # flat index: a single named column in the recarray
            rtn = Index(np.copy(recarr[str(index[0])]), name=index[0])
            # datetime values were stored as UTC; convert back to original tz
            if isinstance(rtn, DatetimeIndex) and 'index_tz' in recarr.dtype.metadata:
                rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'])
        else:
            level_arrays = []
            # for a MultiIndex, 'index_tz' is a list aligned with the levels
            index_tz = recarr.dtype.metadata.get('index_tz', [])
            for level_no, index_name in enumerate(index):
                # build each index level separately to ensure we end up with the right index dtype
                level = Index(np.copy(recarr[str(index_name)]))
                if level_no < len(index_tz):
                    tz = index_tz[level_no]
                    if tz is not None:
                        if not isinstance(level, DatetimeIndex) and len(level) == 0:
                            # index type information got lost during save as the index was empty, cast back
                            level = DatetimeIndex([], tz=tz)
                        else:
                            level = level.tz_localize('UTC').tz_convert(tz)
                level_arrays.append(level)
            rtn = MultiIndex.from_arrays(level_arrays, names=index)
        return rtn
Example 6
Project: arctic   Author: man-group   File: date_chunker.py    GNU Lesser General Public License v2.1 6 votes vote down vote up
def to_mongo(self, range_obj):
    """
    takes the range object used for this chunker type
    and converts it into a string that can be use for a
    mongo query that filters by the range

    returns
    -------
    dict
    """
    # normalize index/tuple inputs into a DateRange over their endpoints
    if isinstance(range_obj, (pd.DatetimeIndex, tuple)):
        range_obj = DateRange(range_obj[0], range_obj[-1])
    start, end = range_obj.start, range_obj.end
    if start and end:
        # bounded on both sides: chunk must overlap [start, end]
        return {'$and': [{START: {'$lte': end}}, {END: {'$gte': start}}]}
    if start:
        return {END: {'$gte': start}}
    if end:
        return {START: {'$lte': end}}
    return {}
Example 7
Project: arctic   Author: man-group   File: date_chunker.py    GNU Lesser General Public License v2.1 6 votes vote down vote up
def exclude(self, data, range_obj):
    """
    Removes data within the bounds of the range object (inclusive)

    returns
    -------
    data, filtered by range_obj
    """
    # normalize index/tuple inputs into a DateRange over their endpoints
    if isinstance(range_obj, (pd.DatetimeIndex, tuple)):
        range_obj = DateRange(range_obj[0], range_obj[-1])
    if 'date' in data.index.names:
        dates = data.index.get_level_values('date')
        return data[(dates < range_obj.start) | (dates > range_obj.end)]
    if 'date' in data.columns:
        outside = (data.date < range_obj.start) | (data.date > range_obj.end)
        return data[outside]
    # no date information to filter on: return unchanged
    return data
Example 8
Project: arctic   Author: man-group   File: test_fixes.py    GNU Lesser General Public License v2.1 6 votes vote down vote up
def test_missing_cols(chunkstore_lib):
    # Appending a frame that lacks column 'B' must backfill NaN for the
    # missing column on read, and column subsetting must still work.
    index = DatetimeIndex(pd.date_range('2019-01-01', periods=3, freq='D'), name='date')
    index2 = DatetimeIndex(pd.date_range('2019-01-04', periods=3, freq='D'), name='date')
    expected_index = DatetimeIndex(pd.date_range('2019-01-01', periods=6, freq='D'), name='date')
    expected_df = DataFrame({'A': [1, 2, 3, 40, 50, 60], 'B': [5.0,6.0,7.0, np.nan, np.nan, np.nan]}, index=expected_index)

    df = pd.DataFrame({'A': [1, 2, 3], 'B': [5,6,7]}, index=index)
    chunkstore_lib.write('test', df, chunk_size='D')

    # the appended frame only has column 'A'
    df = pd.DataFrame({'A': [40, 50, 60]}, index=index2)
    chunkstore_lib.append('test', df, chunk_size='D')


    assert_frame_equal(chunkstore_lib.read('test'), expected_df)
    # reading a single column must also reflect the NaN backfill
    df = chunkstore_lib.read('test', columns=['B'])
    assert_frame_equal(df, expected_df['B'].to_frame())
Example 9
Project: arctic   Author: man-group   File: test_pandas_store.py    GNU Lesser General Public License v2.1 6 votes vote down vote up
def test_dataframe_append_should_promote_string_column(library):
    # The initial frame stores strings in a 10-byte field ('a10'); the
    # appended frame uses a 30-byte field ('a30'), so the stored column
    # must be promoted to the wider string dtype on append.
    data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10')])
    data[:] = [(1, 2., 'Hello'), (2, 3., "World")]
    df = DataFrame(data, index=DatetimeIndex(np.array([dt(2013, 1, 1),
                                                       dt(2013, 1, 2)]).astype('datetime64[ns]'), name=[u'DATETIME']))
    data2 = np.zeros((1,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a30')])
    data2[:] = [(3, 4., 'Hello World - Good Morning')]
    df2 = DataFrame(data2, index=DatetimeIndex(np.array([dt(2013, 1, 3)]).astype('datetime64[ns]'), name=[u'DATETIME']))
    # the expected frame carries the promoted 'a30' dtype for all rows
    expected_data = np.zeros((3,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a30')])
    expected_data[:] = [(1, 2., 'Hello'), (2, 3., "World"), (3, 4., 'Hello World - Good Morning')]
    expected = DataFrame(expected_data, index=DatetimeIndex(np.array([dt(2013, 1, 1),
                                                                       dt(2013, 1, 2),
                                                                       dt(2013, 1, 3)]).astype('datetime64[ns]'), name=[u'DATETIME']))

    library.write('pandas', df)
    library.append('pandas', df2)
    actual = library.read('pandas').data

    assert_frame_equal(expected, actual)
Example 10
Project: arctic   Author: man-group   File: test_pandas_store.py    GNU Lesser General Public License v2.1 6 votes vote down vote up
def test_dataframe_append_should_add_new_columns_and_reorder(library):
    # The appended frame introduces new columns (E, D, F) in a different
    # column order; existing rows must be padded ('' for strings, NaN/0 for
    # numerics, per expected_data below) and the new order adopted.
    data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10')])
    data[:] = [(1, 2., 'Hello'), (2, 3., "World")]
    df = DataFrame(data, index=DatetimeIndex(np.array([dt(2013, 1, 1),
                                                       dt(2013, 1, 2)]).astype('datetime64[ns]'), name=[u'DATETIME']))
    data2 = np.zeros((1,), dtype=[('C', 'a10'), ('A', 'i4'), ('E', 'a1'), ('B', 'f4'), ('D', 'f4'), ('F', 'i4')])
    data2[:] = [('Hi', 4, 'Y', 5., 6., 7)]
    df2 = DataFrame(data2, index=DatetimeIndex(np.array([dt(2013, 1, 3)]).astype('datetime64[ns]'), name=[u'DATETIME']))
    expected_data = np.zeros((3,), dtype=[('C', 'a10'), ('A', 'i4'), ('E', 'a1'),
                                          ('B', 'f4'), ('D', 'f4'), ('F', 'i4')])
    expected_data[:] = [('Hello', 1, '', 2., np.nan, 0), ("World", 2, '', 3., np.nan, 0), ('Hi', 4, 'Y', 5., 6., 7)]
    expected = DataFrame(expected_data, index=DatetimeIndex(np.array([dt(2013, 1, 1),
                                                                       dt(2013, 1, 2),
                                                                       dt(2013, 1, 3)]).astype('datetime64[ns]'), name=[u'DATETIME']))

    library.write('pandas', df)
    library.append('pandas', df2)
    actual = library.read('pandas').data

    assert_frame_equal(expected, actual)


# -- auto generated tests --- # 
Example 11
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_base.py    MIT License 6 votes vote down vote up
def test_constructor_from_index_dtlike(self, cast_as_obj, index):
        if cast_as_obj:
            result = pd.Index(index.astype(object))
        else:
            result = pd.Index(index)

        tm.assert_index_equal(result, index)

        if isinstance(index, pd.DatetimeIndex):
            assert result.tz == index.tz
            if cast_as_obj:
                # GH#23524 check that Index(dti, dtype=object) does not
                #  incorrectly raise ValueError, and that nanoseconds are not
                #  dropped
                index += pd.Timedelta(nanoseconds=50)
                result = pd.Index(index, dtype=object)
                assert result.dtype == np.object_
                assert list(result) == list(index) 
Example 12
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_base.py    MIT License 6 votes vote down vote up
def test_constructor_from_frame_series_freq(self):
        # GH 6273
        # create from a series, passing a freq
        dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"]
        expected = DatetimeIndex(dts, freq="MS")

        df = pd.DataFrame(np.random.rand(5, 3))
        df["date"] = dts
        result = DatetimeIndex(df["date"], freq="MS")

        # the source column must stay object dtype (strings are not mutated)
        assert df["date"].dtype == object
        # the constructed index inherits the series name
        expected.name = "date"
        tm.assert_index_equal(result, expected)

        expected = pd.Series(dts, name="date")
        tm.assert_series_equal(df["date"], expected)

        # GH 6274
        # infer freq of same
        freq = pd.infer_freq(df["date"])
        assert freq == "MS"
Example 13
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_setops.py    MIT License 6 votes vote down vote up
def test_union_bug_1745(self, sort):
        left = DatetimeIndex(["2012-05-11 15:19:49.695000"])
        right = DatetimeIndex(
            [
                "2012-05-29 13:04:21.322000",
                "2012-05-11 15:27:24.873000",
                "2012-05-11 15:31:05.350000",
            ]
        )

        result = left.union(right, sort=sort)
        exp = DatetimeIndex(
            [
                "2012-05-11 15:19:49.695000",
                "2012-05-29 13:04:21.322000",
                "2012-05-11 15:27:24.873000",
                "2012-05-11 15:31:05.350000",
            ]
        )
        if sort is None:
            exp = exp.sort_values()
        tm.assert_index_equal(result, exp) 
Example 14
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_setops.py    MIT License 6 votes vote down vote up
def test_difference(self, tz, sort):
        # Difference against a later range, an overlapping range and an
        # empty range.
        rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"]

        # other1 lies entirely after rng1 -> nothing is removed
        rng1 = pd.DatetimeIndex(rng_dates, tz=tz)
        other1 = pd.date_range("1/6/2000", freq="D", periods=5, tz=tz)
        expected1 = pd.DatetimeIndex(rng_dates, tz=tz)

        # other2 overlaps the last two dates -> first three elements remain
        rng2 = pd.DatetimeIndex(rng_dates, tz=tz)
        other2 = pd.date_range("1/4/2000", freq="D", periods=5, tz=tz)
        expected2 = pd.DatetimeIndex(rng_dates[:3], tz=tz)

        # empty other -> difference leaves rng unchanged
        rng3 = pd.DatetimeIndex(rng_dates, tz=tz)
        other3 = pd.DatetimeIndex([], tz=tz)
        expected3 = pd.DatetimeIndex(rng_dates, tz=tz)

        for rng, other, expected in [
            (rng1, other1, expected1),
            (rng2, other2, expected2),
            (rng3, other3, expected3),
        ]:
            result_diff = rng.difference(other, sort)
            if sort is None:
                # sort=None means the result comes back sorted
                expected = expected.sort_values()
            tm.assert_index_equal(result_diff, expected)
Example 15
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_setops.py    MIT License 6 votes vote down vote up
def test_union_not_cacheable(self, sort):
        rng = date_range("1/1/2000", periods=50, freq=Minute())
        rng1 = rng[10:]
        rng2 = rng[:25]
        the_union = rng1.union(rng2, sort=sort)
        if sort is None:
            tm.assert_index_equal(the_union, rng)
        else:
            expected = pd.DatetimeIndex(list(rng[10:]) + list(rng[:10]))
            tm.assert_index_equal(the_union, expected)

        rng1 = rng[10:]
        rng2 = rng[15:35]
        the_union = rng1.union(rng2, sort=sort)
        expected = rng[10:]
        tm.assert_index_equal(the_union, expected) 
Example 16
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_setops.py    MIT License 6 votes vote down vote up
def test_intersection(self):
        rng = date_range("1/1/2000", periods=50, freq=Minute())
        rng1 = rng[10:]
        rng2 = rng[:25]
        the_int = rng1.intersection(rng2)
        expected = rng[10:25]
        tm.assert_index_equal(the_int, expected)
        assert isinstance(the_int, DatetimeIndex)
        assert the_int.freq == rng.freq

        the_int = rng1.intersection(rng2.view(DatetimeIndex))
        tm.assert_index_equal(the_int, expected)

        # non-overlapping
        the_int = rng[:10].intersection(rng[10:])
        expected = DatetimeIndex([])
        tm.assert_index_equal(the_int, expected) 
Example 17
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_indexing.py    MIT License 6 votes vote down vote up
def test_dti_business_getitem(self):
        """Indexing behavior of a business-day range and its freq handling."""
        rng = pd.bdate_range(START, END)
        smaller = rng[:5]
        exp = DatetimeIndex(rng.view(np.ndarray)[:5])
        tm.assert_index_equal(smaller, exp)

        # a contiguous slice keeps the original business-day freq
        assert smaller.freq == rng.freq

        # a strided slice multiplies the freq by the step
        sliced = rng[::5]
        assert sliced.freq == BDay() * 5

        # arbitrary-order fancy indexing cannot preserve a freq
        fancy_indexed = rng[[4, 3, 2, 1, 0]]
        assert len(fancy_indexed) == 5
        assert isinstance(fancy_indexed, DatetimeIndex)
        assert fancy_indexed.freq is None

        # 32-bit vs. 64-bit platforms
        assert rng[4] == rng[np.int_(4)]
Example 18
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_indexing.py    MIT License 6 votes vote down vote up
def test_dti_custom_getitem(self):
        """Same as the business-day case, but with a custom-business freq 'C'."""
        rng = pd.bdate_range(START, END, freq="C")
        smaller = rng[:5]
        exp = DatetimeIndex(rng.view(np.ndarray)[:5])
        tm.assert_index_equal(smaller, exp)
        # a contiguous slice keeps the original custom-business freq
        assert smaller.freq == rng.freq

        # a strided slice multiplies the freq by the step
        sliced = rng[::5]
        assert sliced.freq == CDay() * 5

        # arbitrary-order fancy indexing cannot preserve a freq
        fancy_indexed = rng[[4, 3, 2, 1, 0]]
        assert len(fancy_indexed) == 5
        assert isinstance(fancy_indexed, DatetimeIndex)
        assert fancy_indexed.freq is None

        # 32-bit vs. 64-bit platforms
        assert rng[4] == rng[np.int_(4)]
Example 19
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_construction.py    MIT License 6 votes vote down vote up
def test_categorical_preserves_tz(self):
        # GH#18664 retain tz when going DTI-->Categorical-->DTI
        # TODO: parametrize over DatetimeIndex/DatetimeArray
        #  once CategoricalIndex(DTA) works

        dti = pd.DatetimeIndex(
            [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern"
        )

        ci = pd.CategoricalIndex(dti)
        carr = pd.Categorical(dti)
        cser = pd.Series(ci)

        for obj in [ci, carr, cser]:
            result = pd.DatetimeIndex(obj)
            tm.assert_index_equal(result, dti) 
Example 20
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_construction.py    MIT License 6 votes vote down vote up
def test_dti_with_timedelta64_data_deprecation(self):
        # GH#23675
        # Constructing datetimes from m8[ns] data is deprecated: both the
        # DatetimeIndex constructor and to_datetime must warn, yet still
        # interpret the values as nanosecond offsets from the epoch.
        data = np.array([0], dtype="m8[ns]")
        with tm.assert_produces_warning(FutureWarning):
            result = DatetimeIndex(data)

        assert result[0] == Timestamp("1970-01-01")

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = to_datetime(data)

        assert result[0] == Timestamp("1970-01-01")

        # same deprecation when the input is a TimedeltaIndex
        with tm.assert_produces_warning(FutureWarning):
            result = DatetimeIndex(pd.TimedeltaIndex(data))

        assert result[0] == Timestamp("1970-01-01")

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = to_datetime(pd.TimedeltaIndex(data))

        assert result[0] == Timestamp("1970-01-01")
Example 21
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_construction.py    MIT License 6 votes vote down vote up
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
        """Constructing a DatetimeIndex from i8 values with tz-related kwargs."""
        tz = tz_aware_fixture
        i = pd.date_range("20130101", periods=5, freq="H", tz=tz)
        # resolve attribute paths from the kwargs fixture against the index
        kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}

        # non-UTC zones are expected to emit a FutureWarning on construction
        # from raw i8 values
        if str(tz) in ("UTC", "tzutc()", "UTC+00:00"):
            warn = None
        else:
            warn = FutureWarning

        with tm.assert_produces_warning(warn, check_stacklevel=False):
            result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs)
        expected = DatetimeIndex(i, **kwargs)
        tm.assert_index_equal(result, expected)

        # localize into the provided tz
        i2 = DatetimeIndex(i.tz_localize(None).asi8, tz="UTC")
        expected = i.tz_localize(None).tz_localize("UTC")
        tm.assert_index_equal(i2, expected)

        # incompat tz/dtype
        msg = "cannot supply both a tz and a dtype with a tz"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype, tz="US/Pacific")
Example 22
Project: aospy   Author: spencerahill   File: times.py    Apache License 2.0 5 votes vote down vote up
def apply_time_offset(time, years=0, months=0, days=0, hours=0):
    """Apply a specified offset to the given time array.

    This is useful for GFDL model output of instantaneous values.  For example,
    3 hourly data postprocessed to netCDF files spanning 1 year each will
    actually have time values that are offset by 3 hours, such that the first
    value is for 1 Jan 03:00 and the last value is 1 Jan 00:00 of the
    subsequent year.  This causes problems in xarray, e.g. when trying to group
    by month.  It is resolved by manually subtracting off those three hours,
    such that the dates span from 1 Jan 00:00 to 31 Dec 21:00 as desired.

    Parameters
    ----------
    time : xarray.DataArray representing a timeseries
    years, months, days, hours : int, optional
        The number of years, months, days, and hours, respectively, to offset
        the time array by.  Positive values move the times later.

    Returns
    -------
    pandas.DatetimeIndex

    Examples
    --------
    Case of a length-1 input time array:

    >>> times = xr.DataArray(datetime.datetime(1899, 12, 31, 21))
    >>> apply_time_offset(times)
    Timestamp('1900-01-01 00:00:00')

    Case of input time array with length greater than one:

    >>> times = xr.DataArray([datetime.datetime(1899, 12, 31, 21),
    ...                       datetime.datetime(1899, 1, 31, 21)])
    >>> apply_time_offset(times) # doctest: +NORMALIZE_WHITESPACE
    DatetimeIndex(['1900-01-01', '1899-02-01'], dtype='datetime64[ns]',
                  freq=None)
    """
    # build the calendar-aware offset, then shift all parsed timestamps by it
    offset = pd.DateOffset(years=years, months=months, days=days, hours=hours)
    return pd.to_datetime(time.values) + offset
Example 23
Project: automlk   Author: pierre-chaville   File: preprocessing.py    MIT License 5 votes vote down vote up
def transform(self, X):
    """Expand each configured date column into numeric date-part features
    (year/month/day/dayofweek/hour/second), dropping the original column."""
    for col in self.date_cols:
        # if raw type is not a date, parse the strings first
        if str(X[col].dtype) == 'object':
            X[col] = pd.DatetimeIndex(X[col])
        # derived calendar features, one new column per attribute
        feature_extractors = [
            ('__year', lambda ts: ts.year),
            ('__month', lambda ts: ts.month),
            ('__day', lambda ts: ts.day),
            ('__dayofweek', lambda ts: ts.dayofweek),
            ('__hour', lambda ts: ts.hour),
            ('__second', lambda ts: ts.second),
        ]
        for suffix, extractor in feature_extractors:
            X[col + suffix] = X[col].map(extractor)
        # remove initial column
        X.drop(col, axis=1, inplace=True)
    return X
Example 24
Project: automlk   Author: pierre-chaville   File: preprocessing.py    MIT License 5 votes vote down vote up
def transform(self, X):
    """Replace each date column with year/month/day/dayofweek features."""
    for col in self.date_cols:
        # strings must first be parsed into datetimes
        if str(X[col].dtype) == 'object':
            X[col] = pd.DatetimeIndex(X[col])
        parsed = X[col]
        # calendar-part features derived from the parsed column
        X[col + '__year'] = parsed.map(lambda ts: ts.year)
        X[col + '__month'] = parsed.map(lambda ts: ts.month)
        X[col + '__day'] = parsed.map(lambda ts: ts.day)
        X[col + '__dayofweek'] = parsed.map(lambda ts: ts.dayofweek)
        # drop the source column once the features are extracted
        X.drop(col, axis=1, inplace=True)
    return X
Example 25
Project: automlk   Author: pierre-chaville   File: preprocessing.py    MIT License 5 votes vote down vote up
def transform(self, X):
    """Derive month/day/dayofweek features from each date column and drop
    the original column."""
    for col in self.date_cols:
        # if raw type is not a date, parse it first
        if str(X[col].dtype) == 'object':
            X[col] = pd.DatetimeIndex(X[col])
        # one feature column per calendar attribute
        for attr in ('month', 'day', 'dayofweek'):
            X['%s__%s' % (col, attr)] = X[col].map(
                lambda ts, a=attr: getattr(ts, a))
        # remove initial column
        X.drop(col, axis=1, inplace=True)
    return X
Example 26
Project: performance_tracker   Author: metro-ontime   File: estimate_arrivals.py    GNU General Public License v3.0 5 votes vote down vote up
def estimate_arrivals(trip_id, trip, stations, direction):
    """Interpolate arrival datetimes for stations along a single trip.

    Combines observed trip rows with station rows, sorts by
    relative_position, then linearly interpolates each station's datetime
    from the neighboring observed rows, weighted by track position.
    """
    # mark observed rows vs. station rows to be estimated
    trip.loc[:, "estimate"] = False
    stations.loc[:, "estimate"] = True
    trip_est = stations
    trip_est.loc[:, "trip_id"] = trip_id
    trip_est.loc[:, "direction_id"] = direction
    combined = trip.append(trip_est)
    combined = combined.sort_values("relative_position")
    combined = combined.reset_index(drop=True)
    # shift vals to move adjacent position and date data into each row
    combined.loc[:, "previous_pos"] = combined.relative_position.shift()
    combined.loc[:, "next_pos"] = combined.relative_position.shift(-1)
    combined.loc[:, "previous_dt"] = combined.datetime.shift()
    combined.loc[:, "next_dt"] = combined.datetime.shift(-1)
    select = combined[combined["estimate"] == True]
    # fraction of the gap between neighbors covered by this station
    select.loc[:, "weight"] = (select.relative_position - select.previous_pos) / (
        select.next_pos - select.previous_pos
    )
    select.loc[:, "time_interpolation"] = (
        select.next_dt - select.previous_dt
    ) * select.weight
    select.loc[:, "datetime"] = select.previous_dt + select.time_interpolation
    # round interpolated times to whole seconds
    select.loc[:, "datetime"] = pd.DatetimeIndex(select.datetime).round("S")
    select.loc[:, "stop_id"] = pd.to_numeric(select.stop_id, downcast="integer")
    # Some station estimates cannot be reliably estimated using this
    # technique and will have datetime = NaT, so we remove them.
    select = select.dropna(subset=["datetime"])
    return select
Example 27
Project: performance_tracker   Author: metro-ontime   File: analyze_estimates.py    GNU General Public License v3.0 5 votes vote down vote up
def get_previous_stop_times(stop_id, stop_estimates):
    """Add a ``prev_stop_time`` column holding each arrival's predecessor.

    The frame is re-indexed by its datetime column and sorted so that the
    shift pairs each arrival with the chronologically previous one.
    """
    dt_index = pd.DatetimeIndex(stop_estimates["datetime"])
    stop_estimates = stop_estimates.set_index(dt_index).sort_index()
    stop_estimates.loc[:, "prev_stop_time"] = stop_estimates["datetime"].shift()
    return stop_estimates
Example 28
Project: influx-sansio   Author: miracle2k   File: serialization.py    MIT License 5 votes vote down vote up
def make_df(resp) -> Union[bool, pd.DataFrame, Dict[str, pd.DataFrame]]:
    """Makes list of DataFrames from a response object"""

    def build_frame(series) -> pd.DataFrame:
        # one frame per series; the 'time' column (if any) becomes a
        # UTC-localized DatetimeIndex
        frame = pd.DataFrame(series['values'], columns=series['columns'])
        if 'time' not in frame.columns:
            return frame
        frame = frame.set_index(pd.to_datetime(frame['time'])).drop('time', axis=1)
        frame.index = frame.index.tz_localize('UTC')
        frame.index.name = None
        for tag_key, tag_val in series.get('tags', {}).items():
            frame[tag_key] = tag_val
        if 'name' in series:
            frame.name = series['name']
        return frame

    def reset_if_epoch_zero(frame):
        # an index of all-zero timestamps means there was no real time data
        if isinstance(frame.index, pd.DatetimeIndex):
            if all(ts.value == 0 for ts in frame.index):
                frame.reset_index(drop=True, inplace=True)

    df_list = [(series['name'], build_frame(series))
               for statement in resp['results'] if 'series' in statement
               for series in statement['series']]

    if len(df_list) == 1:
        only_frame = df_list[0][1]
        reset_if_epoch_zero(only_frame)
        return only_frame

    # multiple series: group frames by series name and concatenate each group
    grouped = defaultdict(list)
    for name, frame in sorted(df_list, key=lambda pair: pair[0]):
        grouped[name].append(frame)
    combined = {name: pd.concat(frames, axis=0) for name, frames in grouped.items()}
    for frame in combined.values():
        reset_if_epoch_zero(frame)
    return combined
Example 29
Project: influx-sansio   Author: miracle2k   File: serialization.py    MIT License 5 votes vote down vote up
def parse_df(df, measurement, tag_columns=None, **extra_tags):
    """Converts a Pandas DataFrame into line protocol format"""
    # Calling t._asdict is more straightforward
    # but about 40% slower than using indexes
    def parser(df):
        # NOTE: closes over tag_indexes, which is assigned below; the
        # generator is only consumed after that assignment.
        for t in df.itertuples():
            tags = dict()
            fields = dict()
            # noinspection PyProtectedMember
            for i, k in enumerate(t._fields):
                if i in tag_indexes:
                    tags[k] = t[i]
                elif i == 0:
                    # position 0 is the index (the timestamp), emitted as
                    # 'time' below rather than as a field
                    continue
                else:
                    fields[k] = t[i]
            tags.update(extra_tags)
            yield dict(measurement=measurement,
                       time=t[0],
                       tags=tags,
                       fields=fields)

    # Make a copy because modifications are made to the dataframe before insertion
    df = df.copy()
    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError('DataFrame index is not DatetimeIndex')
    for key, value in extra_tags.items():
        df[key] = value
    if tag_columns:
        # +1 because itertuples places the index at position 0
        tag_indexes = [df.columns.get_loc(tag) + 1 for tag in tag_columns + list(extra_tags)]
    else:
        tag_indexes = list()
    lines = [make_line(p) for p in parser(df)]
    return b'\n'.join(lines)
Example 30
Project: arctic   Author: man-group   File: test_ts_read.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_read_all_cols_all_dtypes(tickstore_lib, chunk_size):
    # Round-trip ticks mixing float/string/int fields where each row has
    # columns the other lacks; read back with columns=None (all columns).
    data = [{'f': 0.1,
            'of': 0.2,
            's': 's',
            'os': 'os',
            'l': 1,
            'ol': 2,
            'index': dt(1970, 1, 1, tzinfo=mktz('UTC')),
            },
            {'f': 0.3,
            'nf': 0.4,
            's': 't',
            'ns': 'ns',
            'l': 3,
            'nl': 4,
            'index': dt(1970, 1, 1, 0, 0, 1, tzinfo=mktz('UTC')),
            },
            ]
    tickstore_lib._chunk_size = chunk_size
    tickstore_lib.write('sym', data)
    df = tickstore_lib.read('sym', columns=None)

    # reads come back in the local timezone
    assert df.index.tzinfo == mktz()

    # The below is probably more trouble than it's worth, but we *should*
    # be able to roundtrip data and get the same answer...

    # Ints become floats
    data[0]['l'] = float(data[0]['l'])
    # Treat missing strings as None
    data[0]['ns'] = None
    data[1]['os'] = None
    index = DatetimeIndex([dt(1970, 1, 1, tzinfo=mktz('UTC')),
                         dt(1970, 1, 1, 0, 0, 1, tzinfo=mktz('UTC'))],
                        )
    # compare in UTC to avoid local-timezone differences
    df.index = df.index.tz_convert(mktz('UTC'))
    expected = pd.DataFrame(data, index=index)
    expected = expected[df.columns]
    assert_frame_equal(expected, df, check_names=False)
Example 31
Project: arctic   Author: man-group   File: test_fixes.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_write_dataframe(chunkstore_lib):
    """Write a small time-indexed frame in daily chunks and iterate it back."""
    # Eight measurements taken every 6 hours.
    idx = DatetimeIndex(pd.date_range(start=dt(2017, 5, 1, 1), periods=8, freq='6H'),
                        name='date')
    frame = DataFrame(data={'something': [100 * i for i in range(1, 9)]}, index=idx)

    chunkstore_lib.write('test', frame, chunk_size='D')

    # Every chunk produced by the iterator should be non-empty.
    for chunk in chunkstore_lib.iterator('test'):
        assert len(chunk) > 0 
Example 32
Project: arctic   Author: man-group   File: test_fixes.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_date_interval(chunkstore_lib):
    """Check DateRange open/closed interval semantics for indexed and columnar dates."""
    dates = pd.date_range(start=dt(2017, 5, 1), periods=8, freq='D')

    indexed = DataFrame(data={'data': range(8)},
                        index=DatetimeIndex(dates, name='date'))

    # test with a DatetimeIndex
    chunkstore_lib.write('test', indexed, chunk_size='D')

    lo, hi = dt(2017, 5, 2), dt(2017, 5, 5)
    # (interval type, expected slice of the written frame)
    bounded_cases = [(CLOSED_OPEN, indexed[1:4]),
                     (OPEN_OPEN, indexed[2:4]),
                     (OPEN_CLOSED, indexed[2:5]),
                     (CLOSED_CLOSED, indexed[1:5])]
    for interval, expected in bounded_cases:
        ret = chunkstore_lib.read('test', chunk_range=DateRange(lo, hi, interval))
        assert_frame_equal(ret, expected)
    # Open-ended range: everything from lo onwards.
    ret = chunkstore_lib.read('test', chunk_range=DateRange(lo, None, CLOSED_OPEN))
    assert_frame_equal(ret, indexed[1:8])

    # test with the date held in an ordinary column instead of the index
    columnar = DataFrame(data={'data': range(8),
                               'date': dates})

    chunkstore_lib.write('test2', columnar, chunk_size='D')

    for interval, expected_len in [(CLOSED_OPEN, 3), (OPEN_OPEN, 2),
                                   (OPEN_CLOSED, 3), (CLOSED_CLOSED, 4)]:
        ret = chunkstore_lib.read('test2', chunk_range=DateRange(lo, hi, interval))
        assert len(ret) == expected_len
    ret = chunkstore_lib.read('test2', chunk_range=DateRange(lo, None, CLOSED_OPEN))
    assert len(ret) == 7 
Example 33
Project: arctic   Author: man-group   File: test_fixes.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_column_copy(chunkstore_lib):
    """Reading a column subset must not mutate the caller's column list."""
    dates = DatetimeIndex(pd.date_range('2019-01-01', periods=3, freq='D'), name='date')

    frame = pd.DataFrame({'A': [1, 2, 3], 'B': [5, 6, 7]}, index=dates)
    requested = ['A']
    chunkstore_lib.write('test', frame)
    chunkstore_lib.read('test', columns=requested)
    assert requested == ['A'] 
Example 34
Project: arctic   Author: man-group   File: test_pandas_store.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_save_read_pandas_series_with_datetimeindex_with_timezone(library):
    """A tz-aware DatetimeIndex on a Series should survive a write/read cycle."""
    stamps = np.array([dt(2013, 1, 1),
                       dt(2013, 1, 2),
                       dt(2013, 1, 3)]).astype('datetime64[ns]')
    series = Series(data=['A', 'BC', 'DEF'],
                    index=DatetimeIndex(stamps, tz="America/Chicago"))
    library.write('pandas', series)
    roundtripped = library.read('pandas').data
    assert series.index.tz == roundtripped.index.tz
    assert all(series.index == roundtripped.index) 
Example 35
Project: arctic   Author: man-group   File: test_pandas_store.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_save_read_pandas_dataframe_with_datetimeindex_with_timezone(library):
    """A tz-aware DatetimeIndex on a DataFrame should survive a write/read cycle."""
    stamps = np.array([dt(2013, 1, 1),
                       dt(2013, 1, 2),
                       dt(2013, 1, 3)]).astype('datetime64[ns]')
    frame = DataFrame(data=['A', 'BC', 'DEF'],
                      index=DatetimeIndex(stamps, tz="America/Chicago"))
    library.write('pandas', frame)
    roundtripped = library.read('pandas').data
    assert frame.index.tz == roundtripped.index.tz
    assert all(frame.index == roundtripped.index) 
Example 36
Project: arctic   Author: man-group   File: test_pandas_store.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_save_read_pandas_empty_series_with_datetime_multiindex_with_timezone(library):
    """An empty Series with a tz-aware datetime MultiIndex keeps its tz on round-trip.

    Note: the MultiIndex constructor's ``labels=`` keyword was renamed to
    ``codes=`` in pandas 0.24 and removed in pandas 1.0; ``codes=`` is used here.
    """
    empty_index = pd.MultiIndex(levels=(pd.DatetimeIndex([], tz="America/Chicago"), pd.Index([])), codes=([], []))
    df = Series(data=[], index=empty_index)
    library.write('pandas', df)
    saved_df = library.read('pandas').data
    assert empty_index.equal_levels(saved_df.index), "Index timezone information should be maintained, even when empty" 
Example 37
Project: arctic   Author: man-group   File: test_pandas_store.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_save_read_pandas_dataframe_strings(library):
    """String-valued DataFrame values should round-trip through the store intact.

    The ``DatetimeIndex(start=..., periods=..., freq=...)`` range constructor was
    deprecated in pandas 0.24 and later removed; ``pd.date_range`` is the
    supported equivalent and returns the same index.
    """
    df = DataFrame(data=['a', 'b', 'c'],
                   index=pd.date_range(start='1/1/2011', periods=3, freq='H'))
    library.write('pandas', df)
    saved_df = library.read('pandas').data
    assert np.all(df.values == saved_df.values) 
Example 38
Project: arctic   Author: man-group   File: test_pandas_store.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_append_pandas_dataframe(library):
    """Appending a second frame should yield the concatenation on read.

    Two modernizations for current pandas:
    - ``DatetimeIndex(start=..., periods=..., freq=...)`` was removed; use
      ``pd.date_range``.
    - ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
      equivalent for building the expected value.
    """
    df = DataFrame(data=[1, 2, 3], index=pd.date_range(start='1/1/2011', periods=3, freq='H'))
    df2 = DataFrame(data=[4, 5, 6], index=pd.date_range(start='2/1/2011', periods=3, freq='H'))
    library.write('pandas', df)
    library.append('pandas', df2)
    saved_df = library.read('pandas').data
    assert np.all(pd.concat([df, df2]).values == saved_df.values) 
Example 39
Project: arctic   Author: man-group   File: test_pandas_store.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_dataframe_append_empty(library):
    """Appending an empty frame should leave the stored data unchanged.

    Uses ``pd.date_range`` (the ``DatetimeIndex(start=...)`` constructor was
    removed) and ``pd.concat`` (``DataFrame.append`` was removed in pandas 2.0).
    """
    df = DataFrame(data=[1, 2, 3], index=pd.date_range(start='1/1/2011', periods=3, freq='H'))
    df2 = DataFrame(data=[], index=[])
    library.write('pandas', df)
    library.append('pandas', df2)
    saved_df = library.read('pandas').data
    assert np.all(pd.concat([df, df2]).values == saved_df.values) 
Example 40
Project: arctic   Author: man-group   File: test_pandas_store.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_empy_dataframe_append(library):
    """Appending real data to an initially empty symbol should read back correctly.

    (Function name typo is preserved: it is the test's public identifier.)
    Uses ``pd.date_range`` (the ``DatetimeIndex(start=...)`` constructor was
    removed) and ``pd.concat`` (``DataFrame.append`` was removed in pandas 2.0).
    """
    df = DataFrame(data=[], index=[])
    df2 = DataFrame(data=[1, 2, 3], index=pd.date_range(start='1/1/2011', periods=3, freq='H'))
    library.write('pandas', df)
    library.append('pandas', df2)
    saved_df = library.read('pandas').data
    assert np.all(pd.concat([df, df2]).values == saved_df.values) 
Example 41
Project: arctic   Author: man-group   File: test_multi_index.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def get_bitemporal_test_data():
    """Build an 8-row bitemporal test frame: 4 sample dates x 2 observations each.

    The index is a (sample_dt, observed_dt) MultiIndex; values step by 0.1 on
    each successive update so the two observations per date are distinguishable.
    """
    # Each of the 4 sample dates appears twice (two observations per date).
    base = pd.date_range('1/1/2014', periods=4, freq='D')
    sample_dates = pd.DatetimeIndex(data=sorted(itertools.chain(base, base)))

    # Insert (observed) dates are a year later than the sample dates, to show
    # that the two levels don't necessarily have to be related.
    insert_dates = pd.date_range('1/1/2015', periods=8, freq='D')

    index = pd.MultiIndex.from_arrays([sample_dates, insert_dates],
                                      names=['sample_dt', 'observed_dt'])

    prices = [[1.0, 10.0], [1.1, 10.1],
              [2.0, 20.0], [2.1, 20.1],
              [3.0, 30.0], [3.1, 30.1],
              [4.0, 40.0], [4.1, 40.1]]

    # Resulting frame:
    #                         OPEN  CLOSE
    # sample_dt  observed_dt
    # 2014-01-01 2015-01-01   1.0   10.0
    #            2015-01-02   1.1   10.1
    # ...
    # 2014-01-04 2015-01-07   4.0   40.0
    #            2015-01-08   4.1   40.1
    return pd.DataFrame(prices, index=index, columns=['OPEN', 'CLOSE']) 
Example 42
Project: arctic   Author: man-group   File: test_multi_index.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def get_datetime_index_test_data():
    """Build a 12-row bitemporal test frame: 3 sample timestamps x 4 observations each.

    OPEN/CLOSE values are simply 0, 10, 20, ... so individual rows are easy to
    tell apart in assertions.
    """
    samples = (4 * [dt('1/1/2014 21:30')]
               + 4 * [dt('2/1/2014 21:30')]
               + 4 * [dt('3/1/2014 21:30')])
    sample_dates = pd.DatetimeIndex(samples)
    observed_dates = [dt('1/1/2014 22:00'), dt('1/1/2014 22:30'), dt('2/1/2014 00:00'), dt('1/1/2015 21:30'),
                      dt('2/1/2014 23:00'), dt('2/1/2014 23:30'), dt('3/1/2014 00:00'), dt('2/1/2015 21:30'),
                      dt('3/1/2014 21:30'), dt('3/1/2014 22:30'), dt('4/1/2014 00:00'), dt('3/1/2015 21:30'),
                      ]
    idx = pd.MultiIndex.from_arrays([sample_dates, observed_dates],
                                    names=['sample_dt', 'observed_dt'])

    values = np.arange(24).reshape(12, 2) * 10

    # Resulting frame:
    #                                          OPEN  CLOSE
    # sample_dt           observed_dt
    # 2014-01-01 21:30:00 2014-01-01 22:00:00     0     10
    #                     2014-01-01 22:30:00    20     30
    #                     ...
    # 2014-03-01 21:30:00 2015-03-01 21:30:00   220    230
    return pd.DataFrame(values, index=idx, columns=['OPEN', 'CLOSE']) 
Example 43
Project: u8timeseries   Author: unit8co   File: autoregressive_model.py    Apache License 2.0 5 votes vote down vote up
def _generate_new_dates(self, n: int):
        """
        Generate n new dates after the end of the training set
        """
        new_dates = [self.training_series.time_index()[-1] + (i * self.training_series.freq()) for i in range(1, n+1)]
        return pd.DatetimeIndex(new_dates, freq=self.training_series.freq_str()) 
Example 44
Project: u8timeseries   Author: unit8co   File: timeseries.py    Apache License 2.0 5 votes vote down vote up
def __init__(self, series: pd.Series, confidence_lo: pd.Series = None, confidence_hi: pd.Series = None):
        """
        A TimeSeries is an immutable object defined by the following three components:

        :param series: The actual time series, as a pandas Series with a proper time index.
        :param confidence_lo: Optionally, a Pandas Series representing lower confidence interval.
        :param confidence_hi: Optionally, a Pandas Series representing upper confidence interval.
        :raises AssertionError: if the series is empty, not datetime-indexed, non-numeric,
                                has no inferable frequency, or if a confidence series does
                                not match the main series' length and index.

        Within this class, TimeSeries type annotations are 'TimeSeries'; see:
        https://stackoverflow.com/questions/15853469/putting-current-class-as-return-type-annotation
        """

        # Validate the main series: non-empty, datetime-indexed, numeric values.
        assert len(series) >= 1, 'Series must have at least one value.'
        assert isinstance(series.index, pd.DatetimeIndex), 'Series must be indexed with a DatetimeIndex.'
        assert np.issubdtype(series.dtype, np.number), 'Series must contain numerical values.'

        self._series: pd.Series = series.sort_index()  # Sort by time
        self._freq: str = self._series.index.inferred_freq  # Infer frequency

        # TODO: optionally fill holes (including missing dates) - for now we assume no missing dates
        assert self._freq is not None, 'Could not infer frequency. Are some dates missing? Is Series too short (n=2)?'

        # TODO: are there some pandas Series where the line below causes issues?
        self._series.index.freq = self._freq  # Set the inferred frequency in the Pandas series

        # Handle confidence intervals:
        # each one, if provided, is sorted by time and must align exactly
        # (same length, same index) with the main series.
        self._confidence_lo = None
        self._confidence_hi = None
        if confidence_lo is not None:
            self._confidence_lo = confidence_lo.sort_index()
            assert len(self._confidence_lo) == len(self._series), 'Lower confidence interval must have same size as ' \
                                                                  'the main time series.'
            assert (self._confidence_lo.index == self._series.index).all(), 'Lower confidence interval and main ' \
                                                                            'series must have the same time index.'
        if confidence_hi is not None:
            self._confidence_hi = confidence_hi.sort_index()
            assert len(self._confidence_hi) == len(self._series), 'Upper confidence interval must have same size as ' \
                                                                  'the main time series.'
            assert (self._confidence_hi.index == self._series.index).all(), 'Upper confidence interval and main ' \
                                                                            'series must have the same time index.' 
Example 45
Project: u8timeseries   Author: unit8co   File: timeseries.py    Apache License 2.0 5 votes vote down vote up
def time_index(self) -> pd.DatetimeIndex:
        """Return the time index of this TimeSeries.

        :return: A DatetimeIndex containing the index of the TimeSeries.
        """
        idx = self._series.index
        return idx 
Example 46
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_datetimes.py    MIT License 5 votes vote down vote up
def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators):
        """Comparisons between a tz-naive DatetimeArray and array-likes of itself."""
        # arbitrary tz-naive DatetimeIndex
        op_name = all_compare_operators.strip("_")
        op = getattr(operator, op_name)

        dti = pd.date_range("2016-01-1", freq="MS", periods=9, tz=None)
        arr = DatetimeArray(dti)
        assert arr.freq == dti.freq
        assert arr.tz == dti.tz

        right = dti

        # Comparing identical values: eq/le/ge are all-True; ne/gt/lt all-False.
        expected = np.ones(len(arr), dtype=bool)
        if op_name in ["ne", "gt", "lt"]:
            # for these the comparisons should be all-False
            expected = ~expected

        tm.assert_numpy_array_equal(op(arr, arr), expected)
        for other in [right, np.array(right)]:
            # TODO: add list and tuple, and object-dtype once those
            #  are fixed in the constructor
            tm.assert_numpy_array_equal(op(arr, other), expected)
            tm.assert_numpy_array_equal(op(other, arr), expected) 
Example 47
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_datetimelike.py    MIT License 5 votes vote down vote up
def datetime_index(request):
    """
    A fixture to provide DatetimeIndex objects with different frequencies.

    Most DatetimeArray behavior is already tested in DatetimeIndex tests,
    so here we just test that the DatetimeArray behavior matches
    the DatetimeIndex behavior.
    """
    # TODO: non-monotone indexes; NaTs, different start dates, timezones
    return pd.date_range(start=pd.Timestamp("2000-01-01"), periods=100,
                         freq=request.param) 
Example 48
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_datetimelike.py    MIT License 5 votes vote down vote up
def test_array_object_dtype(self, tz_naive_fixture):
        """Object-dtype np.array of a DatetimeArray matches the same view of its index."""
        # GH#23524
        dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture)
        arr = DatetimeArray(dti)

        expected = np.array(list(dti))

        # DatetimeArray -> object ndarray
        tm.assert_numpy_array_equal(np.array(arr, dtype=object), expected)

        # also test the DatetimeIndex method while we're at it
        tm.assert_numpy_array_equal(np.array(dti, dtype=object), expected) 
Example 49
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_datetimelike.py    MIT License 5 votes vote down vote up
def test_from_dti(self, tz_naive_fixture):
        """A DatetimeArray built from a DatetimeIndex matches it element-wise."""
        dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture)
        arr = DatetimeArray(dti)
        assert list(arr) == list(dti)

        # Check that Index.__new__ knows what to do with DatetimeArray
        roundtripped = pd.Index(arr)
        assert isinstance(roundtripped, pd.DatetimeIndex)
        assert list(roundtripped) == list(arr) 
Example 50
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_base.py    MIT License 5 votes vote down vote up
def test_constructor_from_series(self, klass):
        """Index constructors should accept a Series of Timestamps."""
        stamps = [Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")]
        expected = DatetimeIndex(stamps)
        result = klass(Series(stamps))
        tm.assert_index_equal(result, expected)