Python pandas.DatetimeIndex() Examples

The following are 30 code examples showing how to use pandas.DatetimeIndex(). They are extracted from open source projects; the project, author, file, and license are noted above each example.


You may also want to check out all other available functions and classes of the module pandas.
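
Before the project examples, here is a minimal, self-contained sketch of the constructor itself. The dates and names below are made up purely for illustration.

import pandas as pd

# Two common ways to build a DatetimeIndex: from a list of date strings,
# and by wrapping the result of pd.date_range and localizing it to a timezone.
idx = pd.DatetimeIndex(["2019-01-01", "2019-01-02", "2019-01-03"], name="date")
idx_utc = pd.DatetimeIndex(pd.date_range("2019-01-01", periods=3, freq="D")).tz_localize("UTC")
print(idx)
print(idx_utc)
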

Example 1
Project: performance_tracker   Author: metro-ontime   File: analyze_estimates.py   License: GNU General Public License v3.0
def match_times(stop_id, estimates, schedule):
    # This technique finds the closest scheduled time to actual arrivals
    # This is flawed since it does not account for scheduled arrivals where
    # a train never arrived.
    # It's difficult to search the other way around, however, since our estimated
    # arrival times are incomplete (see "select.dropna" in "estimate_arrivals").
    # If we search for the closest estimated arrival to each scheduled stop,
    # we will get some that are far apart because the actual train was likely associated
    # with a different scheduled stop time.
    # This way seems to be fairer on Metro, but we are open to improvements!

    # The try/except is here because of an unexplained bug that occurred on April 10, 2019
    # with data inputs from around 1:35 pm: an index (-1) out of range error.
    # The exact cause is still uncertain, but there was an out-of-range vehicle position
    # observation on the Blue Line at that time.
    try:
        estimates.loc[:, "closest_scheduled"] = estimates.datetime_utc.apply(
            lambda x: schedule.index[schedule.index.get_loc(x, method="nearest")]
        )
        estimates.loc[:, "closest_scheduled"] = pd.DatetimeIndex(
            estimates["closest_scheduled"]
        )
        return estimates
    except Exception:
        return None 
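
A standalone sketch of the nearest-time lookup used above, with made-up times. Note that Index.get_loc(..., method="nearest") is deprecated in recent pandas; get_indexer with method="nearest" is the equivalent spelling there.

import pandas as pd

# Hypothetical schedule of arrival times.
schedule = pd.DatetimeIndex(pd.date_range("2019-04-10 13:00", periods=4, freq="10min"))
actual = pd.Timestamp("2019-04-10 13:12")

# Position of the scheduled time closest to the observed arrival.
pos = schedule.get_indexer([actual], method="nearest")[0]
print(schedule[pos])  # 2019-04-10 13:10:00
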
Example 2
Project: performance_tracker   Author: metro-ontime   File: analyze_estimates.py   License: GNU General Public License v3.0
def match_arrivals_with_schedule(estimated_trips, schedule_direction):
    schedule_direction.loc[:,"datetime_utc"] = pd.to_datetime(schedule_direction["datetime"], utc=True)
    estimated_trips.loc[:,"datetime_utc"] = pd.to_datetime(estimated_trips["datetime"], utc=True)
    schedule_direction = schedule_direction.set_index(pd.DatetimeIndex(schedule_direction["datetime_utc"])).sort_index()
    matched_estimates = [
        match_times(
            stop_id,
            stop_estimates,
            schedule_direction[schedule_direction["stop_id"] == stop_id],
        )
        for stop_id, stop_estimates in estimated_trips.groupby(["stop_id"])
    ]
    matched_estimates = [x for x in matched_estimates if x is not None]
    matched_estimates = pd.concat(matched_estimates)
    matched_estimates["since_scheduled"] = (
        matched_estimates["datetime_utc"] - matched_estimates["closest_scheduled"]
    )
    return matched_estimates 
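
A minimal sketch of the indexing step in match_arrivals_with_schedule, with made-up rows: the datetime strings are parsed to UTC and used as a sorted DatetimeIndex so that the nearest-time lookup in match_times can work.

import pandas as pd

schedule = pd.DataFrame({
    "stop_id": [80101, 80101],
    "datetime": ["2019-04-10T13:20:00", "2019-04-10T13:00:00"],
})
schedule["datetime_utc"] = pd.to_datetime(schedule["datetime"], utc=True)
schedule = schedule.set_index(pd.DatetimeIndex(schedule["datetime_utc"])).sort_index()
print(schedule.index)
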
Example 3
Project: arctic   Author: man-group   File: numpy_records.py   License: GNU Lesser General Public License v2.1
def _multi_index_to_records(index, empty_index):
    # array of tuples to numpy cols. copy copy copy
    if not empty_index:
        ix_vals = list(map(np.array, [index.get_level_values(i) for i in range(index.nlevels)]))
    else:
        # An empty MultiIndex has no size, so create empty arrays for the recarray.
        ix_vals = [np.array([]) for n in index.names]
    index_names = list(index.names)
    count = 0
    for i, n in enumerate(index_names):
        if n is None:
            index_names[i] = 'level_%d' % count
            count += 1
            log.info("Level in MultiIndex has no name, defaulting to %s" % index_names[i])
    index_tz = [get_timezone(i.tz) if isinstance(i, DatetimeIndex) else None for i in index.levels]
    return ix_vals, index_names, index_tz 
Example 4
Project: arctic   Author: man-group   File: numpy_records.py   License: GNU Lesser General Public License v2.1
def _index_to_records(self, df):
        metadata = {}
        index = df.index
        index_tz = None

        if isinstance(index, MultiIndex):
            ix_vals, index_names, index_tz = _multi_index_to_records(index, len(df) == 0)
        else:
            ix_vals = [index.values]
            index_names = list(index.names)
            if index_names[0] is None:
                index_names = ['index']
                log.info("Index has no name, defaulting to 'index'")
            if isinstance(index, DatetimeIndex) and index.tz is not None:
                index_tz = get_timezone(index.tz)

        if index_tz is not None:
            metadata['index_tz'] = index_tz
        metadata['index'] = index_names

        return index_names, ix_vals, metadata 
Example 5
Project: arctic   Author: man-group   File: numpy_records.py   License: GNU Lesser General Public License v2.1
def _index_from_records(self, recarr):
        index = recarr.dtype.metadata['index']

        if len(index) == 1:
            rtn = Index(np.copy(recarr[str(index[0])]), name=index[0])
            if isinstance(rtn, DatetimeIndex) and 'index_tz' in recarr.dtype.metadata:
                rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'])
        else:
            level_arrays = []
            index_tz = recarr.dtype.metadata.get('index_tz', [])
            for level_no, index_name in enumerate(index):
                # build each index level separately to ensure we end up with the right index dtype
                level = Index(np.copy(recarr[str(index_name)]))
                if level_no < len(index_tz):
                    tz = index_tz[level_no]
                    if tz is not None:
                        if not isinstance(level, DatetimeIndex) and len(level) == 0:
                            # index type information got lost during save as the index was empty, cast back
                            level = DatetimeIndex([], tz=tz)
                        else:
                            level = level.tz_localize('UTC').tz_convert(tz)
                level_arrays.append(level)
            rtn = MultiIndex.from_arrays(level_arrays, names=index)
        return rtn 
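
A minimal sketch of the timezone round-trip used in _index_from_records, with made-up values: naive datetime64[ns] data read back from a record array is re-localized to UTC and then converted to the stored zone.

import numpy as np
import pandas as pd

raw = np.array(["2013-01-01T00:00:00", "2013-01-01T06:00:00"], dtype="datetime64[ns]")
level = pd.Index(raw)                      # becomes a naive DatetimeIndex
level = level.tz_localize("UTC").tz_convert("America/Chicago")
print(level)
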
Example 6
Project: arctic   Author: man-group   File: date_chunker.py   License: GNU Lesser General Public License v2.1
def to_mongo(self, range_obj):
        """
        Takes the range object used for this chunker type
        and converts it into a dict that can be used in a
        mongo query that filters by the range.

        returns
        -------
        dict
        """
        if isinstance(range_obj, (pd.DatetimeIndex, tuple)):
            range_obj = DateRange(range_obj[0], range_obj[-1])
        if range_obj.start and range_obj.end:
            return {'$and': [{START: {'$lte': range_obj.end}}, {END: {'$gte': range_obj.start}}]}
        elif range_obj.start:
            return {END: {'$gte': range_obj.start}}
        elif range_obj.end:
            return {START: {'$lte': range_obj.end}}
        else:
            return {} 
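
A hypothetical usage sketch of the conversion above. The 'start' and 'end' field names below merely stand in for arctic's START and END constants (an assumption for illustration): a DatetimeIndex or (start, end) tuple is collapsed to its first and last values, which become the $gte/$lte bounds of the query document.

import pandas as pd

idx = pd.date_range("2017-05-01", periods=8, freq="D")
start, end = idx[0], idx[-1]
# Hypothetical field names standing in for arctic's START/END constants.
query = {"$and": [{"start": {"$lte": end}}, {"end": {"$gte": start}}]}
print(query)
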
Example 7
Project: arctic   Author: man-group   File: date_chunker.py   License: GNU Lesser General Public License v2.1
def exclude(self, data, range_obj):
        """
        Removes data within the bounds of the range object (inclusive)

        returns
        -------
        data, filtered by range_obj
        """
        if isinstance(range_obj, (pd.DatetimeIndex, tuple)):
            range_obj = DateRange(range_obj[0], range_obj[-1])
        if 'date' in data.index.names:
            return data[(data.index.get_level_values('date') < range_obj.start) | (data.index.get_level_values('date') > range_obj.end)]
        elif 'date' in data.columns:
            return data[(data.date < range_obj.start) | (data.date > range_obj.end)]
        else:
            return data 
Example 8
Project: arctic   Author: man-group   File: test_fixes.py   License: GNU Lesser General Public License v2.1
def test_missing_cols(chunkstore_lib):
    index = DatetimeIndex(pd.date_range('2019-01-01', periods=3, freq='D'), name='date')
    index2 = DatetimeIndex(pd.date_range('2019-01-04', periods=3, freq='D'), name='date')
    expected_index = DatetimeIndex(pd.date_range('2019-01-01', periods=6, freq='D'), name='date')
    expected_df = DataFrame({'A': [1, 2, 3, 40, 50, 60], 'B': [5.0,6.0,7.0, np.nan, np.nan, np.nan]}, index=expected_index)

    df = pd.DataFrame({'A': [1, 2, 3], 'B': [5,6,7]}, index=index)
    chunkstore_lib.write('test', df, chunk_size='D')

    df = pd.DataFrame({'A': [40, 50, 60]}, index=index2)
    chunkstore_lib.append('test', df, chunk_size='D')


    assert_frame_equal(chunkstore_lib.read('test'), expected_df)
    df = chunkstore_lib.read('test', columns=['B'])
    assert_frame_equal(df, expected_df['B'].to_frame()) 
Example 9
Project: arctic   Author: man-group   File: test_pandas_store.py   License: GNU Lesser General Public License v2.1
def test_dataframe_append_should_add_new_column(library):
    data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10')])
    data[:] = [(1, 2., 'Hello'), (2, 3., "World")]
    df = DataFrame(data, index=DatetimeIndex(np.array([dt(2013, 1, 1),
                                                       dt(2013, 1, 2)]).astype('datetime64[ns]'), name='DATETIME'))
    data2 = np.zeros((1,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10'), ('D', 'f4')])
    data2[:] = [(4, 5., 'Hi', 6.)]
    df2 = DataFrame(data2, index=DatetimeIndex(np.array([dt(2013, 1, 3)]).astype('datetime64[ns]'), name='DATETIME'))
    expected_data = np.zeros((3,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10'), ('D', 'f4')])
    expected_data[:] = [(1, 2., 'Hello', np.nan), (2, 3., "World", np.nan), (4, 5., 'Hi', 6.)]
    expected = DataFrame(expected_data, index=DatetimeIndex(np.array([dt(2013, 1, 1),
                                                                       dt(2013, 1, 2),
                                                                       dt(2013, 1, 3)]).astype('datetime64[ns]'), name='DATETIME'))

    library.write('pandas', df)
    library.append('pandas', df2)
    actual = library.read('pandas').data

    assert_frame_equal(expected, actual) 
Example 10
Project: arctic   Author: man-group   File: test_pandas_store.py   License: GNU Lesser General Public License v2.1
def test_dataframe_append_should_add_new_columns_and_reorder(library):
    data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10')])
    data[:] = [(1, 2., 'Hello'), (2, 3., "World")]
    df = DataFrame(data, index=DatetimeIndex(np.array([dt(2013, 1, 1),
                                                       dt(2013, 1, 2)]).astype('datetime64[ns]'), name='DATETIME'))
    data2 = np.zeros((1,), dtype=[('C', 'a10'), ('A', 'i4'), ('E', 'a1'), ('B', 'f4'), ('D', 'f4'), ('F', 'i4')])
    data2[:] = [('Hi', 4, 'Y', 5., 6., 7)]
    df2 = DataFrame(data2, index=DatetimeIndex(np.array([dt(2013, 1, 3)]).astype('datetime64[ns]'), name='DATETIME'))
    expected_data = np.zeros((3,), dtype=[('C', 'a10'), ('A', 'i4'), ('E', 'a1'),
                                          ('B', 'f4'), ('D', 'f4'), ('F', 'i4')])
    expected_data[:] = [('Hello', 1, '', 2., np.nan, 0), ("World", 2, '', 3., np.nan, 0), ('Hi', 4, 'Y', 5., 6., 7)]
    expected = DataFrame(expected_data, index=DatetimeIndex(np.array([dt(2013, 1, 1),
                                                                       dt(2013, 1, 2),
                                                                       dt(2013, 1, 3)]).astype('datetime64[ns]'), name='DATETIME'))

    library.write('pandas', df)
    library.append('pandas', df2)
    actual = library.read('pandas').data

    assert_frame_equal(expected, actual)


# -- auto generated tests --- # 
Example 11
Project: recruit   Author: Frank-qlu   File: datetimelike.py   License: Apache License 2.0
def test_map_dictlike(self, mapper):
        expected = self.index + self.index.freq

        # don't compare the freqs
        if isinstance(expected, pd.DatetimeIndex):
            expected.freq = None

        result = self.index.map(mapper(expected, self.index))
        tm.assert_index_equal(result, expected)

        expected = pd.Index([pd.NaT] + self.index[1:].tolist())
        result = self.index.map(mapper(expected, self.index))
        tm.assert_index_equal(result, expected)

        # empty map; these map to np.nan because we cannot know
        # to re-infer things
        expected = pd.Index([np.nan] * len(self.index))
        result = self.index.map(mapper([], []))
        tm.assert_index_equal(result, expected) 
Example 12
Project: recruit   Author: Frank-qlu   File: test_base.py   License: Apache License 2.0
def test_constructor_from_index_dtlike(self, cast_as_obj, index):
        if cast_as_obj:
            result = pd.Index(index.astype(object))
        else:
            result = pd.Index(index)

        tm.assert_index_equal(result, index)

        if isinstance(index, pd.DatetimeIndex):
            assert result.tz == index.tz
            if cast_as_obj:
                # GH#23524 check that Index(dti, dtype=object) does not
                #  incorrectly raise ValueError, and that nanoseconds are not
                #  dropped
                index += pd.Timedelta(nanoseconds=50)
                result = pd.Index(index, dtype=object)
                assert result.dtype == np.object_
                assert list(result) == list(index) 
Example 13
Project: recruit   Author: Frank-qlu   File: test_base.py   License: Apache License 2.0
def test_constructor_from_frame_series_freq(self):
        # GH 6273
        # create from a series, passing a freq
        dts = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990']
        expected = DatetimeIndex(dts, freq='MS')

        df = pd.DataFrame(np.random.rand(5, 3))
        df['date'] = dts
        result = DatetimeIndex(df['date'], freq='MS')

        assert df['date'].dtype == object
        expected.name = 'date'
        tm.assert_index_equal(result, expected)

        expected = pd.Series(dts, name='date')
        tm.assert_series_equal(df['date'], expected)

        # GH 6274
        # infer freq of same
        freq = pd.infer_freq(df['date'])
        assert freq == 'MS' 
Example 14
Project: recruit   Author: Frank-qlu   File: test_setops.py   License: Apache License 2.0
def test_union(self, tz):
        rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
        other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz)
        expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz)

        rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
        other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz)
        expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz)

        rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
        other3 = pd.DatetimeIndex([], tz=tz)
        expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)

        for rng, other, expected in [(rng1, other1, expected1),
                                     (rng2, other2, expected2),
                                     (rng3, other3, expected3)]:

            result_union = rng.union(other)
            tm.assert_index_equal(result_union, expected) 
Example 15
Project: recruit   Author: Frank-qlu   File: test_setops.py   License: Apache License 2.0
def test_difference(self, tz, sort):
        rng_dates = ['1/2/2000', '1/3/2000', '1/1/2000', '1/4/2000',
                     '1/5/2000']

        rng1 = pd.DatetimeIndex(rng_dates, tz=tz)
        other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz)
        expected1 = pd.DatetimeIndex(rng_dates, tz=tz)

        rng2 = pd.DatetimeIndex(rng_dates, tz=tz)
        other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz)
        expected2 = pd.DatetimeIndex(rng_dates[:3], tz=tz)

        rng3 = pd.DatetimeIndex(rng_dates, tz=tz)
        other3 = pd.DatetimeIndex([], tz=tz)
        expected3 = pd.DatetimeIndex(rng_dates, tz=tz)

        for rng, other, expected in [(rng1, other1, expected1),
                                     (rng2, other2, expected2),
                                     (rng3, other3, expected3)]:
            result_diff = rng.difference(other, sort)
            if sort is None:
                expected = expected.sort_values()
            tm.assert_index_equal(result_diff, expected) 
Example 16
Project: recruit   Author: Frank-qlu   File: test_setops.py   License: Apache License 2.0
def test_intersection(self):
        rng = date_range('1/1/2000', periods=50, freq=Minute())
        rng1 = rng[10:]
        rng2 = rng[:25]
        the_int = rng1.intersection(rng2)
        expected = rng[10:25]
        tm.assert_index_equal(the_int, expected)
        assert isinstance(the_int, DatetimeIndex)
        assert the_int.freq == rng.freq

        the_int = rng1.intersection(rng2.view(DatetimeIndex))
        tm.assert_index_equal(the_int, expected)

        # non-overlapping
        the_int = rng[:10].intersection(rng[10:])
        expected = DatetimeIndex([])
        tm.assert_index_equal(the_int, expected) 
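
A compact sketch of the set operations exercised in Examples 14-16, on made-up date ranges:

import pandas as pd

a = pd.date_range("2000-01-01", periods=5, freq="D")
b = pd.date_range("2000-01-04", periods=5, freq="D")
print(a.union(b))         # 2000-01-01 through 2000-01-08
print(a.intersection(b))  # 2000-01-04 and 2000-01-05
print(a.difference(b))    # 2000-01-01 through 2000-01-03
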
Example 17
Project: recruit   Author: Frank-qlu   File: test_indexing.py   License: Apache License 2.0
def test_dti_business_getitem(self):
        rng = pd.bdate_range(START, END)
        smaller = rng[:5]
        exp = DatetimeIndex(rng.view(np.ndarray)[:5])
        tm.assert_index_equal(smaller, exp)

        assert smaller.freq == rng.freq

        sliced = rng[::5]
        assert sliced.freq == BDay() * 5

        fancy_indexed = rng[[4, 3, 2, 1, 0]]
        assert len(fancy_indexed) == 5
        assert isinstance(fancy_indexed, DatetimeIndex)
        assert fancy_indexed.freq is None

        # 32-bit vs. 64-bit platforms
        assert rng[4] == rng[np.int_(4)] 
Example 18
Project: recruit   Author: Frank-qlu   File: test_indexing.py   License: Apache License 2.0
def test_take2(self, tz):
        dates = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15),
                 datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)]

        idx = pd.date_range(start='2010-01-01 09:00',
                            end='2010-02-01 09:00', freq='H', tz=tz,
                            name='idx')
        expected = DatetimeIndex(dates, freq=None, name='idx', tz=tz)

        taken1 = idx.take([5, 6, 8, 12])
        taken2 = idx[[5, 6, 8, 12]]

        for taken in [taken1, taken2]:
            tm.assert_index_equal(taken, expected)
            assert isinstance(taken, DatetimeIndex)
            assert taken.freq is None
            assert taken.tz == expected.tz
            assert taken.name == expected.name 
Example 19
Project: recruit   Author: Frank-qlu   File: test_construction.py   License: Apache License 2.0
def test_categorical_preserves_tz(self):
        # GH#18664 retain tz when going DTI-->Categorical-->DTI
        # TODO: parametrize over DatetimeIndex/DatetimeArray
        #  once CategoricalIndex(DTA) works

        dti = pd.DatetimeIndex(
            [pd.NaT, '2015-01-01', '1999-04-06 15:14:13', '2015-01-01'],
            tz='US/Eastern')

        ci = pd.CategoricalIndex(dti)
        carr = pd.Categorical(dti)
        cser = pd.Series(ci)

        for obj in [ci, carr, cser]:
            result = pd.DatetimeIndex(obj)
            tm.assert_index_equal(result, dti) 
Example 20
Project: recruit   Author: Frank-qlu   File: test_construction.py   License: Apache License 2.0
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
        tz = tz_aware_fixture
        i = pd.date_range('20130101', periods=5, freq='H', tz=tz)
        kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}

        if str(tz) in ('UTC', 'tzutc()'):
            warn = None
        else:
            warn = FutureWarning

        with tm.assert_produces_warning(warn, check_stacklevel=False):
            result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs)
        expected = DatetimeIndex(i, **kwargs)
        tm.assert_index_equal(result, expected)

        # localize into the provided tz
        i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC')
        expected = i.tz_localize(None).tz_localize('UTC')
        tm.assert_index_equal(i2, expected)

        # incompat tz/dtype
        pytest.raises(ValueError, lambda: DatetimeIndex(
            i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) 
Example 21
Project: aospy   Author: spencerahill   File: times.py   License: Apache License 2.0
def apply_time_offset(time, years=0, months=0, days=0, hours=0):
    """Apply a specified offset to the given time array.

    This is useful for GFDL model output of instantaneous values.  For example,
    3-hourly data postprocessed to netCDF files spanning 1 year each will
    actually have time values that are offset by 3 hours, such that the first
    value is for 1 Jan 03:00 and the last value is 1 Jan 00:00 of the
    subsequent year.  This causes problems in xarray, e.g. when trying to group
    by month.  It is resolved by manually subtracting off those three hours,
    such that the dates span from 1 Jan 00:00 to 31 Dec 21:00 as desired.

    Parameters
    ----------
    time : xarray.DataArray representing a timeseries
    years, months, days, hours : int, optional
        The number of years, months, days, and hours, respectively, to offset
        the time array by.  Positive values move the times later.

    Returns
    -------
    pandas.DatetimeIndex

    Examples
    --------
    Case of a length-1 input time array:

    >>> times = xr.DataArray(datetime.datetime(1899, 12, 31, 21))
    >>> apply_time_offset(times)
    Timestamp('1900-01-01 00:00:00')

    Case of input time array with length greater than one:

    >>> times = xr.DataArray([datetime.datetime(1899, 12, 31, 21),
    ...                       datetime.datetime(1899, 1, 31, 21)])
    >>> apply_time_offset(times) # doctest: +NORMALIZE_WHITESPACE
    DatetimeIndex(['1900-01-01', '1899-02-01'], dtype='datetime64[ns]',
                  freq=None)
    """
    return (pd.to_datetime(time.values) +
            pd.DateOffset(years=years, months=months, days=days, hours=hours)) 
Example 22
Project: pywr   Author: pywr   File: timestepper.py   License: GNU General Public License v3.0
def datetime_index(self):
        """ Return a `pandas.DatetimeIndex` using the start, end and delta of this object

        This is useful for creating `pandas.DataFrame` objects from Model results
        """
        return pandas.period_range(self.start, self.end, freq=self.freq) 
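
Note that the body above returns pandas.period_range, i.e. a PeriodIndex rather than a DatetimeIndex. A minimal sketch (not part of pywr) of converting between the two, should an actual DatetimeIndex be required:

import pandas as pd

pidx = pd.period_range("2020-01-01", "2020-01-05", freq="D")
dtidx = pidx.to_timestamp()   # PeriodIndex -> DatetimeIndex
print(type(dtidx).__name__)   # DatetimeIndex
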
Example 23
Project: performance_tracker   Author: metro-ontime   File: estimate_arrivals.py   License: GNU General Public License v3.0
def estimate_arrivals(trip_id, trip, stations, direction):
    trip.loc[:, "estimate"] = False
    stations.loc[:, "estimate"] = True
    trip_est = stations
    trip_est.loc[:, "trip_id"] = trip_id
    trip_est.loc[:, "direction_id"] = direction
    combined = trip.append(trip_est)
    combined = combined.sort_values("relative_position")
    combined = combined.reset_index(drop=True)
    # shift vals to move adjacent position and date data into each row
    combined.loc[:, "previous_pos"] = combined.relative_position.shift()
    combined.loc[:, "next_pos"] = combined.relative_position.shift(-1)
    combined.loc[:, "previous_dt"] = combined.datetime.shift()
    combined.loc[:, "next_dt"] = combined.datetime.shift(-1)
    select = combined[combined["estimate"] == True]
    select.loc[:, "weight"] = (select.relative_position - select.previous_pos) / (
        select.next_pos - select.previous_pos
    )
    select.loc[:, "time_interpolation"] = (
        select.next_dt - select.previous_dt
    ) * select.weight
    select.loc[:, "datetime"] = select.previous_dt + select.time_interpolation
    select.loc[:, "datetime"] = pd.DatetimeIndex(select.datetime).round("S")
    select.loc[:, "stop_id"] = pd.to_numeric(select.stop_id, downcast="integer")
    # Some station arrival times cannot be reliably estimated with this
    # technique and end up with datetime = NaT, so we drop them.
    select = select.dropna(subset=["datetime"])
    return select 
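
A minimal sketch of the interpolation step above, with made-up positions and times: a station 40% of the way between two observed points gets 40% of the time gap, and the estimate is rounded to whole seconds via DatetimeIndex.round.

import pandas as pd

prev_dt = pd.Timestamp("2019-05-01 08:00:00")
next_dt = pd.Timestamp("2019-05-01 08:05:00")
weight = (4.0 - 2.0) / (7.0 - 2.0)                # relative position between neighbours
estimate = prev_dt + (next_dt - prev_dt) * weight
estimate = pd.DatetimeIndex([estimate]).round("S")[0]
print(estimate)  # 2019-05-01 08:02:00
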
Example 24
Project: performance_tracker   Author: metro-ontime   File: analyze_estimates.py   License: GNU General Public License v3.0
def get_previous_stop_times(stop_id, stop_estimates):
    stop_estimates = stop_estimates.set_index(
        pd.DatetimeIndex(stop_estimates["datetime"])
    ).sort_index()
    stop_estimates.loc[:, "prev_stop_time"] = stop_estimates["datetime"].shift()
    return stop_estimates 
Example 25
Project: pyTD   Author: addisonlynch   File: price_history.py   License: MIT License
def _convert_output_one(self, out):
        df = pd.DataFrame(out)
        df = df.set_index(pd.DatetimeIndex(df["datetime"]/1000*10**9))
        df = df.drop("datetime", axis=1)
        return df 
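
A minimal sketch of the conversion above, with made-up epoch values: the API returns epoch milliseconds, and the /1000*10**9 rescales them to the epoch nanoseconds that DatetimeIndex interprets. pd.to_datetime(..., unit="ms") is an equivalent, more explicit spelling.

import pandas as pd

ms = pd.Series([1556712000000, 1556715600000], name="datetime")
idx = pd.DatetimeIndex(pd.to_datetime(ms, unit="ms"))
print(idx)  # DatetimeIndex(['2019-05-01 12:00:00', '2019-05-01 13:00:00'], ...)
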
Example 26
Project: arctic   Author: man-group   File: test_ts_read.py   License: GNU Lesser General Public License v2.1
def test_read_all_cols_all_dtypes(tickstore_lib, chunk_size):
    data = [{'f': 0.1,
            'of': 0.2,
            's': 's',
            'os': 'os',
            'l': 1,
            'ol': 2,
            'index': dt(1970, 1, 1, tzinfo=mktz('UTC')),
            },
            {'f': 0.3,
            'nf': 0.4,
            's': 't',
            'ns': 'ns',
            'l': 3,
            'nl': 4,
            'index': dt(1970, 1, 1, 0, 0, 1, tzinfo=mktz('UTC')),
            },
            ]
    tickstore_lib._chunk_size = chunk_size
    tickstore_lib.write('sym', data)
    df = tickstore_lib.read('sym', columns=None)

    assert df.index.tzinfo == mktz()

    # The below is probably more trouble than it's worth, but we *should*
    # be able to roundtrip data and get the same answer...

    # Ints become floats
    data[0]['l'] = float(data[0]['l'])
    # Treat missing strings as None
    data[0]['ns'] = None
    data[1]['os'] = None
    index = DatetimeIndex([dt(1970, 1, 1, tzinfo=mktz('UTC')),
                         dt(1970, 1, 1, 0, 0, 1, tzinfo=mktz('UTC'))],
                        )
    df.index = df.index.tz_convert(mktz('UTC'))
    expected = pd.DataFrame(data, index=index)
    expected = expected[df.columns]
    assert_frame_equal(expected, df, check_names=False) 
Example 27
Project: arctic   Author: man-group   File: test_fixes.py   License: GNU Lesser General Public License v2.1
def test_write_dataframe(chunkstore_lib):
    # Create dataframe of time measurements taken every 6 hours
    date_range = pd.date_range(start=dt(2017, 5, 1, 1), periods=8, freq='6H')

    df = DataFrame(data={'something': [100, 200, 300, 400, 500, 600, 700, 800]},
                   index=DatetimeIndex(date_range, name='date'))

    chunkstore_lib.write('test', df, chunk_size='D')

    # Iterate
    for chunk in chunkstore_lib.iterator('test'):
        assert(len(chunk) > 0) 
Example 28
Project: arctic   Author: man-group   File: test_fixes.py   License: GNU Lesser General Public License v2.1
def test_date_interval(chunkstore_lib):
    date_range = pd.date_range(start=dt(2017, 5, 1), periods=8, freq='D')

    df = DataFrame(data={'data': range(8)},
                   index=DatetimeIndex(date_range, name='date'))

    # test with index
    chunkstore_lib.write('test', df, chunk_size='D')

    ret = chunkstore_lib.read('test', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), CLOSED_OPEN))
    assert_frame_equal(ret, df[1:4])
    ret = chunkstore_lib.read('test', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), OPEN_OPEN))
    assert_frame_equal(ret, df[2:4])
    ret = chunkstore_lib.read('test', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), OPEN_CLOSED))
    assert_frame_equal(ret, df[2:5])
    ret = chunkstore_lib.read('test', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), CLOSED_CLOSED))
    assert_frame_equal(ret, df[1:5])
    ret = chunkstore_lib.read('test', chunk_range=DateRange(dt(2017, 5, 2), None, CLOSED_OPEN))
    assert_frame_equal(ret, df[1:8])

    # test without index
    df = DataFrame(data={'data': range(8),
                         'date': date_range})

    chunkstore_lib.write('test2', df, chunk_size='D')

    ret = chunkstore_lib.read('test2', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), CLOSED_OPEN))
    assert(len(ret) == 3)
    ret = chunkstore_lib.read('test2', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), OPEN_OPEN))
    assert(len(ret) == 2)
    ret = chunkstore_lib.read('test2', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), OPEN_CLOSED))
    assert(len(ret) == 3)
    ret = chunkstore_lib.read('test2', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), CLOSED_CLOSED))
    assert(len(ret) == 4)
    ret = chunkstore_lib.read('test2', chunk_range=DateRange(dt(2017, 5, 2), None, CLOSED_OPEN))
    assert(len(ret) == 7) 
Example 29
Project: arctic   Author: man-group   File: test_fixes.py   License: GNU Lesser General Public License v2.1
def test_column_copy(chunkstore_lib):
    index = DatetimeIndex(pd.date_range('2019-01-01', periods=3, freq='D'), name='date')

    df = pd.DataFrame({'A': [1, 2, 3], 'B': [5,6,7]}, index=index)
    cols = ['A']
    chunkstore_lib.write('test', df)
    chunkstore_lib.read('test', columns=cols)
    assert cols == ['A'] 
Example 30
Project: arctic   Author: man-group   File: test_pandas_store.py   License: GNU Lesser General Public License v2.1
def test_save_read_pandas_series_with_datetimeindex_with_timezone(library):
    df = Series(data=['A', 'BC', 'DEF'], index=DatetimeIndex(np.array([dt(2013, 1, 1),
                                                                       dt(2013, 1, 2),
                                                                       dt(2013, 1, 3)]).astype('datetime64[ns]'),
                                                                tz="America/Chicago"))
    library.write('pandas', df)
    saved_df = library.read('pandas').data
    assert df.index.tz == saved_df.index.tz
    assert all(df.index == saved_df.index)