Python pandas.date_range() Examples

The following code examples show how to use pandas.date_range(). They are extracted from open-source Python projects. You can vote up the examples you like or vote down the ones you don't like. You can also save this page to your account.

Example 1
Project: pyrsss   Author: butala   File: process_hdf.py    (MIT License) View Source Project 7 votes vote down vote up
def fill_nans(df, delta=None):
    """
    Reindex *df* onto a regularly spaced time grid.

    When *delta* is falsy, the grid step is the smallest difference
    between consecutive index values of *df*. The grid runs from the
    first to the last index value of *df*; grid times that are absent
    from *df* are reported through the logger as warnings.

    Returns the tuple (reindexed data frame, delta).
    """
    if not delta:
        smallest_step = min(NP.diff(df.index.values))
        # numpy timedelta64 -> float seconds -> datetime.timedelta
        step_in_seconds = smallest_step / NP.timedelta64(1, 's')
        delta = timedelta(seconds=step_in_seconds)
    logger.info('Using delta = {} (s)'.format(delta.total_seconds()))
    regular_index = PD.date_range(start=df.index[0],
                                  end=df.index[-1],
                                  freq=delta)
    gaps = sorted(set(regular_index).difference(df.index))
    if gaps:
        logger.warning('Missing time indices (filled by NaNs):')
        for stamp in gaps:
            logger.warning(stamp)
    filled = df.reindex(regular_index, copy=False)
    return filled, delta
Example 2
Project: zipline-chinese   Author: zhanghan1990   File: core.py    (Apache License 2.0) View Source Project 6 votes vote down vote up
def gen_calendars(start, stop, critical_dates):
    """
    Generate calendars to use as inputs.

    Yields one 1-tuple per subset of `critical_dates`, holding the daily
    UTC range from `start` to `stop` with that subset dropped, and finally
    a 1-tuple holding the slice of `trading_days` between `start` and
    `stop`.
    """
    full_range = pd.date_range(start, stop, tz='utc')
    for subset in powerset(critical_dates):
        # Each yielded value has to be a tuple.
        yield (full_range.drop(list(subset)),)

    # Also exercise the real trading calendar.
    window = trading_days.slice_indexer(start, stop)
    yield (trading_days[window],)
Example 3
Project: zipline-chinese   Author: zhanghan1990   File: test_sources.py    (Apache License 2.0) View Source Project 6 votes vote down vote up
def test_nan_filter_dataframe(self):
        """Check the sids (and final price) of events read from a DataFrameSource holding NaNs."""
        index = pd.date_range('1/1/2000', periods=2, freq='B', tz='UTC')
        frame = pd.DataFrame(np.random.randn(2, 2), index=index, columns=[4, 5])
        first_day, second_day = index[0], index[1]
        # should be filtered
        frame.loc[first_day, 4] = np.nan
        # should not be filtered, should have been ffilled
        frame.loc[second_day, 5] = np.nan
        source = DataFrameSource(frame)
        # events are expected in this sid order
        for expected_sid in (5, 4, 5):
            event = next(source)
            self.assertEqual(expected_sid, event.sid)
        # the last event (sid 5) must carry a non-NaN price
        self.assertFalse(np.isnan(event.price))
Example 4
Project: zipline-chinese   Author: zhanghan1990   File: test_sources.py    (Apache License 2.0) View Source Project 6 votes vote down vote up
def test_nan_filter_panel(self):
        """Check the sids of events read from a DataPanelSource holding NaN prices."""
        # NOTE(review): pd.Panel only exists in old pandas releases.
        index = pd.date_range('1/1/2000', periods=2, freq='B', tz='UTC')
        panel = pd.Panel(np.random.randn(2, 2, 2),
                         major_axis=index,
                         items=[4, 5],
                         minor_axis=['price', 'volume'])
        first_day, second_day = index[0], index[1]
        # should be filtered
        panel.loc[4, first_day, 'price'] = np.nan
        # should not be filtered, should have been ffilled
        panel.loc[5, second_day, 'price'] = np.nan
        source = DataPanelSource(panel)
        # exactly two events are expected, in this sid order
        for expected_sid in (5, 4):
            event = next(source)
            self.assertEqual(expected_sid, event.sid)
        self.assertRaises(StopIteration, next, source)
Example 5
Project: slaveo   Author: lamter   File: future.py    (license) View Source Project 6 votes vote down vote up
def getCalendar(self):
        """
        Build the trade calendar covering self.begin .. self.end.

        :return: DataFrame indexed by date (the "date" column is kept as
                 well) with "type" and "weekday" columns, after being passed
                 through the holiday and trade-status helpers.
        """
        # One row per calendar day in the requested span.
        all_days = pd.date_range(self.begin, self.end)
        cal_df = pd.DataFrame(data=all_days, columns=['date'])

        # Day type and weekday number derived from the date column.
        types, weekdays = self._weekend_trade_day_type(cal_df["date"])
        cal_df["type"] = types
        cal_df["weekday"] = weekdays
        # Shift weekday numbers up by one (presumably 0-based -> 1-based; confirm).
        cal_df["weekday"] += 1
        cal_df = cal_df.set_index("date", drop=False)

        # Apply the holiday adjustment, then the trade-status pass.
        with_holidays = self._holiday_trade_day_type(cal_df)
        return self._tradestatus(with_holidays)
Example 6
Project: tianchi_power   Author: lvniqi   File: make_features.py    (license) View Source Project 6 votes vote down vote up
def make_features(user_id,user_df):
    """
    Build lagged power-consumption features for one user and write one CSV
    file per day model.

    The feature frame is indexed by the user's dates plus September 2016.
    Columns 'power#-1' .. 'power#-60' hold the power series delayed by
    1..60 days. For each model d in 1..30, the 30 consecutive lag columns
    starting at 'power#-d' plus the target column 'y' are written to
    './features/day_model/<d>/<user_id>.csv'.

    Expects user_df to have exactly 639 rows and a 'power_consumption'
    column. Nothing is returned.
    """
    # Single-argument print() gives the same output on Python 2 and 3.
    print('user_id: %s' % (user_id,))
    power = user_df.power_consumption
    assert power.index[0] == user_df.index[0]
    assert len(user_df.index) == 639
    new_df = pd.DataFrame(index=user_df.index.union(pd.date_range('2016-9-1','2016-9-30')))
    pw_new = power.copy()
    # 60 lag columns: 30 days to predict plus 30 days of features
    for d in range(60):
        pw_new.index += pd.Timedelta('1D')
        new_df['power#-%d'%(d+1)] = pw_new
    # create 30 models
    for d in range(30):
        # 30 consecutive lag columns; copy so that adding 'y' does not
        # write into a slice of new_df (avoids SettingWithCopyWarning)
        x_ = new_df[new_df.columns[d:30+d]].copy()
        x_['y'] = power
        x_.to_csv('./features/day_model/%d/%d.csv'%(d+1,user_id))
Example 7
Project: tianchi_power   Author: lvniqi   File: make_features.py    (license) View Source Project 6 votes vote down vote up
def make_month_features(user_id,user_df):
    """
    Build lagged-feature and shifted-target columns for one user, save
    them with save_month_df and return the frame.

    The frame is indexed by the user's dates plus October 2016. Columns
    'power#-1' .. 'power#-30' hold the power series delayed by 1..30 days;
    columns 'y#0' .. 'y#30' hold it advanced by 0..30 days.
    """
    # Single-argument print() gives the same output on Python 2 and 3.
    print('user_id: %s' % (user_id,))
    # Copy: the index of `power` is shifted in place further down.
    power = user_df.power_consumption.copy()
    assert power.index[0] == user_df.index[0]
    new_df = pd.DataFrame(index=user_df.index.union(pd.date_range('2016-10-1','2016-10-31')))
    pw_new = power.copy()
    # 30 lag-feature columns
    for d in range(30):
        pw_new.index += pd.Timedelta('1D')
        new_df['power#-%d'%(d+1)] = pw_new
    # 31 target columns, y#0 .. y#30 (one per day offset)
    for d in range(31):
        new_df['y#%d'%d] = power
        power.index -= pd.Timedelta('1D')
    save_month_df(new_df,user_id)
    return new_df
Example 8
Project: renpass_gis   Author: znes   File: renpass_gis_main.py    (license) View Source Project 6 votes vote down vote up
def create_energysystem(nodes, **arguments):
    """Build an EnergySystem on a 60-minute time index.

    Parameters
    ----------
    nodes:
        A list of entities that comprise the energy system
    **arguments : key word arguments
        Arguments passed from command line; the keys '--date-from' and
        '--date-to' give the bounds of the time index.

    Returns
    -------
    The EnergySystem built from `nodes`, GROUPINGS and the time index.
    """
    first = arguments['--date-from']
    last = arguments['--date-to']
    time_index = pd.date_range(first, last, freq='60min')

    return EnergySystem(entities=nodes,
                        groupings=GROUPINGS,
                        timeindex=time_index)
Example 9
Project: psyplot   Author: Chilipp   File: test_data.py    (license) View Source Project 6 votes vote down vote up
def _from_dataset_test_variables(self):
        """The variables and coords needed for the from_dataset tests

        Returns the tuple (variables, coords): two dicts of xr.Variable
        objects. Every dimension has length 4.
        """
        variables = {
            # 3d-variable
            'v0': xr.Variable(('time', 'ydim', 'xdim'), np.zeros((4, 4, 4))),
            # 2d-variable with time and x
            'v1': xr.Variable(('time', 'xdim', ), np.zeros((4, 4))),
            # 2d-variable with y and x
            'v2': xr.Variable(('ydim', 'xdim', ), np.zeros((4, 4))),
            # 1d-variable
            'v3': xr.Variable(('xdim', ), np.zeros(4))}
        # Month ends January..April 1999. The offset object is used rather
        # than the 'M' alias string, which newer pandas deprecates.
        month_ends = pd.date_range('1999-01-01', '1999-05-01',
                                   freq=pd.offsets.MonthEnd()).values
        coords = {
            'ydim': xr.Variable(('ydim', ), np.arange(1, 5)),
            'xdim': xr.Variable(('xdim', ), np.arange(4)),
            'time': xr.Variable(('time', ), month_ends)}
        return variables, coords
Example 10
Project: Visualflee   Author: cspgdds   File: map_camps_timehistory.py    (license) View Source Project 6 votes vote down vote up
def make_features(locations_file='blocations.csv',
                  timeseries_file='burundioutput.csv',
                  startdate='2015-05-01'):
    """Build the list of point features for every location.

    Reads the locations and the time series from CSV, re-indexes the time
    series by consecutive days beginning at `startdate`, and for each
    location passes its per-day location type and population to
    mgj.make_gj_points. All resulting features are returned in one list.
    """
    locations = pd.read_csv(locations_file)
    timeseries = pd.read_csv(timeseries_file)
    # Replace day numbers with real dates, one per row.
    timeseries.index = pd.date_range(startdate, periods=len(timeseries))

    features = []
    for loc in locations.itertuples(name='Location'):
        per_day = pd.DataFrame({
            'loctype': get_loctype(loc, timeseries.index),
            'population': get_population(timeseries, loc.name),
        })
        position = (loc.latitude, loc.longitude)
        features += mgj.make_gj_points(position, loc.name, per_day)
    return features
Example 11
Project: Visualflee   Author: cspgdds   File: test_make_geojson.py    (license) View Source Project 6 votes vote down vote up
def test_make_gj_points():
    """make_gj_points yields one feature per day with the expected fields."""
    n_days = 100
    index = pandas.date_range('2015-3-1', periods=n_days)
    timeseries = pandas.DataFrame({
        'loctype': pandas.Series(['city'] * 50 + ['conflict'] * 50, index=index),
        'population': pandas.Series([500 * day for day in range(n_days)], index=index),
    })

    res = make_geojson.make_gj_points((52.0, 0.0), 'Examplecamp', timeseries)

    assert len(res) == 100

    first = res[0]
    assert first['type'] == 'Feature'
    assert first['properties']['start'] == '2015-03-01'
    assert first['properties']['end'] == '2015-03-02'
    assert first['properties']['loctype'] == 'city'
    # the (52.0, 0.0) input comes back in the opposite order
    assert first['geometry']['coordinates'] == (0.0, 52.0)

    halfway = res[50]['properties']
    assert halfway['loctype'] == 'conflict'
    assert halfway['start'] == '2015-04-20'
Example 12
Project: astk   Author: openalea-incubator   File: Weather.py    (license) View Source Project 6 votes vote down vote up
def date_range_index(self, start, end=None, by=24):
        """ return a (list of) time sequence that allow indexing one or several time intervals between start and end every 'by' hours
        if end is None, only one time interval of 'by' hours is returned

        start and end are expected in local time (self.timezone.zone);
        the returned sequences are converted to UTC.

        With `end` given, the result is a list of hourly sequences, one
        per consecutive pair of 'by'-hour bin edges; each covers the
        half-open interval [edge, next edge).
        """
        # The lowercase 'h' alias is used because newer pandas deprecates
        # the uppercase 'H' spelling.
        local_tz = self.timezone.zone
        if end is None:
            seq = pandas.date_range(start=start, periods=by, freq='h',
                                    tz=local_tz)
            return seq.tz_convert('UTC')

        seq = pandas.date_range(start=start, end=end, freq='h',
                                tz=local_tz)
        seq = seq.tz_convert('UTC')
        bins = pandas.date_range(start=start, end=end, freq=str(by) + 'h',
                                 tz=local_tz)
        bins = bins.tz_convert('UTC')
        return [seq[(seq >= lower) & (seq < upper)]
                for lower, upper in zip(bins[:-1], bins[1:])]
Example 13
Project: demandlib   Author: oemof   File: bdew.py    (license) View Source Project 6 votes vote down vote up
def __init__(self, year, seasons=None, holidays=None):
        """Set up the quarter-hourly time index and load profiles for `year`.

        `seasons` maps a season name to a four-element list (the default
        entries read as [start month, start day, end month, end day]);
        when None, the defaults below are used. `holidays` is forwarded
        unchanged to self.all_load_profiles.
        """
        # hours of the year: 366 days in a leap year, 365 otherwise
        hoy = 8784 if calendar.isleap(year) else 8760
        self.datapath = os.path.join(os.path.dirname(__file__), 'bdew_data')
        # pd.datetime was removed from pandas; pd.Timestamp builds the same
        # midnight-of-January-1st starting point.
        self.date_time_index = pd.date_range(
            pd.Timestamp(year=year, month=1, day=1),
            periods=hoy * 4, freq='15min')
        if seasons is None:
            self.seasons = {
                'summer1': [5, 15, 9, 14],  # summer: 15.05. to 14.09
                'transition1': [3, 21, 5, 14],  # transition1 :21.03. to 14.05
                'transition2': [9, 15, 10, 31],  # transition2 :15.09. to 31.10
                'winter1': [1, 1, 3, 20],  # winter1:  01.01. to 20.03
                'winter2': [11, 1, 12, 31],  # winter2: 01.11. to 31.12
            }
        else:
            self.seasons = seasons
        self.year = year
        self.slp_frame = self.all_load_profiles(self.date_time_index,
                                                holidays=holidays)
Example 14
Project: base_function   Author: Rockyzsu   File: pandas_book.py    (license) View Source Project 6 votes vote down vote up
def date_op():
    """Demo: build date ranges and select DataFrame rows by date.

    Prints its results; nothing is returned.
    """
    start = pd.date_range('2015-01-01', periods=50)
    #print(start)
    # print() with one argument behaves the same on Python 2 and 3.
    print(type(start))

    date_list = [datetime.datetime(2017, 1, 1), datetime.datetime(2017, 1, 2), datetime.datetime(2017, 1, 3),
                 datetime.datetime(2017, 1, 4)]
    df = pd.DataFrame(np.random.randn(4), index=date_list)
    print(df)
    print(df.index[2])
    format_line()

    s_x = pd.date_range('2000-1-1', periods=1000)
    df_x = pd.DataFrame(np.arange(2000).reshape(1000, 2), index=s_x)
    print(df_x)
    # Label-based row lookup by date string. The .ix indexer was removed
    # from pandas; .loc is the label-based replacement.
    print(df_x.loc['2002/09/24'])
    # column labelled 1
    print(df_x[1])
    # A partial date string selects every row of that month.
    print(df_x.loc['2001-09'])
Example 15
Project: Python-Machine-Learning-Cookbook   Author: PacktPublishing   File: convert_to_timeseries.py    (license) View Source Project 6 votes vote down vote up
def convert_data_to_timeseries(input_file, column, verbose=False):
    """Load a comma-separated numeric file and return one column as a
    month-end indexed pandas Series.

    Column 0 of the file is read as the year and column 1 as the month;
    `column` selects the data column. The index runs over the month ends
    from the first row's month through the last row's month, so the file
    is expected to hold one row per consecutive month.

    When `verbose` is true, the date bounds and the first ten values are
    printed.
    """
    # Load the input file
    data = np.loadtxt(input_file, delimiter=',')

    # Extract the start date, and an end date that is the first day of the
    # month AFTER the last row, so the month-end range stops at the last
    # row's month. Only December rolls over into the next year.
    first_year, first_month = int(data[0, 0]), int(data[0, 1])
    last_year, last_month = int(data[-1, 0]), int(data[-1, 1])
    if last_month == 12:
        end_year, end_month = last_year + 1, 1
    else:
        end_year, end_month = last_year, last_month + 1
    start_date = '%d-%d' % (first_year, first_month)
    end_date = '%d-%d' % (end_year, end_month)

    if verbose:
        # Single-argument print() gives the same output on Python 2 and 3.
        print("\nStart date = %s" % (start_date,))
        print("End date = %s" % (end_date,))

    # Create a date sequence with monthly (month-end) intervals. The offset
    # object avoids the 'M' alias string, which newer pandas deprecates.
    dates = pd.date_range(start_date, end_date, freq=pd.offsets.MonthEnd())

    # Convert the data into time series data
    data_timeseries = pd.Series(data[:, column], index=dates)

    if verbose:
        print("\nTime series data:\n%s" % (data_timeseries[:10],))

    return data_timeseries
Example 16
Project: eemeter   Author: openeemeter   File: clients.py    (license) View Source Project 6 votes vote down vote up
def get_gsod_data(self, station, year):
        """Return a daily UTC-indexed Series of temperatures (deg C) for
        `station` in `year`.

        The Series spans Jan 1 .. Dec 31 of `year`; days with no line in
        the retrieved file stay NaN. Field 2 of each line is parsed as a
        YYYYMMDD date and field 3 as a Fahrenheit temperature.
        """
        filename_format = '/pub/data/gsod/{year}/{station}-{year}.op.gz'
        lines = self._retreive_file_lines(filename_format, station, year)

        first_day = "{}-01-01 00:00".format(year)
        last_day = "{}-12-31 00:00".format(year)
        daily_index = pd.date_range(first_day, last_day, freq='D', tz=pytz.UTC)
        series = pd.Series(None, index=daily_index, dtype=float)

        # The first line is skipped (presumably a header row -- confirm).
        for raw_line in lines[1:]:
            fields = raw_line.split()
            day_text = fields[2].decode('utf-8')
            fahrenheit = float(fields[3])
            celsius = (5. / 9.) * (fahrenheit - 32.)
            day = datetime.strptime(day_text, "%Y%m%d")
            series[pytz.UTC.localize(day)] = celsius

        return series
Example 17
Project: eemeter   Author: openeemeter   File: clients.py    (license) View Source Project 6 votes vote down vote up
def get_isd_data(self, station, year):
        """Return an hourly UTC-indexed Series of temperatures (deg C) for
        `station`, filled from the retrieved file for `year`.

        Characters 15..26 of each line are parsed as a YYYYMMDDHHMM
        timestamp and characters 87..91 as the temperature in tenths of a
        degree; the value "+9999" is stored as NaN. Only the first reading
        seen in each hour can set that hour's value.
        """
        filename_format = '/pub/data/noaa/{year}/{station}-{year}.gz'
        lines = self._retreive_file_lines(filename_format, station, year)

        # NOTE(review): the range ends on Dec 31 of the FOLLOWING year
        # (year + 1), so the Series spans two years -- confirm this is
        # intended. Left unchanged here.
        # The lowercase 'h' alias is used because newer pandas deprecates 'H'.
        dates = pd.date_range("{}-01-01 00:00".format(year),
                              "{}-12-31 23:00".format(int(year) + 1),
                              freq='h', tz=pytz.UTC)
        series = pd.Series(None, index=dates, dtype=float)

        for line in lines:
            if line[87:92].decode('utf-8') == "+9999":
                temp_C = float("nan")
            else:
                temp_C = float(line[87:92]) / 10.
            date_str = line[15:27].decode('utf-8')

            # there can be multiple readings per hour, so set all to minute 0
            dt = pytz.UTC.localize(datetime.strptime(date_str, "%Y%m%d%H%M")).replace(minute=0)

            # only set the temp if it's the first encountered in the hour.
            # (.loc replaces the .ix indexer, which was removed from pandas)
            if pd.isnull(series.loc[dt]):
                series[dt] = temp_C

        return series
Example 18
Project: eemeter   Author: openeemeter   File: test_arbitrary_start_serializer.py    (license) View Source Project 6 votes vote down vote up
def test_to_records(serializer):
    """to_records emits one record per row, keyed by "start", in row order."""
    columns = ["value", "estimated"]
    df = pd.DataFrame(
        {"value": [1, np.nan], "estimated": [True, False]},
        index=pd.date_range('2000-01-01', periods=2, freq='D'),
        columns=columns,
    )

    records = serializer.to_records(df)
    assert len(records) == 2

    first, second = records[0], records[1]

    assert first["start"] == datetime(2000, 1, 1, tzinfo=pytz.UTC)
    assert first["value"] == 1
    assert first["estimated"]

    assert second["start"] == datetime(2000, 1, 2, tzinfo=pytz.UTC)
    assert pd.isnull(second["value"])
    assert not second["estimated"]
Example 19
Project: eemeter   Author: openeemeter   File: test_arbitrary_end_serializer.py    (license) View Source Project 6 votes vote down vote up
def test_to_records(serializer):
    """to_records emits one record per row, keyed by "end".

    The first record is expected to carry a null value and the second
    record the value 1.
    """
    columns = ["value", "estimated"]
    df = pd.DataFrame(
        {"value": [1, np.nan], "estimated": [True, False]},
        index=pd.date_range('2000-01-01', periods=2, freq='D'),
        columns=columns,
    )

    records = serializer.to_records(df)
    assert len(records) == 2

    first, second = records[0], records[1]

    assert first["end"] == datetime(2000, 1, 1, tzinfo=pytz.UTC)
    assert pd.isnull(first["value"])
    assert not first["estimated"]

    assert second["end"] == datetime(2000, 1, 2, tzinfo=pytz.UTC)
    assert second["value"] == 1
    assert second["estimated"]
Example 20
Project: eemeter   Author: openeemeter   File: test_energy_efficiency_meter.py    (license) View Source Project 6 votes vote down vote up
def meter_input_daily(project_meter_input):
    """Meter input with 365 * 4 daily records (value 1.0) from 2012-01-01 UTC.

    The records are wrapped by _natural_gas_input and the trace is tagged
    with interval 'daily'.
    """
    n_days = 365 * 4
    records = []
    for dt in pd.date_range('2012-01-01', periods=n_days, freq='D',
                            tz=pytz.UTC):
        records.append(dict(start=dt.isoformat(), value=1.0, estimated=False))

    trace = _natural_gas_input(records)
    trace.update({'interval': 'daily'})

    return dict(
        type="SINGLE_TRACE_SIMPLE_PROJECT",
        trace=trace,
        project=project_meter_input,
    )
Example 21
Project: eemeter   Author: openeemeter   File: test_energy_efficiency_meter.py    (license) View Source Project 6 votes vote down vote up
def meter_input_hourly(project_meter_input):
    """Meter input with 365 * 4 * 24 hourly records from 2012-01-01 UTC.

    Each record's value is 1.0 plus the hour of day. The records are
    wrapped by _natural_gas_input and the trace is tagged with interval
    'hourly'.
    """
    n_hours = 365 * 4 * 24
    records = []
    for dt in pd.date_range('2012-01-01', periods=n_hours, freq='H',
                            tz=pytz.UTC):
        records.append(
            dict(start=dt.isoformat(), value=1.0 + dt.hour, estimated=False))

    trace = _natural_gas_input(records)
    trace.update({'interval': 'hourly'})

    return dict(
        type="SINGLE_TRACE_SIMPLE_PROJECT",
        trace=trace,
        project=project_meter_input,
    )
Example 22
Project: eemeter   Author: openeemeter   File: test_energy_efficiency_meter.py    (license) View Source Project 6 votes vote down vote up
def meter_input_daily_baseline_only(project_meter_input):
    """Meter input with 365 daily records (value 1.0) from 2012-01-01 UTC.

    The records are wrapped by _natural_gas_input.
    """
    n_days = 365 * 1
    records = []
    for dt in pd.date_range('2012-01-01', periods=n_days, freq='D',
                            tz=pytz.UTC):
        records.append(dict(start=dt.isoformat(), value=1.0, estimated=False))

    return dict(
        type="SINGLE_TRACE_SIMPLE_PROJECT",
        trace=_natural_gas_input(records),
        project=project_meter_input,
    )
Example 23
Project: eemeter   Author: openeemeter   File: test_energy_efficiency_meter.py    (license) View Source Project 6 votes vote down vote up
def meter_input_daily_reporting_only(project_meter_input):
    """Meter input with 365 daily records (value 1.0) from 2014-02-01 UTC.

    The records are wrapped by _natural_gas_input.
    """
    n_days = 365 * 1
    records = []
    for dt in pd.date_range('2014-02-01', periods=n_days, freq='D',
                            tz=pytz.UTC):
        records.append(dict(start=dt.isoformat(), value=1.0, estimated=False))

    return dict(
        type="SINGLE_TRACE_SIMPLE_PROJECT",
        trace=_natural_gas_input(records),
        project=project_meter_input,
    )
Example 24
Project: eemeter   Author: openeemeter   File: test_energy_efficiency_meter.py    (license) View Source Project 6 votes vote down vote up
def meter_input_daily_with_period_start_end(
        project_meter_input_with_period_start_end):
    """Meter input with 365 * 4 daily records (value 1.0) from 2012-01-01
    UTC, attached to the project input passed in.

    The records are wrapped by _natural_gas_input and the trace is tagged
    with interval 'daily'.
    """
    n_days = 365 * 4
    records = []
    for dt in pd.date_range('2012-01-01', periods=n_days, freq='D',
                            tz=pytz.UTC):
        records.append(dict(start=dt.isoformat(), value=1.0, estimated=False))

    trace = _natural_gas_input(records)
    trace.update({'interval': 'daily'})

    return dict(
        type="SINGLE_TRACE_SIMPLE_PROJECT",
        trace=trace,
        project=project_meter_input_with_period_start_end,
    )
Example 25
Project: eemeter   Author: openeemeter   File: test_energy_efficiency_meter.py    (license) View Source Project 6 votes vote down vote up
def meter_input_strange_interpretation(project_meter_input):
    """Meter input whose trace pairs an electricity interpretation with the
    unit "therm".

    Holds 365 * 4 daily records (value 1.0) from 2012-01-01 UTC.
    """
    n_days = 365 * 4
    records = []
    for dt in pd.date_range('2012-01-01', periods=n_days, freq='D',
                            tz=pytz.UTC):
        records.append(dict(start=dt.isoformat(), value=1.0, estimated=False))

    trace = dict(
        type="ARBITRARY_START",
        interpretation="ELECTRICITY_CONSUMPTION_NET",
        unit="therm",
        records=records,
    )
    return dict(
        type="SINGLE_TRACE_SIMPLE_PROJECT",
        trace=trace,
        project=project_meter_input,
    )
Example 26
Project: eemeter   Author: openeemeter   File: test_model_data_billing_formatter.py    (license) View Source Project 6 votes vote down vote up
def trace4():
    """EnergyTrace of 100 daily, non-estimated records of value 1 starting
    2011-01-01 UTC (interpretation ELECTRICITY_CONSUMPTION_SUPPLIED, unit KWH).
    """
    n_records = 100
    daily_index = pd.date_range(
        start=datetime(2011, 1, 1, tzinfo=pytz.UTC),
        periods=n_records,
        freq='D',
        tz=pytz.UTC
    )
    frame = pd.DataFrame(
        {"value": [1] * n_records, "estimated": [False] * n_records},
        index=daily_index,
        columns=["value", "estimated"],
    )
    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", frame, unit="KWH")
Example 27
Project: ModelFlow   Author: yuezPrincetechs   File: monitor.py    (license) View Source Project 6 votes vote down vote up
def parse_raw(filepath,seconds=1):
    '''
    Parse the whitespace-delimited raw file at filepath into a numeric
    DataFrame indexed by time.

    The start date is read from the 4th field of the file's first line.
    After skipping two lines, the next line supplies the column names
    (from its 3rd field onward) and the first data line supplies the
    start time (its first two fields). Rows containing missing values, or
    whose cells are all still strings after convert2float, are dropped.

    :param filepath: path of the raw file to read
    :param seconds: int, spacing in seconds between consecutive rows of
                    the generated time index
    :return: DataFrame whose index (named 'datetime') is a regular time
             range and whose columns are the names read from the file
    '''
    # sep=r'\s+' is the documented equivalent of delim_whitespace=True,
    # which newer pandas deprecates.
    data_head=pd.read_csv(filepath,sep=r'\s+',header=None,nrows=1)
    data=pd.read_csv(filepath,sep=r'\s+',header=None,skiprows=2)
    date_start=data_head.iloc[0,3]
    time_start=data.iloc[1,0]+' '+data.iloc[1,1]
    datetime_start=pd.to_datetime(date_start+' '+time_start)
    columns=list(data.iloc[0,2:])
    newdata=data.iloc[1:,2:].applymap(convert2float)
    newdata=newdata.dropna(axis=0,how='any')
    # keep rows in which at least one cell is no longer a string
    newdata=newdata.loc[(newdata.applymap(type)==type('')).sum(axis=1)<newdata.shape[1]]
    newdata=newdata.applymap(convert2float)
    newdata.columns=columns
    # lowercase 's' alias: newer pandas deprecates the uppercase 'S' spelling
    newdata.index=pd.date_range(start=datetime_start,periods=newdata.shape[0],freq='%ds'%seconds)
    newdata.index.name='datetime'
    return newdata
Example 28
Project: pandas_market_calendars   Author: rsheftel   File: test_utils.py    (license) View Source Project 6 votes vote down vote up
def test_date_range_lower_freq():
    """Frequencies lower than 1D are rejected by mcal.date_range but can be
    reached by converting a 1D range with mcal.convert_freq."""
    first, last = '2017-09-05 20:00', '2017-10-23 20:00'
    cal = mcal.get_calendar("NYSE")
    schedule = cal.schedule(pd.Timestamp(first, tz='UTC'), pd.Timestamp(last, tz='UTC'))

    # cannot get date range of frequency lower than 1D
    with pytest.raises(ValueError):
        mcal.date_range(schedule, frequency='3D')

    # instead get for 1D and convert to each lower frequency in turn
    short = mcal.date_range(schedule, frequency='1D')
    for lower_freq in ('3D', '1W'):
        actual = mcal.convert_freq(short, lower_freq)
        expected = pd.date_range(first, last, freq=lower_freq, tz='UTC')
        assert_index_equal(actual, expected)
Example 29
Project: catalyst   Author: enigmampc   File: bundle_utils.py    (license) View Source Project 6 votes vote down vote up
def get_periods_range(start_dt, end_dt, freq):
    """
    Get a date range for the specified parameters.

    Parameters
    ----------
    start_dt: datetime
    end_dt: datetime
    freq: str
        'minute' and 'daily' are translated to the pandas aliases 'min'
        and 'D'; any other value is passed to pandas unchanged.

    Returns
    -------
    DatetimeIndex

    """
    if freq == 'minute':
        # 'min' rather than 'T': newer pandas deprecates the 'T' alias
        freq = 'min'

    elif freq == 'daily':
        freq = 'D'

    return pd.date_range(start_dt, end_dt, freq=freq)
Example 30
Project: catalyst   Author: enigmampc   File: test_continuous_futures.py    (license) View Source Project 6 votes vote down vote up
def test_contract_at_offset(self):
        """contract_at_offset walks the contract chain by the given offset."""
        contract_sids = array([1, 2, 3, 4], dtype=int64)
        start_dates = pd.date_range('2015-01-01', periods=4, tz="UTC")

        contracts = deque(self.asset_finder.retrieve_all(contract_sids))

        oc = OrderedContracts('FO', contracts)

        # The same date argument (the last start date) is used for every call.
        last_start = start_dates[-1].value

        # assertEqual replaces the assertEquals alias, which was removed
        # from unittest in Python 3.12.
        self.assertEqual(1,
                         oc.contract_at_offset(1, 0, last_start),
                         "Offset of 0 should return provided sid")

        self.assertEqual(2,
                         oc.contract_at_offset(1, 1, last_start),
                         "Offset of 1 should return next sid in chain.")

        self.assertEqual(None,
                         oc.contract_at_offset(4, 1, last_start),
                         "Offset at end of chain should not crash.")
Example 31
Project: catalyst   Author: enigmampc   File: test_events.py    (license) View Source Project 6 votes vote down vote up
def test_next_event_indexer(self):
        """Check each sid's column of the next_event_indexer result."""
        events = self.events
        sid_values = events['sid'].values
        date_values = events['event_date'].values
        timestamp_values = events['timestamp'].values

        all_dates = pd.date_range('2014', '2014-01-31')
        all_sids = np.unique(sid_values)

        indexer = next_event_indexer(
            all_dates,
            all_sids,
            date_values,
            timestamp_values,
            sid_values,
        )

        # Compute expected results without knowledge of null events.
        for column, sid in enumerate(all_sids):
            sid_indexer = indexer[:, column]
            self.check_next_event_indexer(events, all_dates, sid, sid_indexer)
Example 32
Project: betterself   Author: jeffshek   File: pandas_utils.py    (license) View Source Project 6 votes vote down vote up
def force_start_end_data_to_dataframe(user, dataframe, start_date, end_date):
    """Return `dataframe` re-indexed to every day from start_date to end_date.

    Rows outside [start_date, end_date] are dropped; days inside the span
    that `dataframe` lacks appear with missing values. The daily index is
    built in the timezone given by `user.pytz_timezone`.
    """
    # isinstance (not an exact type comparison) so DataFrame subclasses
    # are accepted as well.
    assert isinstance(dataframe, pd.DataFrame)

    # if dataframe contains any dates outside of start and end date ... exclude
    dataframe = dataframe[start_date:end_date].asfreq('D')

    index = pd.date_range(start=start_date, end=end_date, tz=user.pytz_timezone)

    # blank dataframe that we know for certain holds all the right dates
    dataframe_container = pd.DataFrame(index=index)

    # join the dataframe with an empty one that has all the right indices ... to return a dataframe with all the right
    # start and end dates
    normalized_dataframe = dataframe_container.join(dataframe)

    # sanity check: the join must not add or drop any rows
    assert dataframe_container.index.size == normalized_dataframe.index.size

    return normalized_dataframe
Example 33
Project: betterself   Author: jeffshek   File: test_serializers.py    (license) View Source Project 6 votes vote down vote up
def _get_serialized_dataframe(self, supplement_name, boolean_string_name, values_to_create):
        """Build a one-column daily dataframe ending today and return it
        after ExcelSupplementFileSerializer._sanitize_dataframe_values.

        The column `supplement_name` holds `boolean_string_name` repeated
        `values_to_create` times.
        """
        data_values = [boolean_string_name] * values_to_create
        today = datetime.date.today()
        periods_ago = today - datetime.timedelta(days=values_to_create - 1)
        date_range = pd.date_range(periods_ago, today)

        # this would be stupid if the count is off
        self.assertEqual(len(data_values), len(date_range))

        dataframe = pd.DataFrame(index=date_range)
        dataframe[supplement_name] = data_values

        # make sure there's no dynamic type conversion that can screw you
        series = dataframe[supplement_name]
        # .iloc[0] reads the first row by position; a bare series[0] on a
        # date-indexed Series relies on deprecated positional fallback.
        self.assertEqual(series.iloc[0], boolean_string_name)

        serialized_dataframe = ExcelSupplementFileSerializer._sanitize_dataframe_values(dataframe)
        return serialized_dataframe
Example 34
Project: betterself   Author: jeffshek   File: historical_daily_importer.py    (license) View Source Project 6 votes vote down vote up
def import_history(self, start_date, end_date):
        """Collect one row of efficiency data per day into self.results.

        Every date from start_date to end_date is queried; dates whose
        response status is not 200 are skipped.
        """
        columns = RESCUETIME_EFFICIENCY_HEADERS + [PRODUCTIVITY_PULSE]
        history = pd.DataFrame(columns=columns)

        for day in pd.date_range(start=start_date, end=end_date).date:
            response = self._get_rescuetime_efficiency_for_date(day)
            if response.status_code == 200:
                timeseries = self.get_efficiency_timeseries_from_response(response)
                timeseries[PRODUCTIVITY_PULSE] = (
                    calculate_rescue_time_pulse_from_dataframe(timeseries))
                # Store this day's row in the history frame
                history.loc[day] = timeseries

        # when done, update into the results
        self.results = history
Example 35
Project: betterself   Author: jeffshek   File: builders.py    (license) View Source Project 6 votes vote down vote up
def __init__(self, user, periods_back=30):
        """Prepare the daily series and caches for `user`.

        `periods_back` is the number of daily timestamps in the date index,
        which ends at the current time.
        """
        self.user = user
        self.hour_series = range(24)

        # timezone.now() is used so that the index carries timezone info
        # (the original comment says this removes warning signals)
        self.date_series = pd.date_range(
            end=timezone.now(), freq='D', periods=periods_back)

        # zero-filled series for the impact supplements/events have on
        # sleep and on productivity
        self.sleep_impact_series = pd.Series(0, index=self.date_series)
        self.productivity_impact_series = pd.Series(0, index=self.date_series)

        self.sleep_series = self._get_random_sleep_series(self.date_series)

        # Create a cache here because creating many events is very slow on Production ...
        # so create a cache of commonly used Django objects and then create a bunch of events that
        # need this foreign key, so we can use bulk_create
        self.user_activities = {}
        self.supplements = {}
Example 36
Project: waffle-reviewer   Author: gabraganca   File: plot_activity.py    (license) View Source Project 6 votes vote down vote up
def create_timeseries(starting_date, ending_date, value=0):
    """Return a daily pandas Series holding `value` at every date.

    Parameters
    ----------
    starting_date: str, pandas.tslib.Timestamp
        The first date of the Time Series.

    ending_date: str, pandas.tslib.Timestamp
        The last date of the Time Series.

    value: int,float
        Value stored at every date. Default is zero.

    Returns
    -------
    pandas.Series indexed by the dates from `starting_date` to
    `ending_date`.
    """
    return pd.Series(value, index=pd.date_range(starting_date, ending_date))
Example 37
Project: appBBB   Author: rl-institut   File: results_evaluation.py    (license) View Source Project 6 votes vote down vote up
def create_es(solver, timesteps, year):
    """
    Creates a default energy system to load results into.

    The system gets an hourly time index that starts on January 1st of
    `year` and has one entry per element of `timesteps`. `year` may be a
    string or an integer.
    """
    simulation = es.Simulation(solver=solver,
                               timesteps=timesteps,
                               debug=False,
                               objective_options={"function": minimize_cost})

    # Adding a time index to the energy system. str() lets an integer year
    # work too; the lowercase 'h' alias is used because newer pandas
    # deprecates 'H'.
    time_index = pd.date_range('1/1/' + str(year),
                               periods=len(timesteps),
                               freq='h')
    energysystem = es.EnergySystem(time_idx=time_index,
                                   simulation=simulation)
    return energysystem
Example 38
Project: fxcmminer   Author: JamesKBowler   File: tradinghours.py    (license) View Source Project 6 votes vote down vote up
def _hourly_range(self, init_date, time_frame):
        """
        Returns DatetimeIndex trading week/s in hours.
        """
        utcnow = datetime.utcnow()
        tr_wk_str, tr_wk_end = self.get_trading_week(init_date)
        if tr_wk_end > utcnow:
            tr_wk_end = utcnow.replace(
                minute=00,second=00, microsecond=00)
        freq, interval_type, delta = self._data_frequency(time_frame)
        dth = pd.date_range(str(tr_wk_str), str(tr_wk_end), freq=freq)
        while (len(dth) % (300*int(time_frame[1:])) == 0) == False:
            tr_wk_str = tr_wk_end + timedelta(**{interval_type: delta})
            if tr_wk_str < utcnow:
                tr_wk_str, tr_wk_end = self.get_trading_week(tr_wk_str)
                if tr_wk_end > utcnow:
                    tr_wk_end = utcnow.replace(
                        minute=00,second=00, microsecond=00)
                    tr_wk_end += timedelta(hours=1)
                dth = dth.append(
                    pd.date_range(str(tr_wk_str), str(tr_wk_end), freq=freq))
            else:
                break
        return dth 
Example 39
Project: fxcmminer   Author: JamesKBowler   File: tradinghours.py    (license) View Source Project 6 votes vote down vote up
def _daily_range(self, daily):
        """
        Returns DatetimeIndex for daily values.
        """
        max_bars = 299
        utcnow = datetime.utcnow()
        dtd = pd.DatetimeIndex([])
        while daily < utcnow:
            tr_wk_str, tr_wk_end = self.get_trading_week(daily)
            hour = int(str(tr_wk_str.time())[:2])
            daily += timedelta(days=1)
            daily = daily.replace(hour=hour)
            if daily >= tr_wk_end:
                daily, tr_wk_end = self.get_trading_week(daily)
            dtd = dtd.append(
                pd.date_range(str(daily), str(daily)))
        return dtd 
Example 40
Project: fxcmminer   Author: JamesKBowler   File: tradinghours.py    (license) View Source Project 6 votes vote down vote up
def _monthly_range(self, last_day_of_month):
        """
        Returns DatetimeIndex for monthly values.
        """
        ldom = last_day_of_month
        max_bars = 299
        utcnow = datetime.utcnow()
        dtm = pd.DatetimeIndex([])
        while ldom < utcnow:
            dtm = dtm.append(pd.date_range(
                str(ldom), str(ldom)))
            if ldom.month == 12:
                ldom = ldom.replace(year=ldom.year+1, month=2, day=1)
            elif ldom.month == 11:
                ldom = ldom.replace(year=ldom.year+1, month=1, day=1)
            else:
                ldom = ldom.replace(month=ldom.month+2, day=1)
            ldom -= timedelta(days=1)
            ldom = ldom.replace(hour=self.new_york_offset(ldom, 22))
        return dtm 
Example 41
Project: mlprojects-py   Author: srinathperera   File: __init__.py    (license) View Source Project 6 votes vote down vote up
def fill_in_missing_dates(df, date_col_name, other_col):
    """Spread one column of ``df`` over a gap-free daily calendar.

    The calendar runs from the first to the last entry of
    ``df[date_col_name]``. Dates without a row are filled with ``None``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain at least one row; an empty frame raises IndexError.
    date_col_name
        Column holding the dates. Lookups use ``'%Y-%m-%d'`` strings, so
        this assumes the column stores dates in that text form -- TODO
        confirm against callers.
    other_col
        Column whose values are carried over to the output.

    Returns
    -------
    numpy.ndarray
        One ``[date_string, value]`` row per calendar day.
    """
    dates = df[date_col_name].values
    startd, endd = dates[0], dates[-1]
    # Formatting before printing gives the same output on Python 2 and 3.
    print('%s %s' % (startd, endd))
    idx = pd.date_range(startd, endd)

    # When a date occurs more than once, the last row wins.
    value_by_date = dict(zip(df[date_col_name], df[other_col]))

    new_data = [[daskey, value_by_date.get(daskey)]
                for daskey in idx.strftime('%Y-%m-%d')]

    # np.vstack is the function np.row_stack was an alias of.
    return np.vstack(new_data)
Example 42
Project: mlprojects-py   Author: srinathperera   File: Tests.py    (license) View Source Project 6 votes vote down vote up
def fill_in_missing_dates(df, date_col_name, other_col):
    """Spread one column of ``df`` over a gap-free daily calendar.

    The calendar runs from the first to the last entry of
    ``df[date_col_name]``. Dates without a row are filled with ``0``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain at least one row; an empty frame raises IndexError.
    date_col_name
        Column holding the dates. Lookups use ``'%Y-%m-%d'`` strings, so
        this assumes the column stores dates in that text form -- TODO
        confirm against callers.
    other_col
        Column whose values are carried over to the output.

    Returns
    -------
    numpy.ndarray
        One ``[date_string, value]`` row per calendar day.
    """
    dates = df[date_col_name].values
    startd, endd = dates[0], dates[-1]
    # Formatting before printing gives the same output on Python 2 and 3.
    print('%s %s' % (startd, endd))
    idx = pd.date_range(startd, endd)

    # When a date occurs more than once, the last row wins.
    value_by_date = dict(zip(df[date_col_name], df[other_col]))

    new_data = [[daskey, value_by_date.get(daskey, 0)]
                for daskey in idx.strftime('%Y-%m-%d')]

    # np.vstack is the function np.row_stack was an alias of.
    return np.vstack(new_data)
Example 43
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: test_util.py    (license) View Source Project 6 votes vote down vote up
def test_daily(self):
        """Check ``pivot_annual`` on a daily series spanning 2000-2004.

        Each of the 366 columns of the pivoted result is compared with the
        matching day-of-year slice of the source series, re-indexed by year.
        """
        rng = date_range('1/1/2000', '12/31/2004', freq='D')
        ts = Series(np.random.randn(len(rng)), index=rng)

        annual = pivot_annual(ts, 'D')

        # In years that isleapyear() does not flag, push day-of-year values
        # from 60 upwards along by one. NOTE(review): presumably this lines
        # those years up with leap-year numbering, leaving slot 60 to
        # 29 February -- confirm.
        doy = ts.index.dayofyear
        doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1

        for i in range(1, 367):
            # Expected column: the values on adjusted day i, keyed by year.
            subset = ts[doy == i]
            subset.index = [x.year for x in subset.index]

            result = annual[i].dropna()
            tm.assert_series_equal(result, subset, check_names=False)
            self.assertEqual(result.name, i)

        # check leap days
        leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)]
        day = leaps.index.dayofyear[0]
        leaps.index = leaps.index.year
        leaps.name = 60
        tm.assert_series_equal(annual[day].dropna(), leaps)
Example 44
Project: zipline-chinese   Author: zhanghan1990   File: trading.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def market_minutes_for_day(self, stamp):
        """Return every minute from market open to close for ``stamp``.

        The open and close come from ``self.get_open_and_close(stamp)``;
        both end points are included in the returned DatetimeIndex.
        """
        market_open, market_close = self.get_open_and_close(stamp)
        # 'min' is the minute alias that newer pandas accepts without a
        # deprecation warning; 'T' is deprecated.
        return pd.date_range(market_open, market_close, freq='min')
Example 45
Project: zipline-chinese   Author: zhanghan1990   File: tradingcalendar.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def get_trading_days(start, end, trading_day=trading_day):
    """Return the trading days from ``start`` to ``end``, localized to UTC.

    Only the calendar date of ``start`` and ``end`` is used (via their
    ``.date()`` method). ``trading_day`` is the frequency handed to
    ``pd.date_range`` and defaults to the module-level ``trading_day``.
    """
    first_date = start.date()
    last_date = end.date()
    naive_days = pd.date_range(start=first_date, end=last_date,
                               freq=trading_day)
    return naive_days.tz_localize('UTC')
Example 46
Project: zipline-chinese   Author: zhanghan1990   File: tradingcalendar_tse.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def get_trading_days(start, end, trading_day=trading_day):
    """Return the trading days from ``start`` to ``end``, localized to UTC.

    Only the calendar date of ``start`` and ``end`` is used (via their
    ``.date()`` method). ``trading_day`` is the frequency handed to
    ``pd.date_range`` and defaults to the module-level ``trading_day``.
    """
    first_date = start.date()
    last_date = end.date()
    naive_days = pd.date_range(start=first_date, end=last_date,
                               freq=trading_day)
    return naive_days.tz_localize('UTC')
Example 47
Project: zipline-chinese   Author: zhanghan1990   File: tradingcalendar_bmf.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def get_trading_days(start, end, trading_day=trading_day):
    """Return the trading days from ``start`` to ``end``, localized to UTC.

    Only the calendar date of ``start`` and ``end`` is used (via their
    ``.date()`` method). ``trading_day`` is the frequency handed to
    ``pd.date_range`` and defaults to the module-level ``trading_day``.
    """
    first_date = start.date()
    last_date = end.date()
    naive_days = pd.date_range(start=first_date, end=last_date,
                               freq=trading_day)
    return naive_days.tz_localize('UTC')
Example 48
Project: zipline-chinese   Author: zhanghan1990   File: tradingcalendar_china.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def get_trading_days(start, end, trading_day=trading_day):
    """Return the trading days from ``start`` to ``end``, localized to UTC.

    Only the calendar date of ``start`` and ``end`` is used (via their
    ``.date()`` method). ``trading_day`` is the frequency handed to
    ``pd.date_range`` and defaults to the module-level ``trading_day``.
    """
    first_date = start.date()
    last_date = end.date()
    naive_days = pd.date_range(start=first_date, end=last_date,
                               freq=trading_day)
    return naive_days.tz_localize('UTC')
Example 49
Project: zipline-chinese   Author: zhanghan1990   File: test_rolling_panel.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def test_basics(self, window=10):
        """Feed 30 daily frames into a rolling panel and check each step.

        After every ``add_frame`` call, ``get_current()`` must equal a
        panel built from the frames of the most recent ``window`` dates.
        """
        items = ['bar', 'baz', 'foo']
        minor = ['A', 'B', 'C', 'D']
        rp = MutableIndexRollingPanel(window, items, minor, cap_multiple=2)

        # The deque drops the oldest date once `window` dates are held.
        recent_dates = deque(maxlen=window)
        frames_by_date = {}

        for date in pd.date_range('2000-01-01', periods=30, tz='utc'):
            frame = pd.DataFrame(
                np.random.randn(3, 4), index=items, columns=minor)
            rp.add_frame(date, frame)
            frames_by_date[date] = frame
            recent_dates.append(date)

            actual = rp.get_current()
            reference = pd.Panel(
                frames_by_date,
                items=list(recent_dates),
                major_axis=items,
                minor_axis=minor,
            )
            tm.assert_panel_equal(actual, reference.swapaxes(0, 1))
Example 50
Project: zipline-chinese   Author: zhanghan1990   File: test_blaze.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def setUpClass(cls):
        cls.dates = dates = pd.date_range('2014-01-01', '2014-01-03')
        dates = cls.dates.repeat(3)
        cls.sids = sids = ord('A'), ord('B'), ord('C')
        cls.df = df = pd.DataFrame({
            'sid': sids * 3,
            'value': (0., 1., 2., 1., 2., 3., 2., 3., 4.),
            'int_value': (0, 1, 2, 1, 2, 3, 2, 3, 4),
            'asof_date': dates,
            'timestamp': dates,
        })
        cls.dshape = dshape("""
        var * {
            sid: ?int64,
            value: ?float64,
            int_value: ?int64,
            asof_date: datetime,
            timestamp: datetime
        }
        """)
        cls.macro_df = df[df.sid == 65].drop('sid', axis=1)
        dshape_ = OrderedDict(cls.dshape.measure.fields)
        del dshape_['sid']
        cls.macro_dshape = var * Record(dshape_)

        cls.garbage_loader = BlazeLoader()
        cls.missing_values = {'int_value': 0}