Python pandas.to_datetime() Examples

The following are 30 code examples of pandas.to_datetime(), drawn from open-source projects. The source file, project, and license for each example are noted above it. You may also want to check out all other available functions and classes of the pandas module.
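Before the project examples, here is a minimal sketch of the call patterns they rely on; all values and names below are illustrative only:

import pandas as pd

# A single string becomes a Timestamp; a list becomes a DatetimeIndex.
ts = pd.to_datetime("2016-01-01")
idx = pd.to_datetime(["2000-06-01", "2000-07-01"])

# A Series of strings becomes a Series of datetime64 values.
s = pd.to_datetime(pd.Series(["2005/11/23", "2010/12/31"]))

# Keyword arguments that appear in the examples below:
aware = pd.to_datetime("2016-01-01 12:00", utc=True)     # timezone-aware (UTC)
fixed = pd.to_datetime("2020-01-01", format="%Y-%m-%d")  # explicit format string
bad = pd.to_datetime("not a date", errors="coerce")      # NaT instead of raising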
Example #1
Source File: analyze_estimates.py    From performance_tracker with GNU General Public License v3.0
def match_arrivals_with_schedule(estimated_trips, schedule_direction):
    schedule_direction.loc[:,"datetime_utc"] = pd.to_datetime(schedule_direction["datetime"], utc=True)
    estimated_trips.loc[:,"datetime_utc"] = pd.to_datetime(estimated_trips["datetime"], utc=True)
    schedule_direction = schedule_direction.set_index(pd.DatetimeIndex(schedule_direction["datetime_utc"])).sort_index()
    matched_estimates = [
        match_times(
            stop_id,
            stop_estimates,
            schedule_direction[schedule_direction["stop_id"] == stop_id],
        )
        for stop_id, stop_estimates in estimated_trips.groupby(["stop_id"])
    ]
    matched_estimates = [x for x in matched_estimates if x is not None]
    matched_estimates = pd.concat(matched_estimates)
    matched_estimates["since_scheduled"] = (
        matched_estimates["datetime_utc"] - matched_estimates["closest_scheduled"]
    )
    return matched_estimates 
Example #2
Source File: test_utils_times.py    From aospy with Apache License 2.0
def test_monthly_mean_at_each_ind():
    times_submonthly = pd.to_datetime(['2000-06-01', '2000-06-15',
                                       '2000-07-04', '2000-07-19'])
    times_means = pd.to_datetime(['2000-06-01', '2000-07-01'])
    len_other_dim = 2
    arr_submonthly = xr.DataArray(
        np.random.random((len(times_submonthly), len_other_dim)),
        dims=[TIME_STR, 'dim0'], coords={TIME_STR: times_submonthly}
    )
    arr_means = xr.DataArray(
        np.random.random((len(times_means), len_other_dim)),
        dims=arr_submonthly.dims, coords={TIME_STR: times_means}
    )
    actual = monthly_mean_at_each_ind(arr_means, arr_submonthly)
    desired_values = np.stack([arr_means.values[0]] * len_other_dim +
                              [arr_means.values[1]] * len_other_dim,
                              axis=0)
    desired = xr.DataArray(desired_values, dims=arr_submonthly.dims,
                           coords=arr_submonthly.coords)
    assert actual.identical(desired) 
Example #3
Source File: test_core.py    From pywr with GNU General Public License v3.0
def test_storage_max_volume_zero():
    """Test a that an max_volume of zero results in a NaN for current_pc and no exception

    """

    model = Model(
        start=pandas.to_datetime('2016-01-01'),
        end=pandas.to_datetime('2016-01-01')
    )

    storage = Storage(model, 'storage', num_inputs=1, num_outputs=0)
    otpt = Output(model, 'output', max_flow=99999, cost=-99999)
    storage.connect(otpt)

    storage.max_volume = 0

    model.run()
    assert np.isnan(storage.current_pc) 
Example #4
Source File: test_files.py    From pysat with BSD 3-Clause "New" or "Revised" License
def test_year_month_day_files_direct_call_to_from_os(self):
        # create a bunch of files by year, month, and day
        start = pysat.datetime(2008, 1, 1)
        stop = pysat.datetime(2009, 12, 31)
        create_files(self.testInst, start, stop, freq='1D', use_doy=False,
                     root_fname=''.join(('pysat_testing_junk_{year:04d}_gold_',
                                         '{day:03d}_stuff_{month:02d}.pysat_',
                                         'testing_file')))
        # use from_os function to get pandas Series of files and dates
        files = pysat.Files.from_os(data_path=self.testInst.files.data_path,
                                    format_str=''.join(('pysat_testing_junk_',
                                                        '{year:04d}_gold_',
                                                        '{day:03d}_stuff_',
                                                        '{month:02d}.pysat_',
                                                        'testing_file')))
        # check overall length
        check1 = len(files) == (365 + 366)
        # check specific dates
        check2 = pds.to_datetime(files.index[0]) == pysat.datetime(2008, 1, 1)
        check3 = pds.to_datetime(files.index[365]) == \
            pysat.datetime(2008, 12, 31)
        check4 = pds.to_datetime(files.index[-1]) == \
            pysat.datetime(2009, 12, 31)
        assert(check1 & check2 & check3 & check4) 
Example #5
Source File: test_files.py    From pysat with BSD 3-Clause "New" or "Revised" License
def test_year_month_files_direct_call_to_from_os(self):
        # create a bunch of files by year and month
        start = pysat.datetime(2008, 1, 1)
        stop = pysat.datetime(2009, 12, 31)
        create_files(self.testInst, start, stop, freq='1MS',
                     root_fname=''.join(('pysat_testing_junk_{year:04d}_gold_',
                                         'stuff_{month:02d}.pysat_testing_',
                                         'file')))
        # use from_os function to get pandas Series of files and dates
        files = pysat.Files.from_os(data_path=self.testInst.files.data_path,
                                    format_str=''.join(('pysat_testing_junk_',
                                                        '{year:04d}_gold_',
                                                        'stuff_{month:02d}.',
                                                        'pysat_testing_file')))
        # check overall length
        check1 = len(files) == 24
        # check specific dates
        check2 = pds.to_datetime(files.index[0]) == pysat.datetime(2008, 1, 1)
        check3 = pds.to_datetime(files.index[11]) == \
            pysat.datetime(2008, 12, 1)
        check4 = pds.to_datetime(files.index[-1]) == \
            pysat.datetime(2009, 12, 1)
        assert(check1 & check2 & check3 & check4) 
Example #6
Source File: universal.py    From xalpha with MIT License
def get_sw_from_jq(code, start=None, end=None, **kws):
    """

    :param code: str. e.g. 801180, a Shenwan (申万) industry index code
    :param start:
    :param end:
    :param kws:
    :return:
    """
    logger.debug("get sw data of %s" % code)
    df = finance.run_query(
        query(finance.SW1_DAILY_VALUATION)
        .filter(finance.SW1_DAILY_VALUATION.date >= start)
        .filter(finance.SW1_DAILY_VALUATION.date <= end)
        .filter(finance.SW1_DAILY_VALUATION.code == code)
        .order_by(finance.SW1_DAILY_VALUATION.date.asc())
    )
    df["date"] = pd.to_datetime(df["date"])
    return df 
Example #7
Source File: test_files.py    From pysat with BSD 3-Clause "New" or "Revised" License
def test_year_doy_files_no_gap_in_name_direct_call_to_from_os(self):
        # create a bunch of files by year and doy
        start = pysat.datetime(2008, 1, 1)
        stop = pysat.datetime(2009, 12, 31)
        create_files(self.testInst, start, stop, freq='1D',
                     root_fname=''.join(('pysat_testing_junk_{year:04d}',
                                         '{day:03d}_stuff.pysat_testing_',
                                         'file')))
        # use from_os function to get pandas Series of files and dates
        files = pysat.Files.from_os(data_path=self.testInst.files.data_path,
                                    format_str=''.join(('pysat_testing_junk_',
                                                        '{year:04d}{day:03d}_',
                                                        'stuff.pysat_testing_',
                                                        'file')))
        # check overall length
        check1 = len(files) == (365 + 366)
        # check specific dates
        check2 = pds.to_datetime(files.index[0]) == pysat.datetime(2008, 1, 1)
        check3 = pds.to_datetime(files.index[365]) == \
            pysat.datetime(2008, 12, 31)
        check4 = pds.to_datetime(files.index[-1]) == \
            pysat.datetime(2009, 12, 31)
        assert(check1 & check2 & check3 & check4) 
Example #8
Source File: test_files.py    From pysat with BSD 3-Clause "New" or "Revised" License
def test_year_doy_files_direct_call_to_from_os(self):
        # create a bunch of files by year and doy
        start = pysat.datetime(2008, 1, 1)
        stop = pysat.datetime(2009, 12, 31)
        create_files(self.testInst, start, stop, freq='1D')
        # use from_os function to get pandas Series of files and dates
        files = pysat.Files.from_os(data_path=self.testInst.files.data_path,
                                    format_str=''.join(('pysat_testing_junk_',
                                                        '{year:04d}_gold_',
                                                        '{day:03d}_stuff.',
                                                        'pysat_testing_file')))
        # check overall length
        check1 = len(files) == (365 + 366)
        # check specific dates
        check2 = pds.to_datetime(files.index[0]) == pysat.datetime(2008, 1, 1)
        check3 = pds.to_datetime(files.index[365]) == \
            pysat.datetime(2008, 12, 31)
        check4 = pds.to_datetime(files.index[-1]) == \
            pysat.datetime(2009, 12, 31)
        assert(check1 & check2 & check3 & check4) 
Example #9
Source File: universal.py    From xalpha with MIT License
def _get_index_weight_range(code, start, end):
    if len(code.split(".")) != 2:
        code = _inverse_convert_code(code)
    start_obj = dt.datetime.strptime(start.replace("-", "").replace("/", ""), "%Y%m%d")
    end_obj = dt.datetime.strptime(end.replace("-", "").replace("/", ""), "%Y%m%d")
    start_m = start_obj.replace(day=1)
    if start_m < start_obj:
        start_m = start_m + relativedelta(months=1)
    end_m = end_obj.replace(day=1)
    if end_obj < end_m:
        end_m = end_m - relativedelta(months=1)
    d = start_m

    df = pd.DataFrame({"code": [], "weight": [], "display_name": [], "date": []})
    while True:
        if d > end_m:
            df["date"] = pd.to_datetime(df["date"])
            return df
        logger.debug("fetch index weight on %s for %s" % (d, code))
        df0 = get_index_weights(index_id=code, date=d.strftime("%Y-%m-%d"))
        df0["code"] = df0.index
        # DataFrame.append was removed in pandas 2.0; on newer pandas use
        # df = pd.concat([df, df0], ignore_index=True, sort=False) instead
        df = df.append(df0, ignore_index=True, sort=False)
        d = d + relativedelta(months=1) 
Example #10
Source File: fixtures.py    From pywr with GNU General Public License v3.0
def simple_storage_model(request):
    """
    Make a simple model with a single Input, Storage and Output.
    
    Input -> Storage -> Output
    """

    model = pywr.core.Model(
        start=pandas.to_datetime('2016-01-01'),
        end=pandas.to_datetime('2016-01-05'),
        timestep=datetime.timedelta(1),
    )

    inpt = Input(model, name="Input", max_flow=5.0, cost=-1)
    res = Storage(model, name="Storage", num_outputs=1, num_inputs=1, max_volume=20, initial_volume=10)
    otpt = Output(model, name="Output", max_flow=8, cost=-999)
    
    inpt.connect(res)
    res.connect(otpt)
    
    return model 
Example #11
Source File: test_files.py    From pysat with BSD 3-Clause "New" or "Revised" License
def test_year_month_day_hour_minute_files_direct_call_to_from_os(self):
        root_fname = ''.join(('pysat_testing_junk_{year:04d}_gold_{day:03d}_',
                              'stuff_{month:02d}_{hour:02d}{minute:02d}.',
                              'pysat_testing_file'))
        # create a bunch of files by year, month, day, hour, and minute
        start = pysat.datetime(2008, 1, 1)
        stop = pysat.datetime(2008, 1, 4)
        create_files(self.testInst, start, stop, freq='30min',
                     use_doy=False,
                     root_fname=root_fname)
        # use from_os function to get pandas Series of files and dates
        files = pysat.Files.from_os(data_path=self.testInst.files.data_path,
                                    format_str=root_fname)
        # check overall length
        check1 = len(files) == 145
        # check specific dates
        check2 = pds.to_datetime(files.index[0]) == pysat.datetime(2008, 1, 1)
        check3 = pds.to_datetime(files.index[1]) == \
            pysat.datetime(2008, 1, 1, 0, 30)
        check4 = pds.to_datetime(files.index[10]) == \
            pysat.datetime(2008, 1, 1, 5, 0)
        check5 = pds.to_datetime(files.index[-1]) == pysat.datetime(2008, 1, 4)
        assert(check1 & check2 & check3 & check4 & check5) 
Example #12
Source File: test_utils_times.py    From aospy with Apache License 2.0
def test_yearly_average_no_mask():
    times = pd.to_datetime(['2000-06-01', '2000-06-15',
                            '2001-07-04', '2001-10-01', '2001-12-31',
                            '2004-01-01'])
    arr = xr.DataArray(np.random.random((len(times),)),
                       dims=[TIME_STR], coords={TIME_STR: times})
    dt = arr.copy(deep=True)
    dt.values = np.random.random((len(times),))

    actual = yearly_average(arr, dt)

    yr2000 = (arr[0]*dt[0] + arr[1]*dt[1]) / (dt[0] + dt[1])
    yr2001 = ((arr[2]*dt[2] + arr[3]*dt[3] + arr[4]*dt[4]) /
              (dt[2] + dt[3] + dt[4]))
    yr2004 = arr[-1]
    yrs_coord = [2000, 2001, 2004]
    yr_avgs = np.array([yr2000, yr2001, yr2004])
    desired = xr.DataArray(yr_avgs, dims=['year'], coords={'year': yrs_coord})
    xr.testing.assert_allclose(actual, desired) 
Example #13
Source File: test_core.py    From pywr with GNU General Public License v3.0
def test_storage_max_volume_nested_param():
    """Test that a max_volume can be used with a Parameter with children.
    """

    model = Model(
        start=pandas.to_datetime('2016-01-01'),
        end=pandas.to_datetime('2016-01-01')
    )

    storage = Storage(model, 'storage', num_inputs=1, num_outputs=0)
    otpt = Output(model, 'output', max_flow=99999, cost=-99999)
    storage.connect(otpt)

    p = AggregatedParameter(model, [ConstantParameter(model, 20.0), ConstantParameter(model, 20.0)], agg_func='sum')

    storage.max_volume = p
    storage.initial_volume = 10.0

    model.setup()
    np.testing.assert_allclose(storage.current_pc, 0.25) 
Example #14
Source File: universal.py    From xalpha with MIT License
def get_historical_fromhzindex(code, start, end):
    """
    Huazheng index (华证指数) data source

    :param code:
    :param start:
    :param end:
    :return:
    """
    if code.startswith("HZ"):
        code = code[2:]

    r = rget_json(
        "http://www.chindices.com/index/values.val?code={code}".format(code=code)
    )
    df = pd.DataFrame(r["data"])
    df["date"] = pd.to_datetime(df["date"])
    df = df[["date", "price", "pctChange"]]
    df.rename(columns={"price": "close", "pctChange": "percent"}, inplace=True)
    df = df[::-1]
    return df 
Example #15
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License
def test_date_incorrect_dtype(self):

        A = DataFrame({
            'col':
            ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30']
        })
        B = DataFrame({
            'col': [
                '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
                '2010/9/30'
            ]
        })
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        A['col1'] = to_datetime(A['col'])
        B['col1'] = to_datetime(B['col'])

        comp = recordlinkage.Compare()
        comp.date('col', 'col1')
        pytest.raises(ValueError, comp.compute, ix, A, B)

        comp = recordlinkage.Compare()
        comp.date('col1', 'col')
        pytest.raises(ValueError, comp.compute, ix, A, B) 
Example #16
Source File: test_core.py    From pywr with GNU General Public License v3.0
def test_storage_max_volume_param_raises():
    """Test that a max_volume can not be used with 2 levels of child parameters.
    """

    model = Model(
        start=pandas.to_datetime('2016-01-01'),
        end=pandas.to_datetime('2016-01-01')
    )

    storage = Storage(model, 'storage', num_inputs=1, num_outputs=0)
    otpt = Output(model, 'output', max_flow=99999, cost=-99999)
    storage.connect(otpt)

    p = AggregatedParameter(model, [ConstantParameter(model, 20.0), ConstantParameter(model, 20.0)], agg_func='sum')
    p1 = AggregatedParameter(model, [p, ConstantParameter(model, 1.5)], agg_func="product")

    storage.max_volume = p1
    storage.initial_volume = 10.0

    with pytest.raises(RuntimeError):
        model.run() 
Example #17
Source File: test_parameters.py    From pywr with GNU General Public License v3.0
def test_daily_profile_leap_day(model):
    """Test behaviour of daily profile parameter for leap years
    """
    inpt = Input(model, "input")
    otpt = Output(model, "otpt", max_flow=None, cost=-999)
    inpt.connect(otpt)
    inpt.max_flow = DailyProfileParameter(model, np.arange(0, 366, dtype=np.float64))

    # non-leap year
    model.timestepper.start = pd.to_datetime("2015-01-01")
    model.timestepper.end = pd.to_datetime("2015-12-31")
    model.run()
    assert_allclose(inpt.flow, 365) # NOT 364

    # leap year
    model.timestepper.start = pd.to_datetime("2016-01-01")
    model.timestepper.end = pd.to_datetime("2016-12-31")
    model.run()
    assert_allclose(inpt.flow, 365) 
Example #18
Source File: test_parameters.py    From pywr with GNU General Public License v3.0
def test_scenario_daily_profile_leap_day(self, simple_linear_model):
        """Test behaviour of daily profile parameter for leap years
        """

        model = simple_linear_model
        model.timestepper.start = pd.to_datetime("2016-01-01")
        model.timestepper.end = pd.to_datetime("2016-12-31")

        scenario = Scenario(model, 'A', 2)
        values = np.array([np.arange(366, dtype=np.float64), np.arange(366, 0, -1, dtype=np.float64)])

        expected_values = values.T

        p = ScenarioDailyProfileParameter(model, scenario, values)
        AssertionRecorder(model, p, expected_data=expected_values)

        model.setup()
        model.run() 
Example #19
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License
def test_dates(self):

        A = DataFrame({
            'col':
            to_datetime(
                ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30'])
        })
        B = DataFrame({
            'col':
            to_datetime([
                '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
                '2010/9/30'
            ])
        })
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        comp = recordlinkage.Compare()
        comp.date('col', 'col')
        result = comp.compute(ix, A, B)[0]

        expected = Series([1, 0, 0, 0.5, 0.5], index=ix, name=0)

        pdt.assert_series_equal(result, expected) 
Example #20
Source File: test_license.py    From pywr with GNU General Public License v3.0
def test_simple_model_with_annual_licence_multi_year(simple_linear_model):
    """ Test the AnnualLicense over multiple years
    """
    import pandas as pd
    import datetime, calendar
    m = simple_linear_model
    # Modify the model to run for 3 non-leap years with a 30-day time-step.
    m.timestepper.start = pd.to_datetime('2017-1-1')
    m.timestepper.end = pd.to_datetime('2020-1-1')
    m.timestepper.delta = datetime.timedelta(30)

    annual_total = 365.0
    lic = AnnualLicense(m, m.nodes["Input"], annual_total)
    # Apply licence to the model
    m.nodes["Input"].max_flow = lic
    m.nodes["Output"].max_flow = 10.0
    m.nodes["Output"].cost = -10.0
    m.setup()

    for i in range(len(m.timestepper)):
        m.step()
        days_in_year = 365 + int(calendar.isleap(m.timestepper.current.datetime.year))
        assert_allclose(m.nodes["Output"].flow, annual_total/days_in_year) 
Example #21
Source File: routes.py    From thewarden with MIT License
def tradedetails():
    if request.method == "GET":
        id = request.args.get("id")
        # if the "trades" query parameter is present (any non-empty value),
        # only look for buys and sells
        tradesonly = request.args.get("trades")
        df = pd.read_sql_table("trades", db.engine)
        # Filter only the trades for current user
        df = df[(df.user_id == current_user.username)]
        df = df[(df.trade_reference_id == id)]
        # Filter only buy and sells, ignore deposit / withdraw
        if tradesonly:
            df = df[(df.trade_operation == "B") | (df.trade_operation == "S")]
        # df['trade_date'] = pd.to_datetime(df['trade_date'])
        df.set_index("trade_reference_id", inplace=True)
        df.drop("user_id", axis=1, inplace=True)
        details = df.to_json()
        return details 
Example #22
Source File: test_recorders.py    From pywr with GNU General Public License v3.0
def test_reset_timestepper_recorder():
    model = Model(
        start=pandas.to_datetime('2016-01-01'),
        end=pandas.to_datetime('2016-01-01')
    )

    inpt = Input(model, "input", max_flow=10)
    otpt = Output(model, "output", max_flow=50, cost=-10)
    inpt.connect(otpt)

    rec = NumpyArrayNodeRecorder(model, otpt)

    model.run()

    model.timestepper.end = pandas.to_datetime("2016-01-02")

    model.run() 
Example #23
Source File: info.py    From xalpha with MIT License
def _fetch_csv(self, path):
        """
        fetch the information and price table from path + code + ".csv"; not recommended
        for manual use, just set the fetch label to True when initializing the object

        :param path:  string of folder path
        """
        try:
            content = pd.read_csv(path + self.code + ".csv")
            pricetable = content.iloc[1:]
            datel = list(pd.to_datetime(pricetable.date))
            self.price = pricetable[["netvalue", "totvalue", "comment"]]
            self.price["date"] = datel
            saveinfo = json.loads(content.iloc[0].date)
            if not isinstance(saveinfo, dict):
                raise FundTypeError("This csv doesn't looks like from fundinfo")
            self.segment = saveinfo["segment"]
            self.feeinfo = saveinfo["feeinfo"]
            self.name = saveinfo["name"]
            self.rate = saveinfo["rate"]
        except FileNotFoundError as e:
            # print('no saved copy of fund %s' % self.code)
            raise e 
Example #24
Source File: info.py    From xalpha with MIT License
def _fetch_csv(self, path):
        """
        fetch the information and price table from path + code + ".csv"; not recommended
        for manual use, just set the fetch label to True when initializing the object

        :param path:  string of folder path
        """
        try:
            pricetable = pd.read_csv(path + self.code + ".csv")
            datel = list(pd.to_datetime(pricetable.date))
            self.price = pricetable[["netvalue", "totvalue", "comment"]]
            self.price["date"] = datel

        except FileNotFoundError as e:
            # print('no saved copy of %s' % self.code)
            raise e 
Example #25
Source File: universal.py    From xalpha with MIT License
def get_portfolio_fromttjj(code, start=None, end=None):
    startobj = dt.datetime.strptime(start, "%Y%m%d")
    endobj = dt.datetime.strptime(end, "%Y%m%d")
    if (endobj - startobj).days < 90:
        return None  # note: start is always Jan 1, Apr 1, Jul 1, or Oct 1 in incremental updates
    if code.startswith("F"):
        code = code[1:]
    r = rget("http://fundf10.eastmoney.com/zcpz_{code}.html".format(code=code))
    s = BeautifulSoup(r.text, "lxml")
    table = s.find("table", class_="tzxq")
    df = pd.read_html(str(table))[0]
    df["date"] = pd.to_datetime(df["报告期"])
    df["stock_ratio"] = df["股票占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["bond_ratio"] = df["债券占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["cash_ratio"] = df["现金占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    #     df["dr_ratio"] = df["存托凭证占净比"].replace("---", "0%").apply(lambda s: xa.cons._float(s[:-1]))
    df["assets"] = df["净资产(亿元)"]
    df = df[::-1]
    return df[["date", "stock_ratio", "bond_ratio", "cash_ratio", "assets"]]


Example #26
Source File: info.py    From xalpha with MIT License
def _fetch_csv(self, path):
        """
        fetch the information and price table from path + code + ".csv"; not recommended
        for manual use, just set the fetch label to True when initializing the object

        :param path:  string of folder path
        """
        try:
            content = pd.read_csv(path + self.code + ".csv")
            pricetable = content.iloc[1:]
            datel = list(pd.to_datetime(pricetable.date))
            self.price = pricetable[["netvalue", "totvalue", "comment"]]
            self.price["date"] = datel
            self.name = content.iloc[0].comment
        except FileNotFoundError as e:
            # print('no saved copy of %s' % self.code)
            raise e 
Example #27
Source File: universal.py    From xalpha with MIT License
def get_historical_fromzzindex(code, start, end=None):
    """
    China Securities Index (中证指数, CSI) data source

    :param code:
    :param start:
    :param end:
    :return:
    """
    if code.startswith("ZZ"):
        code = code[2:]
    start_obj = dt.datetime.strptime(start, "%Y%m%d")
    fromnow = (today_obj() - start_obj).days
    if fromnow < 20:
        flag = "1%E4%B8%AA%E6%9C%88"  # URL-encoded "1个月" (1 month)
    elif fromnow < 60:
        flag = "3%E4%B8%AA%E6%9C%88"  # "3个月" (3 months)
    elif fromnow < 200:
        flag = "1%E5%B9%B4"  # "1年" (1 year)
    else:
        flag = "5%E5%B9%B4"  # "5年" (5 years)
    r = rget_json(
        "http://www.csindex.com.cn/zh-CN/indices/index-detail/\
{code}?earnings_performance={flag}&data_type=json".format(
            code=code, flag=flag
        ),
        headers={
            "Host": "www.csindex.com.cn",
            "Referer": "http://www.csindex.com.cn/zh-CN/indices/index-detail/{code}".format(
                code=code
            ),
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36",
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "application/json, text/javascript, */*; q=0.01",
        },
    )
    df = pd.DataFrame(r)
    df["date"] = pd.to_datetime(df["tradedate"])
    df["close"] = df["tclose"].apply(_float)
    return df[["date", "close"]] 
Example #28
Source File: process_vehicles.py    From performance_tracker with GNU General Public License v3.0
def process_raw_vehicles(df, track):
    df = df.drop_duplicates(
        subset=["report_time", "latitude", "longitude", "vehicle_id"]
    )
    df = df[df["predictable"] == True]

    df["latitude"] = pd.to_numeric(df.latitude)
    df["longitude"] = pd.to_numeric(df.longitude)
    df = toGDF(df)

    mask_0 = (df["direction"] == 0) | (df["direction"] == 90)
    mask_1 = (df["direction"] == 180) | (df["direction"] == 270)
    df_0 = df.loc[mask_0]
    df_0 = df_0.assign(direction_id = 0)
    df_1 = df.loc[mask_1]
    df_1 = df_1.assign(direction_id = 1)
    df_0["relative_position"] = findRelativePositions(df_0, track[0])
    df_1["relative_position"] = findRelativePositions(df_1, track[1])
    df = pd.concat([df_0, df_1])

    df["datetime"] = pd.to_datetime(df["report_time"], utc=True)
    df["datetime_local_iso8601"] = df.report_time.apply(
        lambda dt: pendulum.parse(dt, tz="UTC")
        .in_tz("America/Los_Angeles")
        .to_iso8601_string()
    )
    df = df.reset_index(drop=True)  # necessary both before and after getTrips
    df = getTrips(df)
    df = df.reset_index(drop=True)  # necessary both before and after getTrips
    df["datetime"] = df["datetime_local_iso8601"]
    df = df[["datetime", "trip_id", "direction_id", "relative_position"]]
    return df 
Example #29
Source File: ornstein_uhlenbeck.py    From tensortrade with Apache License 2.0
def ornstein(base_price: int = 1,
             base_volume: int = 1,
             start_date: str = '2010-01-01',
             start_date_format: str = '%Y-%m-%d',
             times_to_generate: int = 1000,
             time_frame: str = '1h',
             params: ModelParameters = None):

    delta = get_delta(time_frame)
    times_to_generate = scale_times_to_generate(times_to_generate, time_frame)

    params = params or default(base_price, times_to_generate, delta)

    prices = ornstein_uhlenbeck_levels(params)

    volume_gen = GaussianNoise(t=times_to_generate)
    volumes = volume_gen.sample(times_to_generate) + base_volume

    start_date = pd.to_datetime(start_date, format=start_date_format)
    price_frame = pd.DataFrame([], columns=['date', 'price'], dtype=float)
    volume_frame = pd.DataFrame([], columns=['date', 'volume'], dtype=float)

    price_frame['date'] = pd.date_range(start=start_date, periods=times_to_generate, freq="1min")
    price_frame['price'] = abs(prices)

    volume_frame['date'] = price_frame['date'].copy()
    volume_frame['volume'] = abs(volumes)

    # set_index returns a new frame, so assign the result back; the 'date'
    # column built above is already a minute-frequency DatetimeIndex
    price_frame = price_frame.set_index('date')
    volume_frame = volume_frame.set_index('date')

    data_frame = price_frame['price'].resample(time_frame).ohlc()
    data_frame['volume'] = volume_frame['volume'].resample(time_frame).sum()

    return data_frame 
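
The closing resample step is a standard pandas pattern for turning a fixed-frequency price series into OHLC bars. A minimal standalone sketch with synthetic values (all names here are illustrative, not from the project above):

import numpy as np
import pandas as pd

# Minute-frequency synthetic prices on a DatetimeIndex.
idx = pd.date_range("2010-01-01", periods=180, freq="1min")
prices = pd.Series(100 + np.random.randn(180).cumsum(), index=idx)

# Aggregate to hourly open/high/low/close bars.
ohlc = prices.resample("1h").ohlc()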
Example #30
Source File: cryptodatadownload.py    From tensortrade with Apache License 2.0
def fetch_default(self, exchange_name, base_symbol, quote_symbol, timeframe, include_all_volumes=False):

        filename = "{}_{}{}_{}.csv".format(exchange_name, quote_symbol, base_symbol, timeframe)
        base_vc = "Volume {}".format(base_symbol)
        new_base_vc = "volume_base"
        quote_vc = "Volume {}".format(quote_symbol)
        new_quote_vc = "volume_quote"

        df = pd.read_csv(self.url + filename, skiprows=1)
        df = df[::-1]
        df = df.drop(["Symbol"], axis=1)
        df = df.rename({base_vc: new_base_vc, quote_vc: new_quote_vc, "Date": "date"}, axis=1)

        if "d" in timeframe:
            df["date"] = pd.to_datetime(df["date"])
        elif "h" in timeframe:
            df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d %I-%p")

        df = df.set_index("date")
        df.columns = [name.lower() for name in df.columns]
        df = df.reset_index()
        if not include_all_volumes:
            df = df.drop([new_quote_vc], axis=1)
            df = df.rename({new_base_vc: "volume"}, axis=1)
            return df
        return df
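
A note on the hourly branch above: the explicit format string implies timestamps of the form "2020-01-01 01-PM", where %I is the 12-hour clock hour and %p the AM/PM marker. A quick check with an illustrative value:

import pandas as pd

pd.to_datetime("2020-01-01 01-PM", format="%Y-%m-%d %I-%p")
# -> Timestamp('2020-01-01 13:00:00')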