import sys
import glob

from datetime import datetime, timedelta

import traces
from traces.utils import datetime_range


def parse_iso_datetime(value):
    return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")


def read_all(pattern='data/lightbulb-*.csv'):
    """Read all of the CSVs in a directory matching the filename pattern
    as TimeSeries.

    """
    result = []
    for filename in glob.iglob(pattern):
        print('reading', filename, file=sys.stderr)
        ts = traces.TimeSeries.from_csv(
            filename,
            time_column=0,
            time_transform=parse_iso_datetime,
            value_column=1,
            value_transform=int,
            default=0,
        )
        ts.compact()
        result.append(ts)
    return result


ts_list = read_all()

total_watts = traces.TimeSeries.merge(ts_list, operation=sum)

# use distribution to look at the distribution of number of lights on
# over a month
histogram = total_watts.distribution(
    start=datetime(2016, 1, 1),
    end=datetime(2016, 2, 1),
)
print(histogram.mean())

# use distribution with mask to look at the median/lower/upper of
# lights on by hour of day, plot with your tool of choice
for hour, distribution in total_watts.distribution_by_hour_of_day():
    print(hour, distribution.quantiles([0.25, 0.5, 0.75]))

for day, distribution in total_watts.distribution_by_day_of_week():
    print(day, distribution.quantiles([0.25, 0.5, 0.75]))

# look at the typical number of lights on during business hours
# (8am-6pm) for each day in january
for t in datetime_range(datetime(2016, 1, 1), datetime(2016, 2, 1), 'days'):
    biz_start = t + timedelta(hours=8)
    biz_end = t + timedelta(hours=18)
    histogram = total_watts.distribution(start=biz_start, end=biz_end)
    print(t, histogram.quantiles([0.25, 0.5, 0.75]))

# transform time series to evenly spaced version using moving average
# instead of just sampling to avoid aliasing, and proceed to use
# statsmodels/pandas to forecast electricity usage "Modeling Time
# Series" http://tomaugspurger.github.io/modern-7-timeseries.html, in
# the Jupyter notebook it's `In [17]`.
regular = total_watts.moving_average(300, pandas=True)
print(regular)