# Python source code of htsPlot

# -*- coding: utf-8 -*-
"""
Name: htsPlot.py
Author: Collin Rooney
Last Updated: 7/17/2017

This script will contain functions for plotting the output of the hts.py file
These plots will be made to look like the plots Prophet creates

Credit to Rob J. Hyndman and research partners as much of the code was developed with the help of their work
https://www.otexts.org/fpp
https://robjhyndman.com/publications/
Credit to Facebook and their fbprophet package
https://facebookincubator.github.io/prophet/
It was my intention to make some of the code look similar to certain sections in the Prophet and (Hyndman's) hts packages

"""
from matplotlib import pyplot as plt
from matplotlib.dates import MonthLocator, num2date
from matplotlib.ticker import FuncFormatter
import pandas as pd
import numpy as np
import sys

#%%
def plotNode(dictframe, column, h = 1, xlabel = 'ds', ylabel = 'y', startFrom = 0, uncertainty = False, ax = None):
    '''
    Plot one node's fitted values as a solid line followed by its h-step
    ahead forecast as a dashed line.

    Parameters
    ------------------
    
    dictframe - (dict) The dictionary of dataframes that is the output of the hts function
    
    column - (string) column title that you want to plot
    
    h - (int) number of steps in the forecast same as input to hts function;
        0 means there is no forecast segment and everything is drawn solid
    
    xlabel - (string) label for the graph's x axis
    
    ylabel - (string) label for the graph's y axis
    
    startFrom - (int) the number of values to skip at the beginning of yhat so that you can zoom in
    
    uncertainty - (Boolean) include the prediction intervals or not
    
    ax - (axes object) any axes object thats already created that you want to pass to the plot function
    
    Returns
    ------------------
    
    the figure holding the plot of that node's forecast
    
    Raises
    ------------------
    
    ValueError if h is negative
    
    '''
    if h < 0:
        raise ValueError("h must be a non-negative number of forecast steps")
    nodeToPlot = dictframe[column]
    
    if ax is None:
        fig = plt.figure(facecolor='w', figsize=(10, 6))
        ax = fig.add_subplot(111)
    else:
        fig = ax.get_figure()
    ##
    # Work on plain arrays so every slice below is positional, and split on an
    # explicit index: slicing with -h breaks for h == 0 because [a:-0] is empty
    # and [-0:] is the whole series
    ##
    dates = nodeToPlot['ds'].values
    yhat = nodeToPlot['yhat'].values
    split = max(len(dates) - h, 0)
    ##
    # plot the yhat forecast as a solid line and then the h-step ahead forecast as a dashed line
    ##
    ax.plot(dates[startFrom:split], yhat[startFrom:split], ls='-', c='#0072B2')
    if h > 0:
        ax.plot(dates[split:], yhat[split:], dashes = [2,1])
    ##
    # plot the cap and uncertainty if necessary
    ##
    if 'cap' in nodeToPlot:
        ax.plot(dates[startFrom:], nodeToPlot['cap'].values[startFrom:], ls='--', c='k')
    if uncertainty:
        ax.fill_between(dates[startFrom:], nodeToPlot['yhat_lower'].values[startFrom:],
                        nodeToPlot['yhat_upper'].values[startFrom:], color='#0072B2',
                        alpha=0.2)
    ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    fig.tight_layout()
    return fig

#%%
def plotWeekly(dictframe, ax, uncertainty, weeklyStart, color='#0072B2'):
    '''
    Plot the weekly seasonality component, one point per day of the week.

    Parameters
    ------------------
    
    dictframe - (dataframe) a single node's frame with a datetime 'ds' column and a
                'weekly' column (plus 'weekly_lower' and 'weekly_upper' when uncertainty is True)
    
    ax - (axes object) axes to draw on, or None to create a new figure
    
    uncertainty - (Boolean) include the prediction intervals or not
    
    weeklyStart - (int) weekday shown first on the x axis, using the same numbering as
                  pandas' dt.weekday (0 = Monday ... 6 = Sunday); values wrap modulo 7
    
    color - (string) colour of the line and of the interval band
    
    Returns
    ------------------
    
    the figure holding the weekly plot
    
    Raises
    ------------------
    
    ValueError if dictframe has no rows to take weekdays from
    
    '''
    if ax is None:
        figW = plt.figure(facecolor='w', figsize=(10, 6))
        ax = figW.add_subplot(111)
    else:
        figW = ax.get_figure()
    ##
    # Order of the weekdays along the x axis, rotated so weeklyStart comes first
    ##
    order = [(int(weeklyStart) + offset) % 7 for offset in range(7)]
    ##
    # Find the weekday seasonality values for each weekday: keep the row with the
    # largest index label per weekday, skipping weekdays the data never contains
    # (e.g. business-day series) instead of failing on an empty max()
    ##
    weekdays = dictframe['ds'].dt.weekday
    ind = []
    for weekday in order:
        matches = weekdays[weekdays == weekday].index
        if len(matches) > 0:
            ind.append(matches.max())
    if not ind:
        raise ValueError("cannot plot weekly seasonality: dictframe has no rows")
    positions = range(len(ind))
    ##
    # Plot only one weekday each
    ##
    ax.plot(positions, dictframe['weekly'].loc[ind], ls='-', c=color)
    ##
    # Plot uncertainty if necessary
    ##
    if uncertainty:
        ax.fill_between(positions, dictframe['weekly_lower'].loc[ind],
                        dictframe['weekly_upper'].loc[ind], color=color, alpha=0.2)
    ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
    ax.set_xticks(positions)
    # dt.weekday_name was removed in pandas 1.0; day_name() is its replacement
    ax.set_xticklabels(dictframe['ds'].loc[ind].dt.day_name())
    ax.set_xlabel('Day of week')
    ax.set_ylabel('weekly')
    figW.tight_layout()
    return figW
    
def plotYearly(dictframe, ax, uncertainty, color='#0072B2'):
    '''
    Plot the yearly seasonality component over the tail of the series.

    Parameters
    ------------------
    
    dictframe - (dataframe) a single node's frame with a datetime 'ds' column and a
                'yearly' column (plus 'yearly_lower' and 'yearly_upper' when uncertainty is True)
    
    ax - (axes object) axes to draw on, or None to create a new figure
    
    uncertainty - (Boolean) include the prediction intervals or not
    
    color - (string) colour of the line and of the interval band
    
    Returns
    ------------------
    
    the figure holding the yearly plot
    
    Raises
    ------------------
    
    ValueError if dictframe has no rows
    
    '''
    if ax is None:
        figY = plt.figure(facecolor='w', figsize=(10, 6))
        ax = figY.add_subplot(111)
    else:
        figY = ax.get_figure()
    ##
    # Find the last row position of every calendar month that occurs in the data.
    # Later rows overwrite earlier ones, so each month ends up mapped to its final
    # position; months that never occur are simply absent rather than an error
    ##
    monthOfRow = dictframe['ds'].dt.month.values
    lastPosition = {month: position for position, month in enumerate(monthOfRow)}
    if not lastPosition:
        raise ValueError("cannot plot yearly seasonality: dictframe has no rows")
    ##
    # Plot from the minimum of those maximums on (this will almost certainly result in only 1 year plotted)
    ##
    start = min(lastPosition.values())
    dates = dictframe['ds'].values[start:]
    ax.plot(dates, dictframe['yearly'].values[start:], ls='-', c=color)
    if uncertainty:
        ax.fill_between(dates, dictframe['yearly_lower'].values[start:],
                        dictframe['yearly_upper'].values[start:], color=color, alpha=0.2)
    ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
    # Tick on the first day of every second month and label ticks as "<Month> <day>"
    monthTicks = MonthLocator(range(1, 13), bymonthday=1, interval=2)
    ax.xaxis.set_major_formatter(FuncFormatter(
        lambda x, pos=None: '{dt:%B} {dt.day}'.format(dt=num2date(x))))
    ax.xaxis.set_major_locator(monthTicks)
    ax.set_xlabel('Day of year')
    ax.set_ylabel('yearly')
    figY.tight_layout()
    return figY

def plotHolidays(dictframe, holidays, ax, uncertainty, color='#0072B2'):
    '''
    Plot the combined effect of all holidays as a single line.

    Parameters
    ------------------
    
    dictframe - (dataframe) a single node's frame with a 'ds' column and one column per
                holiday name (plus '<name>_lower' and '<name>_upper' when uncertainty is True)
    
    holidays - (dataframe) frame whose 'holiday' column lists the holiday names to sum
    
    ax - (axes object) axes to draw on, or None to create a new figure
    
    uncertainty - (Boolean) include the prediction intervals or not
    
    color - (string) colour of the line and of the interval band
    
    Returns
    ------------------
    
    the figure holding the holidays plot
    
    '''
    ##
    # This function is largely the same as the one in Prophet
    ##
    if ax is None:
        figH = plt.figure(facecolor='w', figsize=(10, 6))
        ax = figH.add_subplot(111)
    else:
        figH = ax.get_figure()
    holidayComps = holidays.holiday.unique().tolist()
    dates = dictframe['ds'].values
    yHoliday = dictframe[holidayComps].sum(axis=1)
    ax.plot(dates, yHoliday, ls='-',
                       c=color)
    if uncertainty:
        # The interval columns are only read here, so a frame without them can
        # still be plotted when no band is requested
        yHolidayL = dictframe[[name + '_lower' for name in holidayComps]].sum(axis=1)
        yHolidayU = dictframe[[name + '_upper' for name in holidayComps]].sum(axis=1)
        # NOTE the above CI calculation is incorrect if holidays overlap
        # in time. Since it is just for the visualization we will not
        # worry about it now.
        ax.fill_between(dates, yHolidayL, yHolidayU, color=color, alpha=0.2)
    ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
    ax.set_xlabel('ds')
    ax.set_ylabel('holidays')
    figH.tight_layout()
    return figH

def plotTrend(dictframe, ax, uncertainty, plotCap, color='#0072B2'):
    '''
    Plot the trend component of a single node.

    Parameters
    ------------------
    
    dictframe - (dataframe) a single node's frame with 'ds' and 'trend' columns
                (plus 'trend_lower' and 'trend_upper' when uncertainty is True)
    
    ax - (axes object) axes to draw on, or None to create a new figure
    
    uncertainty - (Boolean) include the prediction intervals or not
    
    plotCap - (Boolean) draw the 'cap' column as a dashed black line when the frame has one
    
    color - (string) colour of the trend line and of the interval band
    
    Returns
    ------------------
    
    the figure holding the trend plot
    
    '''
    ##
    # This function is largely the same as the one in Prophet
    ##
    if ax is not None:
        figT = ax.get_figure()
    else:
        figT = plt.figure(facecolor='w', figsize=(10, 6))
        ax = figT.add_subplot(111)
    # Every series below shares the same x values
    dates = dictframe['ds'].values
    ax.plot(dates, dictframe['trend'], ls='-', c=color)
    showCap = 'cap' in dictframe and plotCap
    if showCap:
        ax.plot(dates, dictframe['cap'], ls='--', c='k')
    if uncertainty:
        lowerBand = dictframe['trend_lower']
        upperBand = dictframe['trend_upper']
        ax.fill_between(dates, lowerBand, upperBand, color=color, alpha=0.2)
    gridStyle = dict(which='major', c='gray', ls='-', lw=1, alpha=0.2)
    ax.grid(True, **gridStyle)
    ax.set_xlabel('ds')
    ax.set_ylabel('trend')
    figT.tight_layout()
    return figT

def plotNodeComponents(dictframe, column, holidays = None, uncertainty=False, plotCap=False, weeklyStart = 0, ax=None):
    '''
    Plot every forecast component (trend, holidays, weekly, yearly) that is
    present as a column in the chosen node's dataframe.

    Parameters
    ------------------
    
    dictframe - (dict) The dictionary of dataframes that is the output of the hts function
    
    column - (string) column title that you want to plot
    
    holidays - (dataframe) frame with a 'holiday' column naming the holidays, or None
               to skip the holidays plot
    
    uncertainty - (Boolean) include the prediction intervals or not
    
    plotCap - (Boolean) include the cap lines or not
    
    weeklyStart - (int) first-day-of-week setting forwarded to plotWeekly
    
    ax - (axes object) any axes object thats already created that you want to pass to the plot function;
         the same object is handed to every component plot, while None lets each
         component create its own figure
    
    Returns
    ------------------
    
    None; the plots of that node's trend, seasonalities, holidays, etc. are drawn as a side effect
    
    '''
    nodeToPlot = dictframe[column]
    colNames = nodeToPlot.columns.tolist()
    trend = "trend" in colNames
    weekly = "weekly" in colNames
    yearly = "yearly" in colNames
    ##
    # Keep only the holidays that actually have a column in this node. The flag
    # must exist even when no holidays frame is given, otherwise the check
    # below fails with a NameError
    ##
    holiday = False
    presentHolidays = None
    if holidays is not None:
        presentHolidays = holidays[holidays['holiday'].isin(colNames)]
        holiday = not presentHolidays.empty

    if trend:
        plotTrend(nodeToPlot, ax=ax, uncertainty=uncertainty, plotCap=plotCap)
    if holiday:
        plotHolidays(nodeToPlot, holidays=presentHolidays, ax=ax, uncertainty=uncertainty)
    if weekly:
        plotWeekly(nodeToPlot, ax=ax, uncertainty=uncertainty, weeklyStart = weeklyStart)
    if yearly:
        plotYearly(nodeToPlot, ax=ax, uncertainty=uncertainty)
    
    return

#%%
def plotChild(dictframe, column, h = 1, xlabel = 'ds', ylabel = 'y', startFrom = 0, uncertainty = False, ax = None):
    '''
    Plot a node together with its direct children on one set of axes, each
    as a solid fitted line followed by a dashed h-step ahead forecast.

    Parameters
    ------------------
    
    dictframe - (dict) The dictionary of dataframes that is the output of the hts function
    
    column - (string) column title that you want to plot
    
    h - (int) number of steps in the forecast same as input to hts function;
        0 means there is no forecast segment and everything is drawn solid
    
    xlabel - (string) label for the graph's x axis
    
    ylabel - (string) label for the graph's y axis
    
    startFrom - (int) the number of values to skip at the beginning of yhat so that you can zoom in
    
    uncertainty - (Boolean) include the prediction intervals or not
    
    ax - (axes object) any axes object thats already created that you want to pass to the plot function
    
    Returns
    ------------------
    
    the figure holding the plot of that node and its children's forecast
    
    Raises
    ------------------
    
    ValueError if h is negative or no key of dictframe belongs to column
    
    SystemExit if the specified column doesn't have children
    
    '''
    if h < 0:
        raise ValueError("h must be a non-negative number of forecast steps")
    ##
    # Use the qualitative tab10 color map so that the lines are discernably different
    ##
    cmap = plt.get_cmap('tab10')
    ##
    # Find the children nodes. The depth of a node is the number of '_' in its key,
    # and the slices below assume the keys are ordered with shallower levels first
    ##
    colOptions = list(dictframe.keys())
    if column == 'Total':
        # Total's children are all the keys in front of the first deeper key
        depths = [s.count('_') for s in colOptions]
        if max(depths) > 0:
            columnsToPlot = colOptions[:depths.index(min(depths) + 1)]
        else:
            columnsToPlot = colOptions
    else:
        # Match the node itself and keys prefixed by "<column>_"; a plain substring
        # test would also pick up unrelated nodes (e.g. "1" is contained in "10")
        family = [s for s in colOptions if s == column or s.startswith(column + '_')]
        if not family:
            raise ValueError("no key of dictframe belongs to the specified column")
        depths = [s.count('_') for s in family]
        base = min(depths)
        # NOTE(review): sys.exit is kept so callers see the same exception type as before
        if base + 1 not in depths:
            sys.exit("the specified column doesn't have children")
        if base + 2 in depths:
            # Stop in front of the first grandchild
            columnsToPlot = family[:depths.index(base + 2)]
        else:
            columnsToPlot = family
    ##
    # Plot the node and its children the same way as the plotNode function did it
    ##
    if ax is None:
        fig = plt.figure(facecolor='w', figsize=(10, 6))
        ax = fig.add_subplot(111)
    else:
        fig = ax.get_figure()
    N = len(columnsToPlot)
    for i, name in enumerate(columnsToPlot):
        nodeToPlot = dictframe[name]
        lineColor = cmap(float(i)/N)
        # Plain arrays keep every slice positional; an explicit split index avoids
        # the h == 0 case where [a:-0] is empty and [-0:] is the whole series
        dates = nodeToPlot['ds'].values
        yhat = nodeToPlot['yhat'].values
        split = max(len(dates) - h, 0)
        ax.plot(dates[startFrom:split], yhat[startFrom:split], ls='-', c = lineColor, label = name)
        if h > 0:
            ax.plot(dates[split:], yhat[split:], dashes = [2,1], c = lineColor, label = '_nolegend_')
        if 'cap' in nodeToPlot:
            ax.plot(dates[startFrom:], nodeToPlot['cap'].values[startFrom:], ls='--', c='k')
        if uncertainty:
            # Shade each band in its own line's color so the bands can be told apart
            ax.fill_between(dates[startFrom:], nodeToPlot['yhat_lower'].values[startFrom:],
                            nodeToPlot['yhat_upper'].values[startFrom:], color=lineColor,
                            alpha=0.2)
    
    ax.grid(True, which='major', color='gray', ls='-', lw=1, alpha = 0.2)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.legend()
    fig.tight_layout()
    
    return fig