python source code of create_graph

import datetime
import os.path
import random
import string

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from experiment_impact_tracker.data_utils import load_data_into_frame

SMALL_SIZE = 22
MEDIUM_SIZE = 24
BIGGER_SIZE = 26

plt.rc("font", size=SMALL_SIZE)  # controls default text sizes
plt.rc("axes", titlesize=MEDIUM_SIZE)  # fontsize of the axes title
plt.rc("axes", labelsize=MEDIUM_SIZE)  # fontsize of the x and y labels
plt.rc("xtick", labelsize=SMALL_SIZE)  # fontsize of the tick labels
plt.rc("ytick", labelsize=SMALL_SIZE)  # fontsize of the tick labels
plt.rc("legend", fontsize=MEDIUM_SIZE)  # legend fontsize
plt.rc("figure", titlesize=BIGGER_SIZE)  # fontsize of the figure title

# def random_suffix(length=4):
#     letters = string.ascii_lowercase
#     return ''.join(random.choice(letters) for i in range(stringLength))


def dateparse(time_in_secs):
    return datetime.datetime.fromtimestamp(float(time_in_secs))


# def clean(x): #2.9066 GHz
#     x = x.replace(" GHz", "")
#     return float(x)


def handle_cpu_count_adjusted_average_load(df):
    separated_df = pd.DataFrame(df["cpu_count_adjusted_average_load"].values.tolist())
    separated_df.columns = [
        "5_min_cpu_count_adjusted_average_load",
        "10_min_cpu_count_adjusted_average_load",
        "15_min_cpu_count_adjusted_average_load",
    ]

    return pd.concat([separated_df, df], axis=1)


# HZ_ACTUAL_COL = 'hz_actual'
ADJUSTED_AVERAGE_LOAD = "cpu_count_adjusted_average_load"
TIMESTAMP_COL = "timestamp"

SKIP_COLUMN = ["timestamp", "per_gpu_performance_state"]

# TODO move per_gpu_performance_state to special handler
SPECIAL_COLUMN = [ADJUSTED_AVERAGE_LOAD]

HANDLER_MAP = {ADJUSTED_AVERAGE_LOAD: handle_cpu_count_adjusted_average_load}


def create_graphs(
    input_path: str,
    output_path: str = ".",
    fig_x: int = 16,
    fig_y: int = 8,
    max_level=None,
):
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    # create graph dirs
    graph_dir = str(fig_x) + "_" + str(fig_y)
    out_dir = os.path.join(output_path, graph_dir)
    # if os.path.exists(out_dir):
    #     print("{} already exists, attaching random string to the out put dir and moving on.".format(out_dir))
    #     out_dir = out_dir + '_' + random_suffix()

    os.makedirs(out_dir, exist_ok=True)
    df, json_raw = load_data_into_frame(input_path, max_level=max_level)
    # df = pd.read_csv(os.path.join(input_path, csv), sep=',', parse_dates=[0], date_parser=dateparse)
    created_paths = []
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")
    print("Plotting {}".format(",".join([k for k in list(df)[1:]])))

    # Do a pass for any pre-processing
    for k in list(df)[:]:
        if k in SPECIAL_COLUMN:
            df = HANDLER_MAP[k](df)

    # Then graph everything
    for k in list(df)[:]:
        if k in SKIP_COLUMN:
            continue
        try:
            df.plot(kind="line", x=TIMESTAMP_COL, y=k, figsize=(25, 8))
        except:
            print("problem plotting {}, skipping".format(k))
            continue

        path_name = os.path.join(out_dir, k + ".png")
        plt.savefig(path_name)
        plt.close("all")
        created_paths.append(path_name)
    return created_paths


def create_scatterplot_from_df(
    df, x: str, y: str, output_path: str = ".", fig_x: int = 16, fig_y: int = 8
):
    """Loads an executive summary df and creates a scatterplot from some pre-specified variables.
    
    Args:
        df ([type]): [description]
        x (str): [description]
        y (str): [description]
        output_path (str, optional): [description]. Defaults to '.'.
        fig_x (int, optional): [description]. Defaults to 16.
        fig_y (int, optional): [description]. Defaults to 8.
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    # create graph dirs
    graph_dir = str(fig_x) + "_" + str(fig_y)
    out_dir = os.path.join(output_path, graph_dir)
    df[x] = df[x].astype(float)
    df[y] = df[y].astype(float)
    os.makedirs(out_dir, exist_ok=True)
    a4_dims = (14, 9)
    fig, ax = plt.subplots(figsize=a4_dims)
    graph = sns.scatterplot(
        ax=ax, x=x, y=y, data=df, s=325, alpha=0.5, hue="Experiment", legend="brief"
    )  # , palette="Set1")
    box = ax.get_position()
    plt.legend(markerscale=2)
    # ax.set_position([box.x0,box.y0,box.width*0.83,box.height])
    # plt.legend(loc='upper left',bbox_to_anchor=(1,1.15))
    # plt.ylim(bottom=0.0)

    # plt.legend(loc='lower right')
    # Use regplot to plot the regression line for the whole points
    # sns.regplot(x="FPOs", y=args.y_axis_var, data=df, sizes=(250, 500),  alpha=.5, scatter=False, ax=graph.axes[2])
    path_name = os.path.join(out_dir, "{}v{}.png".format(x, y))
    plt.savefig(path_name)
    plt.close("all")
    return path_name