python source code of

import os
import sys
import ast
import time
import string
import locale
import pickle
import warnings
import numpy as np
import pandas as pd
from pathlib import Path

import ServerSideExtension_pb2 as SSE

# Suppress warnings 
if not sys.warnoptions:
    warnings.simplefilter("ignore")

from sklearn import preprocessing

# Add Generated folder to module path.
PARENT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(PARENT_DIR, 'generated'))

def request_df(request_list, row_template, col_headers):
    """
    This function takes in a SSE request as a list together with a row template and column headers as lists of strings.
    Returns a Data Frame for the request.
    e.g. request_df(request_list, ['strData', 'numData', 'strData'], ['dim1', 'measure', 'kwargs'])
    """
    
    rows = [row for request_rows in request_list for row in request_rows.rows]
    outer = []
    
    for i in range(len(rows)):
        inner = []
        
        for j in range(len(row_template)):
            inner.append(getattr(rows[i].duals[j], row_template[j]))
        
        outer.append(inner)
    
    return pd.DataFrame(outer, columns=col_headers)

def get_response_rows(response, template):
    """
    Take in a list of responses and covert them to SSE.Rows based on the column type specified in template
    The template should be a list of the form: ["str", "num", "dual", ...]
    For string values use: "str"
    For numeric values use: "num"
    For dual values: "dual"
    """

    response_rows = []
    
    # For each row in the response list
    for row in response:
        i = 0
        this_row = []
        
        if len(template) > 1:        
            # For each column in the row
            for col in row:
                # Convert values to type SSE.Dual according to the template list
                if template[i] == "str":
                    if col is None:
                        col = "\x00"
                    elif type(col) is not str:
                        col = "{0:.5f}".format(col)
                    this_row.append(SSE.Dual(strData=col))
                elif template[i] == "num":
                    this_row.append(SSE.Dual(numData=col))
                elif template[i] == "dual":
                    this_row.append(SSE.Dual(strData=col, numData=col))
                i = i + 1
        else:
            # Convert values to type SSE.Dual according to the template list
            if template[0] == "str":
                if row is None:
                    row = "\x00"
                elif type(row) is not str:
                    row = "{0:.5f}".format(row)
                this_row.append(SSE.Dual(strData=row))
            elif template[0] == "num":
                this_row.append(SSE.Dual(numData=row))
            elif template[0] == "dual":
                this_row.append(SSE.Dual(strData=row, numData=row))
        
        # Group columns into a iterable and add to the the response_rows
        response_rows.append(iter(this_row))

    # Values are then structured as SSE.Rows
    response_rows = [SSE.Row(duals=duals) for duals in response_rows]

    return response_rows

def fillna(df, method="zeros"):
    """
    Fill empty values in a Data Frame with the chosen method.
    Valid options for method are: zeros, mean, median, mode
    """

    if method == "mean":
        return df.fillna(df.mean())
    elif method == "median":
        return df.fillna(df.median())
    elif method == "mode":
        return df.fillna(df.mode().iloc[0])
    elif method == "none":
        return df
    else:
        return df.fillna(0)

def get_scaler(df, missing="zeros", scaler="StandardScaler", **kwargs):
    """
    Fit a sklearn scaler on a Data Frame and return the scaler.
    Valid options for the scaler are: StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler, QuantileTransformer
    Missing values must be dealt with before the scaling is applied. 
    Valid options specified through the missing parameter are: zeros, mean, median, mode
    """

    s = getattr(preprocessing, scaler)
    s = s(**kwargs)

    df = fillna(df, method=missing)
    
    return s.fit(df)  
    
def scale(df, missing="zeros", scaler="robust", **kwargs):
    """
    Scale values in a Data Frame using the relevant sklearn preprocessing method.
    Valid options for the scaler are: standard, minmax, maxabs, robust, quantile
    Missing values must be dealt with before the scaling is applied. 
    Valid options specified through the missing parameter are: zeros, mean, median, mode
    """
    
    scalers = {'standard':'StandardScaler', 'minmax':'MinMaxScaler', 'maxabs':'MaxAbsScaler',\
               'robust':'RobustScaler', 'quantile':'QuantileTransformer'}
    
    s = getattr(preprocessing, scalers[scaler])
    s = s(**kwargs)
    
    df = fillna(df, method=missing)
    df = pd.DataFrame(s.fit_transform(df), index=df.index, columns=df.columns)
    
    return df

def count_placeholders(series):
    """
    Count the number of null or zero values at the bottom of a series.
    """
    count = 0
    series = series.reset_index(drop=True)

    for i in range(series.size-1, -1, -1):
        if pd.isnull(series[i]) or series[i] == 0:
            count += 1
        else:
            break

    return count

def get_kwargs(str_kwargs):
    """
    Take in a string of key word arguments and return as a dictionary of key, value pairs
    The string should be in the form: 'arg1=value1,arg2=value2'
    """

    # If the argument string is empty return an empty dict
    if len(str_kwargs) == 0:
        return dict()
    
    # Remove any extra spaces and trailing commas
    args = str_kwargs.strip()
    if args[-1] == ',':
        args = args[:-1]
    
    # The parameter and values are transformed into key value pairs
    args = args.translate(str.maketrans('', '', string.whitespace)).split(",")
    kwargs = dict([arg.split("=") for arg in args])
    
    return kwargs

def get_kwargs_by_type(dict_kwargs):
    """
    Take in a dictionary of keyword arguments where values are converted to the specified data type.
    The values in the dictionary should be a string of the form: "value|type"
    e.g. {"arg1": "2|int", "arg2": "2.0|float", "arg3": "True|bool", "arg4": "string|str"}
    Dictionaries, lists and arrays are allowed with the following format:
    "x:1;y:2|dict|str|int" where str is the type for keys and int is the type for values
    "x;y;z|array|str" where str is the type of values in the array
    "1;2;3|list|int" where int is the type of the values in the list
    "0;1|tuple|int" where int is the type of the values in the tuple
    """
    
    # Dictionary used to convert argument values to the correct type
    types = {"boolean":ast.literal_eval, "bool":ast.literal_eval, "integer":atoi, "int":atoi,\
             "float":atof, "string":str, "str":str, "none":atonone, "None":atonone}
    
    result_dict = {}
    
    # Fill up the dictionary with the keyword arguments
    for k, v in dict_kwargs.items():
        # Split the value and type
        split = v.split("|")

        try:
            if len(split) == 2:      
                # Handle conversion from string to boolean
                if split[1] in ("boolean", "bool"):
                    split[0] = split[0].capitalize()
                
                # Convert the value based on the correct type
                result_dict[k] = types[split[1]](split[0])
            
            elif split[1] == "dict":
                # If the argument is a dictionary convert keys and values according to the correct types
                items = split[0].split(";")
                d = {}
                
                for i in items:
                    a,b = i.split(":")
                    
                    # Handle conversion from string to boolean
                    if split[2] in ("boolean", "bool"):
                        a = a.capitalize()
                    if split[3] in ("boolean", "bool"):
                        b = b.capitalize()
                    
                    # Handle None as an item in the dictionary
                    if b in ("None", "none"):
                        d[types[split[2]](a)] = None
                    else:
                        d[types[split[2]](a)] = types[split[3]](b)
                
                result_dict[k] = d
            
            elif split[1] in ("list", "array", "tuple"):
                # If the argument is a list, array or tuple convert keys and values according to the correct types
                items = split[0].split(";")
                l = []
                
                for i in items:
                    # Handle conversion from string to boolean
                    if split[2] in ("boolean", "bool"):
                        i = i.capitalize()

                    # Handle None as an item
                    if i in ("None", "none"):
                        l.append(None)
                    else:
                        l.append(types[split[2]](i))
                
                if split[1] == "array":
                    l = np.array(l)
                elif split[1] == "tuple":
                    l = tuple(l)
                    
                result_dict[k] = l
        except IndexError:
            err = "List index out of range. This is most likely due to incorrect syntax of keyword arguments."
            raise Exception(err)
    
    return result_dict

def get_args_by_type(str_args):
    """
    Take in a string of positional arguments and types and convert them to a list of values of the correct type.
    The string should be in the form: 'value1|type1,value2|type2'.
    e.g. '8|int, 0.5|float' would return [8, 0.5]
    """
    
    # Dictionary used to convert argument values to the correct type
    types = {"boolean":ast.literal_eval, "bool":ast.literal_eval, "integer":atoi, "int":atoi,\
             "float":atof, "string":str, "str":str}
    
    result_list = []
    
    # If the argument string is empty return an empty list
    if len(str_args) == 0:
        return list()
    
    for arg in str_args.split(","):
        # Split the value and type
        split = arg.strip().split("|")
        
        try:
            if len(split) == 2:      
                # Handle conversion from string to boolean
                if split[1] in ("boolean", "bool"):
                    split[0] = split[0].capitalize()
                
                # Convert the value based on the correct type
                result_list.append(types[split[1]](split[0]))

            elif split[1] == "dict":
                # If the argument is a dictionary convert keys and values according to the correct types
                items = split[0].split(";")
                d = {}
                
                for i in items:
                    a,b = i.split(":")
                    
                    # Handle conversion from string to boolean
                    if split[2] in ("boolean", "bool"):
                        a = a.capitalize()
                    if split[3] in ("boolean", "bool"):
                        b = b.capitalize()
                    
                    # Handle None as an item in the dictionary
                    if b == "None":
                        d[types[split[2]](a)] = None
                    else:
                        d[types[split[2]](a)] = types[split[3]](b)
                
                result_list.append(d)
            
            elif split[1] in ("list", "array", "tuple"):
                # If the argument is a list, array or tuple convert keys and values according to the correct types
                items = split[0].split(";")
                l = []
                
                for i in items:
                    # Handle conversion from string to boolean
                    if split[2] in ("boolean", "bool"):
                        i = i.capitalize()

                    # Handle None as an item in the dictionary
                    if i == "None":
                        l.append(None)
                    else:
                        l.append(types[split[2]](i))
                
                if split[1] == "array":
                    l = np.array(l)
                elif split[1] == "tuple":
                    l = tuple(l)
                    
                result_list.append(l)
            else:
                err = "Incorrect syntax for argument: '{}'. Expected 'value1|type1, value2|type2, ...'".format(str_args)
                raise Exception(err)    
        
        except IndexError:
            err = "List index out of range. This is most likely due to incorrect syntax of arguments."
            raise Exception(err)
        
    return result_list

def convert_types(n_samples, features_df, sort=True):
    """
    Convert data in n_samples to the correct data type based on the feature definitions.
    Optionally sort the data by the feature with "variable_type" set to "identifier". 
    Both n_samples and features_df must be supplied as dataframes. The columns in n_samples must be equal to rows in features_df.
    The features_df dataframe must have "name", "data_type" and "variable_type" columns.
    Accepted data_types are int, float, str, bool.
    """
    
    # Transpose the features dataframe and keep the data_types for each feature
    features_df_t = features_df.T
    features_df_t.columns = features_df_t.loc["name",:].tolist()
    dtypes = features_df_t.loc["data_type",:]
    
    # Dictionary used to convert argument values to the correct type
    types = {"boolean":ast.literal_eval, "bool":ast.literal_eval, "integer":atoi, "int":atoi,\
             "float":atof, "string":str, "str":str}
    
    # Convert columns by the corresponding data type
    for col in n_samples.columns:
        # Handle conversion from string to boolean
        if dtypes[col] in ("boolean", "bool"):
            n_samples.loc[:, col] = n_samples.loc[:, col].astype("str").apply(str.capitalize)
        
        # Convert this column to the correct type
        n_samples.loc[:, col] = n_samples.loc[:, col].apply(types[dtypes[col]])     

    # Get the unique identifier from the feature definitions
    identifier = features_df.loc[features_df["variable_type"] == "identifier"]

    # Ensure the data is sorted by the identifier if available
    if sort and len(identifier) == 1:
        n_samples = n_samples.sort_values(by=[identifier.index[0]], ascending=True)

    return n_samples

def atoi(a):
    """
    Convert a string to float.
    The string can be in the following valid regional number formats:
    4,294,967,295   4 294 967 295   4.294.967.295   4 294 967.295  
    """
    if len(a) == 0:
        return np.NaN
    
    translator = str.maketrans("", "", ",. ")
    
    return (int(a.translate(translator)))

def atof(a):
    """
    Convert a string to float.
    The string can be in the following valid regional number formats:
    4,294,967,295.00   4 294 967 295,000   4.294.967.295,000  
    """
    if len(a) == 0:
        return np.NaN
    
    del_chars = " "
    
    if a.count(",") > 1 or a.rfind(",") < a.rfind("."):
        del_chars = del_chars + ","
    
    if a.count(".") > 1 or a.rfind(",") > a.rfind("."):
        del_chars = del_chars + "."
    
    s = a.translate(str.maketrans("", "", del_chars))

    return float(s.replace(",", "."))

def atonone(a):
    """
    Return None.
    Convenience function for type conversions.
    """

    return None

def dict_to_sse_arg(d):
    """
    Converts a dictionary to the argument syntax for this SSE
    """

    # Dictionary used to convert argument values to the correct type
    types = {bool:"bool", int:"int", float:"float", str:"str"}

    # Output string
    s = ""

    # Format into the required syntax
    for k, v in d.items():
        if v is None:
            s = s + str(k) + "=None, "
        else:
            s = s + str(k) + "=" + str(v) + "|" + types[type(v)] + ", "
    
    return s[:-2]

def make_stationary(y, strategy='log', stationarity_lags=[1], array_like=True):
    """
    Make a series stationary using a logarithm or differencing.
    Valid values for the strategy parameter are 'log' or 'difference'

    The array_like parameter determines if y is expected to be multiple values or a single value.
    Note that the differencing won't be done if array_like=False.

    By default the difference will be done with lag = 1. Alternate lags can be provided by passing a list of stationarity_lags.
    e.g. stationarity_lags=[1, 12]
    """

    y_transform = y

    # Apply a logarithm to make the array stationary
    if strategy == 'log':
        y_transform = np.log(y)

    # Apply stationarity lags by differencing the array
    elif strategy == 'difference' and array_like:
        y_diff = y_transform.copy()
        len_y = len(y_diff)

        for i in range(max(stationarity_lags), len_y):
            for lag in stationarity_lags:
                if isinstance(y_diff, (pd.Series, pd.DataFrame)):
                    y_diff.iloc[i] = y_diff.iloc[i] - y_transform.iloc[i - lag]
                else:
                    y_diff[i] = y_diff[i] - y_transform[i - lag]

        # Remove targets with insufficient lag periods
        # NOTE: The corresponding samples will need to be dropped at this function call's origin
        if isinstance(y_diff, (pd.Series, pd.DataFrame)):
            y_transform = y_diff.iloc[max(stationarity_lags):]
        else:
            y_transform = y_diff[max(stationarity_lags):]

    return y_transform

def add_lags(df, lag=1, extrapolate=1, dropna=True, suffix="t"):
    """
    Take in a 2D DataFrame (n_samples by n_features) and create a new DataFrame with lag observations added to it.
    E.g. If lags=2, the previous two observations will be concatenated as inputs to each sample.
    The extrapolate argument can be used to add current (=1) and future observations (>1).
    Samples without enough lags can be retained or dropped using the dropna argument.
    The input DataFrame is assumed to be pre-sorted.
    """
    
    n_features = df.shape[1]
    
    cols, names = list(), list()
    
    # input sequence (t-n, ... t-1)
    for i in range(lag, 0, -1):
        cols.append(df.shift(i))
        names += ["{0}({1}-{2})".format(df.columns[j], suffix, i) for j in range(n_features)]
    
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, extrapolate):
        cols.append(df.shift(-i))
        if i == 0:
            names += ["{0}({1})".format(df.columns[j], suffix) for j in range(n_features)]
        else:
            names += ["{0}({1}+{2})".format(df.columns[j], suffix, i) for j in range(n_features)]
    
    # Concatenate the shifted DataFrames
    agg = pd.concat(cols, axis=1)
    # Add the concatenated column names
    agg.columns = names
    
    # drop rows with NaN values
    if dropna:
        agg.dropna(inplace=True)
    
    return agg

def vectorize_array(y, steps=1, return_type='df'):
    """
    Take in an array like sequence of values. 
    Vectorize the array and add steps from further in the series to each item.
    The values in the input array must be of shape [n, 1].
    Rows with nan values in the result array will be dropped.
    If return_type is 'df' return a dataframe, otherwise return a numpy array.

    E.g. vectorize_array([1,2,3,4,5], steps=2) == array([[1,2], [2,3], [3,4], [4,5]], dtype=int64)
    In the example the last row is dropped as it results in [5, nan]
    """
    if isinstance(y, pd.DataFrame):
        assert len(y.columns) == 1, "This function is built for array like structures. Got input with shape {}".format(y.shape)
        y_transform = y.copy().rename(lambda x: 'y', axis='columns')        
    else:
        y_transform = pd.DataFrame(y, columns=['y'])

    dtype = y_transform['y'].dtype

    for i in range(1, steps):
        y_transform['y+{}'.format(i)] = y_transform['y'].shift(-1*i)
    
    y_transform = y_transform.dropna().astype(dtype)

    if return_type.lower() != 'df':
        return y_transform.values
    
    return y_transform

def decode(y, labels={}):
    """
    Take in a numpy array and a dictionary of labels.
    Maps the items in the array to the labels.
    """

    # Setup a decoding function based on the dictionary of labels
    decode = lambda x, d={}: round(x) if round(x) not in d else d[round(x)]
    # Store the shape of the array
    shape = y.shape
    # Flatten the array and perform the decoding 
    y = np.array([decode(atof(x), labels) for x in y.ravel()])
    # Reshape the array back to it's original form
    y = np.reshape(y, shape)

    return y

def lock(filepath, wait=2, retries=2):
    """
    Create a lockfile for a specified file.
    If the file is already locked, wait for a specified number of seconds.
    Retry a maximum number of times before timing out.
    """

    # Create a path for the lock file
    f_lock = filepath + '.lock'

    for _ in range(retries):
        # If the file is currently locked
        if Path(f_lock).exists():
            # Wait a few seconds and check again
            time.sleep(wait)
        else:
            # Write a lock file
            with open(f_lock, 'wb') as file:
                pickle.dump(f_lock, file)
                break  
    else:
        # If the file is still locked after maximum retries raise an exception
        if Path(f_lock).exists():
            raise TimeoutError("The specified file is locked. If you believe this to be wrong, please delete file {0}".format(f_lock))
    
    return True

def unlock(filepath):
    """
    Unlock a file locked with the lock method.
    """
    
    # Create a path for the lock file
    f_lock = filepath + '.lock'
    # Delete the lock file
    if Path(f_lock).exists():
        Path(f_lock).unlink()
     
    return True