# -*- coding: utf-8 -*- # Import necessary modules import numpy as np import numpy.linalg as npla import hoggorm.statTools as st import hoggorm.cross_val as cv class nipalsPCR: """ This class carries out Principal Component Regression for two arrays using NIPALS algorithm. PARAMETERS ---------- arrX : numpy array This is X in the PCR model. Number and order of objects (rows) must match those of ``arrY``. arrY : numpy array This is Y in the PCR model. Number and order of objects (rows) must match those of ``arrX``. numComp : int, optional An integer that defines how many components are to be computed. If not provided, the maximum possible number of components is used. Xstand : boolean, optional Defines whether variables in ``arrX`` are to be standardised/scaled or centered. False : columns of ``arrX`` are mean centred (default) ``Xstand = False`` True : columns of ``arrX`` are mean centred and devided by their own standard deviation ``Xstand = True`` Ystand : boolean, optional Defines whether variables in ``arrY`` are to be standardised/scaled or centered. False : columns of ``arrY`` are mean centred (default) ``Ystand = False`` True : columns of ``arrY`` are mean centred and devided by their own standard deviation ``Ystand = True`` cvType : list, optional The list defines cross validation settings when computing the PCA model. Note if `cvType` is not provided, cross validation will not be performed and as such cross validation results will not be available. Choose cross validation type from the following: loo : leave one out / a.k.a. full cross validation (default) ``cvType = ["loo"]`` KFold : leave out one fold or segment ``cvType = ["KFold", numFolds]`` numFolds: int Number of folds or segments lolo : leave one label out ``cvType = ["lolo", labelsList]`` labelsList: list Sequence of lables. Must be same lenght as number of rows in ``arrX`` and ``arrY``. Leaves out objects with same lable. RETURNS ------- class A class that contains the PCR model and computational results EXAMPLES -------- First import the hoggormpackage >>> import hoggorm as ho Import your data into a numpy array. >>> np.shape(my_X_data) (14, 292) >>> np.shape(my_Y_data) (14, 5) Examples of how to compute a PCR model using different settings for the input parameters. >>> model = ho.nipalsPCR(arrX=my_X_data, arrY=my_Y_data, numComp=5) >>> model = ho.nipalsPCR(arrX=my_X_data, arrY=my_Y_data) >>> model = ho.nipalsPCR(arrX=my_X_data, arrY=my_Y_data, numComp=3, Ystand=True) >>> model = ho.nipalsPCR(arrX=my_X_data, arrY=my_Y_data, Xstand=False, Ystand=True) >>> model = ho.nipalsPCR(arrX=my_X_data, arrY=my_Y_data, cvType=["loo"]) >>> model = ho.nipalsPCR(arrX=my_X_data, arrY=my_Y_data, cvType=["KFold", 7]) >>> model = ho.nipalsPCR(arrX=my_X_data, arrY=my_Y_data, cvType=["lolo", [1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7]]) Examples of how to extract results from the PCR model. >>> X_scores = model.X_scores() >>> X_loadings = model.X_loadings() >>> Y_loadings = model.Y_loadings() >>> X_cumulativeCalibratedExplainedVariance_allVariables = model.X_cumCalExplVar_indVar() >>> Y_cumulativeValidatedExplainedVariance_total = model.Y_cumCalExplVar() """ def __init__(self, arrX, arrY, numComp=None, Xstand=False, Ystand=False, cvType=None): """ On initialisation check how arrX and arrY are to be pre-processed (parameters Xstand and Ystand are either True or False). Then check whether number of components chosen by user is OK. """ # =============================================================================== # Check what is provided by user for PCA-part of PCR # =============================================================================== # Define X and Y within class such that the data can be accessed from # all attributes in class. self.arrX_input = arrX self.arrY_input = arrY # Check whether cvType is provided. If NOT, then no cross validation # is carried out. self.cvType = cvType # Define maximum number of components to compute depending on whether # cross validation was selected or not. if isinstance(self.cvType, type(None)): maxNumPC = min(np.shape(self.arrX_input)) else: # Depict the number of components that are possible to compute based # on size of data set (#rows, #cols), type of cross validation (i.e. # size of CV segments) numObj = np.shape(self.arrX_input)[0] # Compute the sizes of training sets in CV if self.cvType[0] == "loo": cvComb = cv.LeaveOneOut(numObj) elif self.cvType[0] == "KFold": cvComb = cv.KFold(numObj, k=self.cvType[1]) elif self.cvType[0] == "lolo": cvComb = cv.LeaveOneLabelOut(self.cvType[1]) else: print('Requested form of cross validation is not available') pass # First devide into combinations of training and test sets. Collect # sizes of training sets, since this also may limit the number of # components that can be computed. segSizes = [] for train_index, test_index in cvComb: x_train, x_test = cv.split(train_index, test_index, self.arrX_input) y_train, y_test = cv.split(train_index, test_index, self.arrY_input) segSizes.append(numObj - sum(train_index)) # Compute the max number of components based on only object size maxN = numObj - max(segSizes) - 1 # Choose whatever is smaller, number of variables or maxN maxNumPC = min(np.shape(arrX)[1], maxN) # Now set the number of components that is possible to compute. if numComp is None: self.numPC = maxNumPC else: if numComp > maxNumPC: self.numPC = maxNumPC else: self.numPC = numComp # Pre-process data according to user request. # ------------------------------------------- # Check whether standardisation of X and Y are requested by user. If # NOT, then X and y are centred by default. self.Xstand = Xstand self.Ystand = Ystand # Standardise X if requested by user, otherwise center X. if self.Xstand: self.Xmeans = np.average(self.arrX_input, axis=0) self.Xstd = np.std(self.arrX_input, axis=0, ddof=1) self.arrX = (self.arrX_input - self.Xmeans) / self.Xstd else: self.Xmeans = np.average(self.arrX_input, axis=0) self.arrX = self.arrX_input - self.Xmeans # Standardise Y if requested by user, otherwise center Y. if self.Ystand: self.Ymeans = np.average(self.arrY_input, axis=0) self.Ystd = np.std(self.arrY_input, axis=0, ddof=1) self.arrY = (self.arrY_input - self.Ymeans) / self.Ystd else: self.Ymeans = np.average(self.arrY_input, axis=0) self.arrY = self.arrY_input - self.Ymeans # Before PLS2 NIPALS algorithm starts initiate and lists in which # results will be stored. self.X_scoresList = [] self.Y_scoresList = [] self.X_loadingsList = [] self.Y_loadingsList = [] self.X_loadingsWeightsList = [] self.coeffList = [] self.Y_residualsList = [self.arrY] self.X_residualsList = [self.arrX] # Collect residual matrices/arrays after each computed PC self.resids = {} self.X_residualsDict = {} # Collect predicted matrices/array Xhat after each computed PC self.calXhatDict_singPC = {} # Collect explained variance in each PC self.calExplainedVariancesDict = {} self.X_calExplainedVariancesList = [] # =============================================================================== # Here the NIPALS PCA algorithm on X starts # =============================================================================== threshold = 1.0e-8 X_new = self.arrX.copy() # Compute number of principal components as specified by user for j in range(self.numPC): # Check if first column contains only zeros. If yes, then # NIPALS will not converge and (npla.norm(num) will contain # nan's). Rather put in other starting values. if not np.any(X_new[:, 0]): X_repl_nonCent = np.arange(np.shape(X_new)[0]) X_repl = X_repl_nonCent - np.mean(X_repl_nonCent) t = X_repl.reshape(-1,1) else: t = X_new[:,0].reshape(-1,1) # Iterate until score vector converges according to threshold while 1: num = np.dot(np.transpose(X_new), t) denom = npla.norm(num) p = num / denom t_new = np.dot(X_new, p) diff = t - t_new t = t_new.copy() SS = np.sum(np.square(diff)) # Check whether sum of squares is smaller than threshold. Break # out of loop if true and start computation of next PC. if SS < threshold: self.X_scoresList.append(t) self.X_loadingsList.append(p) break # Peel off information explained by actual componentand continue with # decomposition on the residuals (X_new = E). X_old = X_new.copy() Xhat_j = np.dot(t, np.transpose(p)) X_new = X_old - Xhat_j # Store residuals E and Xhat in their dictionaries self.X_residualsDict[j+1] = X_new self.calXhatDict_singPC[j+1] = Xhat_j if self.Xstand: self.calXhatDict_singPC[j+1] = (Xhat_j * self.Xstd) + self.Xmeans else: self.calXhatDict_singPC[j+1] = Xhat_j + self.Xmeans # Collect scores and loadings for the actual PC. self.arrT = np.hstack(self.X_scoresList) self.arrP = np.hstack(self.X_loadingsList) # Compute Y loadings by using MLR (see Module 6, Equ. 6.8 ++) term_1 = npla.inv(np.dot(np.transpose(self.arrT), self.arrT)) term_2 = np.dot(np.transpose(self.arrT), self.arrY) self.arrQ = np.transpose(np.dot(term_1, term_2)) # ============================================================================== # From here computation of CALIBRATED explained variance starts # ============================================================================== # ========== COMPUTATIONS FOR X ========== # --------------------------------------------------------------------- # Create a list holding arrays of Xhat predicted calibration after each # component. Xhat is computed with Xhat = T*P' self.calXpredList = [] # Compute Xhat for 1 and more components (cumulatively). for ind in range(1,self.numPC+1): part_arrT = self.arrT[:,0:ind] part_arrP = self.arrP[:,0:ind] predXcal = np.dot(part_arrT, np.transpose(part_arrP)) if self.Xstand: Xhat = (predXcal * self.Xstd) + self.Xmeans else: Xhat = predXcal + self.Xmeans self.calXpredList.append(Xhat) # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Collect all PRESSE for individual variables in a dictionary. # Keys represent number of component. self.PRESSEdict_indVar_X = {} # Compute PRESS for calibration / estimation PRESSE_0_indVar_X = np.sum(np.square(st.center(self.arrX_input)), axis=0) self.PRESSEdict_indVar_X[0] = PRESSE_0_indVar_X # Compute PRESS for each Xhat for 1, 2, 3, etc number of components # and compute explained variance for ind, Xhat in enumerate(self.calXpredList): diffX = self.arrX_input - Xhat PRESSE_indVar_X = np.sum(np.square(diffX), axis=0) self.PRESSEdict_indVar_X[ind+1] = PRESSE_indVar_X # Now store all PRESSE values into an array. Then compute MSEE and # RMSEE. self.PRESSEarr_indVar_X = np.array(list(self.PRESSEdict_indVar_X.values())) self.MSEEarr_indVar_X = self.PRESSEarr_indVar_X / np.shape(self.arrX_input)[0] self.RMSEEarr_indVar_X = np.sqrt(self.MSEEarr_indVar_X) # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Compute explained variance for each variable in X using the # MSEP for each variable. Also collect PRESSE, MSEE, RMSEE in # their respective dictionaries for each variable. Keys represent # now variables and NOT components as above with # self.PRESSEdict_indVar_X self.cumCalExplVarXarr_indVar = np.zeros(np.shape(self.MSEEarr_indVar_X)) MSEE_0_indVar_X = self.MSEEarr_indVar_X[0,:] for ind, MSEE_indVar_X in enumerate(self.MSEEarr_indVar_X): explVar = (MSEE_0_indVar_X - MSEE_indVar_X) / MSEE_0_indVar_X * 100 self.cumCalExplVarXarr_indVar[ind] = explVar self.PRESSE_indVar_X = {} self.MSEE_indVar_X = {} self.RMSEE_indVar_X = {} self.cumCalExplVarX_indVar = {} for ind in range(np.shape(self.PRESSEarr_indVar_X)[1]): self.PRESSE_indVar_X[ind] = self.PRESSEarr_indVar_X[:,ind] self.MSEE_indVar_X[ind] = self.MSEEarr_indVar_X[:,ind] self.RMSEE_indVar_X[ind] = self.RMSEEarr_indVar_X[:,ind] self.cumCalExplVarX_indVar[ind] = self.cumCalExplVarXarr_indVar[:,ind] # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Collect total PRESSE across all variables in a dictionary. Also, # compute total calibrated explained variance in Y. self.PRESSE_total_dict_X = {} self.PRESSE_total_list_X = np.sum(self.PRESSEarr_indVar_X, axis=1) for ind, PRESSE_X in enumerate(self.PRESSE_total_list_X): self.PRESSE_total_dict_X[ind] = PRESSE_X # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Collect total MSEE across all variables in a dictionary. Also, # compute total validated explained variance in X. self.MSEE_total_dict_X = {} self.MSEE_total_list_X = np.sum(self.MSEEarr_indVar_X, axis=1) / np.shape(self.arrX_input)[1] MSEE_0_X = self.MSEE_total_list_X[0] # Compute total calibrated explained variance in X self.XcumCalExplVarList = [] if not self.Xstand: for ind, MSEE_X in enumerate(self.MSEE_total_list_X): perc = (MSEE_0_X - MSEE_X) / MSEE_0_X * 100 self.MSEE_total_dict_X[ind] = MSEE_X self.XcumCalExplVarList.append(perc) else: self.XcumCalExplVarArr = np.average(self.cumCalExplVarXarr_indVar, axis=1) self.XcumCalExplVarList = list(self.XcumCalExplVarArr) # Construct list with total validated explained variance in X self.XcalExplVarList = [] for ind, item in enumerate(self.XcumCalExplVarList): if ind == len(self.XcumCalExplVarList)-1: break explVarComp = self.XcumCalExplVarList[ind+1] - self.XcumCalExplVarList[ind] self.XcalExplVarList.append(explVarComp) # Construct a dictionary that holds predicted X (Xhat) from calibration # for each number of components. self.calXpredDict = {} for ind, item in enumerate(self.calXpredList): self.calXpredDict[ind+1] = item # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Compute total RMSEE and store values in a dictionary and list. self.RMSEE_total_dict_X = {} self.RMSEE_total_list_X = np.sqrt(self.MSEE_total_list_X) for ind, RMSEE_X in enumerate(self.RMSEE_total_list_X): self.RMSEE_total_dict_X[ind] = RMSEE_X # --------------------------------------------------------------------- # ========== COMPUTATIONS FOR Y ============ # --------------------------------------------------------------------- # Create a list holding arrays of Yhat predicted calibration after each # component. Yhat is computed with Yhat = T*Chat*Q' self.calYpredList = [] for ind in range(1, self.numPC+1): x_scores = self.arrT[:,0:ind] y_loadings = self.arrQ[:,0:ind] # c_regrCoeff = self.arrC[0:ind,0:ind] # Depending on whether Y was standardised or not compute Yhat # accordingly. if self.Ystand: Yhat_stand = np.dot(x_scores, np.transpose(y_loadings)) Yhat = (Yhat_stand * self.Ystd.reshape(1,-1)) + self.Ymeans.reshape(1,-1) else: Yhat = np.dot(x_scores, np.transpose(y_loadings)) + self.Ymeans.reshape(1,-1) self.calYpredList.append(Yhat) # Compute Y residuals and store in list self.Y_residualsList.append(self.arrY - Yhat) # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Collect all PRESSE for individual variables in a dictionary. # Keys represent number of component. self.PRESSEdict_indVar = {} # Compute PRESS for calibration / estimation PRESSE_0_indVar = np.sum(np.square(st.center(self.arrY_input)), axis=0) self.PRESSEdict_indVar[0] = PRESSE_0_indVar # Compute PRESS for each Yhat for 1, 2, 3, etc number of components # and compute explained variance for ind, Yhat in enumerate(self.calYpredList): diffY = self.arrY_input - Yhat PRESSE_indVar = np.sum(np.square(diffY), axis=0) self.PRESSEdict_indVar[ind+1] = PRESSE_indVar # Now store all PRESSE values into an array. Then compute MSEE and # RMSEE. self.PRESSEarr_indVar = np.array(list(self.PRESSEdict_indVar.values())) self.MSEEarr_indVar = self.PRESSEarr_indVar / np.shape(self.arrY_input)[0] self.RMSEEarr_indVar = np.sqrt(self.MSEEarr_indVar) # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Compute explained variance for each variable in Y using the # MSEP for each variable. Also collect PRESSE, MSEE, RMSEE in # their respective dictionaries for each variable. Keys represent # now variables and NOT components as above with # self.PRESSEdict_indVar self.cumCalExplVarYarr_indVar = np.zeros(np.shape(self.MSEEarr_indVar)) MSEE_0_indVar = self.MSEEarr_indVar[0,:] for ind, MSEE_indVar in enumerate(self.MSEEarr_indVar): explVar = (MSEE_0_indVar - MSEE_indVar) / MSEE_0_indVar * 100 self.cumCalExplVarYarr_indVar[ind] = explVar self.PRESSE_indVar = {} self.MSEE_indVar = {} self.RMSEE_indVar = {} self.cumCalExplVarY_indVar = {} for ind in range(np.shape(self.PRESSEarr_indVar)[1]): self.PRESSE_indVar[ind] = self.PRESSEarr_indVar[:,ind] self.MSEE_indVar[ind] = self.MSEEarr_indVar[:,ind] self.RMSEE_indVar[ind] = self.RMSEEarr_indVar[:,ind] self.cumCalExplVarY_indVar[ind] = self.cumCalExplVarYarr_indVar[:,ind] # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Collect total PRESSE across all variables in a dictionary. Also, # compute total calibrated explained variance in Y. self.PRESSE_total_dict = {} self.PRESSE_total_list = np.sum(self.PRESSEarr_indVar, axis=1) for ind, PRESSE in enumerate(self.PRESSE_total_list): self.PRESSE_total_dict[ind] = PRESSE # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Collect total MSEE across all variables in a dictionary. Also, # compute total calibrated explained variance in Y. self.MSEE_total_dict = {} self.MSEE_total_list = np.sum(self.MSEEarr_indVar, axis=1) / np.shape(self.arrY_input)[1] MSEE_0 = self.MSEE_total_list[0] # Compute total calibrated explained variance in Y self.YcumCalExplVarList = [] if not self.Ystand: for ind, MSEE in enumerate(self.MSEE_total_list): perc = (MSEE_0 - MSEE) / MSEE_0 * 100 self.MSEE_total_dict[ind] = MSEE self.YcumCalExplVarList.append(perc) else: self.YcumCalExplVarArr = np.average(self.cumCalExplVarYarr_indVar, axis=1) self.YcumCalExplVarList = list(self.YcumCalExplVarArr) # Construct list with total validated explained variance in Y self.YcalExplVarList = [] for ind, item in enumerate(self.YcumCalExplVarList): if ind == len(self.YcumCalExplVarList)-1: break explVarComp = self.YcumCalExplVarList[ind+1] - self.YcumCalExplVarList[ind] self.YcalExplVarList.append(explVarComp) # Construct a dictionary that holds predicted Y (Yhat) from calibration # for each number of components. self.calYpredDict = {} for ind, item in enumerate(self.calYpredList): self.calYpredDict[ind+1] = item # --------------------------------------------------------------------- # --------------------------------------------------------------------- # Compute total RMSEP and store values in a dictionary and list. self.RMSEE_total_dict = {} self.RMSEE_total_list = np.sqrt(self.MSEE_total_list) for ind, RMSEE in enumerate(self.RMSEE_total_list): self.RMSEE_total_dict[ind] = RMSEE # --------------------------------------------------------------------- # ============================================================================== # From here cross validation procedure starts # ============================================================================== if self.cvType is not None: numObj = np.shape(self.arrX)[0] if self.cvType[0] == "loo": print("loo") cvComb = cv.LeaveOneOut(numObj) elif self.cvType[0] == "KFold": print("KFold") cvComb = cv.KFold(numObj, k=self.cvType[1]) elif self.cvType[0] == "lolo": print("lolo") cvComb = cv.LeaveOneLabelOut(self.cvType[1]) else: print('Requested form of cross validation is not available') # Collect predicted y (i.e. yhat) for each CV segment in a # dictionary according to nubmer of PC self.valYpredDict = {} for ind in range(1, self.numPC+1): self.valYpredDict[ind] = np.zeros(np.shape(self.arrY_input)) # Collect predicted x (i.e. xhat) for each CV segment in a # dictionary according to number of PC self.valXpredDict = {} for ind in range(1, self.numPC+1): self.valXpredDict[ind] = np.zeros(np.shape(self.arrX_input)) # Collect train and test set in dictionaries for each componentand put # them in this list. self.cvTrainAndTestDataList = [] # Collect: validation X scores T, validation X loadings P, # validation Y scores U, validation Y loadings Q, # validation X loading weights W and scores regression coefficients C # in lists for each PC self.val_arrTlist = [] self.val_arrPlist = [] self.val_arrQlist = [] # Collect train and test set in a dictionary for each component self.cvTrainAndTestDataList = [] self.X_train_means_list = np.zeros(np.shape(self.arrX_input)) self.Y_train_means_list = np.zeros(np.shape(self.arrY_input)) # First devide into combinations of training and test sets for train_index, test_index in cvComb: X_train, X_test = cv.split(train_index, test_index, self.arrX_input) Y_train, Y_test = cv.split(train_index, test_index, self.arrY_input) subDict = {} subDict['x train'] = X_train subDict['x test'] = X_test subDict['y train'] = Y_train subDict['y test'] = Y_test self.cvTrainAndTestDataList.append(subDict) # ------------------------------------------------------------- # Center or standardise X according to users choice if self.Xstand: X_train_mean = np.average(X_train, axis=0).reshape(1,-1) X_train_std = np.std(X_train, axis=0, ddof=1).reshape(1,-1) X_train_proc = (X_train - X_train_mean) / X_train_std # Standardise X test using mean and STD from training set X_test_proc = (X_test - X_train_mean) / X_train_std else: X_train_mean = np.average(X_train, axis=0).reshape(1,-1) X_train_proc = X_train - X_train_mean # Center X test using mean from training set X_test_proc = X_test - X_train_mean # ------------------------------------------------------------- self.X_train_means_list[test_index,] = X_train_mean # ------------------------------------------------------------- # Center or standardise Y according to users choice if self.Ystand: Y_train_mean = np.average(Y_train, axis=0) Y_train_std = np.std(Y_train, axis=0, ddof=1) Y_train_proc = (Y_train - Y_train_mean) / Y_train_std else: Y_train_mean = np.average(Y_train, axis=0) Y_train_proc = Y_train - Y_train_mean # ------------------------------------------------------------- self.Y_train_means_list[test_index,] = Y_train_mean # Here the NIPALS PCA algorithm starts # ------------------------------------ threshold = 1.0e-8 X_new = X_train_proc.copy() # Collect scores and loadings in lists that will be later converted # to arrays. scoresList = [] loadingsList = [] # Compute number of principal components as specified by user for j in range(self.numPC): # Check if first column contains only zeros. If yes, then # NIPALS will not converge and (npla.norm(num) will contain # nan's). Rather put in other starting values. if not np.any(X_new[:, 0]): X_repl_nonCent = np.arange(np.shape(X_new)[0]) X_repl = X_repl_nonCent - np.mean(X_repl_nonCent) t = X_repl.reshape(-1,1) else: t = X_new[:,0].reshape(-1,1) # Iterate until score vector converges according to threshold while 1: num = np.dot(np.transpose(X_new), t) denom = npla.norm(num) p = num / denom t_new = np.dot(X_new, p) diff = t - t_new t = t_new.copy() SS = np.sum(np.square(diff)) # Check whether sum of squares is smaller than threshold. Break # out of loop if true and start computation of next PC. if SS < threshold: scoresList.append(t) loadingsList.append(p) break # Peel off information explained by actual component and continue with # decomposition on the residuals (X_new = E). X_old = X_new.copy() Xhat_j = np.dot(t, np.transpose(p)) X_new = X_old - Xhat_j # Collect X scores and X loadings for the actual PC. valT = np.hstack(scoresList) valP = np.hstack(loadingsList) self.val_arrTlist.append(valT) self.val_arrPlist.append(valP) # Compute Y loadings term_1 = npla.inv(np.dot(np.transpose(valT),valT)) term_2 = np.dot(np.transpose(valT),Y_train_proc) valQ = np.transpose(np.dot(term_1,term_2)) self.val_arrQlist.append(valQ) # Compute the scores for the left out object projT = np.dot(X_test_proc, valP) dims = np.shape(projT)[1] # Construct validated predicted X first for one component, # then two, three, etc for ind in range(0, dims): # part_projT = projT[:,0:ind+1].reshape(1,-1) part_projT = projT[:,0:ind+1] part_valP = valP[:,0:ind+1] valPredX_proc = np.dot(part_projT, np.transpose(part_valP)) part_valQ = valQ[:,0:ind+1] valPredY_proc = np.dot(part_projT, np.transpose(part_valQ)) # Depending on preprocessing re-process in same manner # in order to get values that compare to original values. if self.Xstand: valPredX = (valPredX_proc * X_train_std) + X_train_mean else: valPredX = valPredX_proc + X_train_mean self.valXpredDict[ind+1][test_index,] = valPredX if self.Ystand: valPredY = (valPredY_proc * Y_train_std) + Y_train_mean else: valPredY = valPredY_proc + Y_train_mean self.valYpredDict[ind+1][test_index,] = valPredY # Put all predicitons into an array that corresponds to the # original variable self.valXpredList = [] valPreds = self.valXpredDict.values() for preds in valPreds: pc_arr = np.vstack(preds) self.valXpredList.append(pc_arr) # Put all predicitons into an array that corresponds to the # original variable self.valYpredList = [] valPreds = self.valYpredDict.values() for preds in valPreds: pc_arr = np.vstack(preds) self.valYpredList.append(pc_arr) # ============================================================================== # From here VALIDATED explained variance is computed # ============================================================================== # ========== Computations for X ========== # ----------------------------------------------------------------- # Compute PRESSCV (PRediction Error Sum of Squares) for cross # validation self.valXpredList = self.valXpredDict.values() # Collect all PRESS in a dictionary. Keys represent number of # component. self.PRESSCVdict_indVar_X = {} # First compute PRESSCV for zero components self.PRESSCV_0_indVar_X = np.sum(np.square(self.arrX_input-self.X_train_means_list), axis=0) self.PRESSCVdict_indVar_X[0] = self.PRESSCV_0_indVar_X # Compute PRESSCV for each Yhat for 1, 2, 3, etc number of # components and compute explained variance for ind, Xhat in enumerate(self.valXpredList): # diffX = self.arrX_input - Xhat diffX = self.arrX_input - Xhat PRESSCV_indVar_X = np.sum(np.square(diffX), axis=0) self.PRESSCVdict_indVar_X[ind+1] = PRESSCV_indVar_X # Now store all PRESSCV values into an array. Then compute MSECV # and RMSECV. self.PRESSCVarr_indVar_X = np.array(list(self.PRESSCVdict_indVar_X.values())) self.MSECVarr_indVar_X = self.PRESSCVarr_indVar_X / np.shape(self.arrX_input)[0] self.RMSECVarr_indVar_X = np.sqrt(self.MSECVarr_indVar_X) # ----------------------------------------------------------------- # ----------------------------------------------------------------- # Compute explained variance for each variable in X using the # MSEP for each variable. Also collect PRESS, MSECV, RMSECV in # their respective dictionaries for each variable. Keys represent # now variables and NOT components as above with # self.PRESSdict_indVar self.cumValExplVarXarr_indVar = np.zeros(np.shape(self.MSECVarr_indVar_X)) MSECV_0_indVar_X = self.MSECVarr_indVar_X[0,:] for ind, MSECV_indVar_X in enumerate(self.MSECVarr_indVar_X): explVar = (MSECV_0_indVar_X - MSECV_indVar_X) / MSECV_0_indVar_X * 100 self.cumValExplVarXarr_indVar[ind] = explVar self.PRESSCV_indVar_X = {} self.MSECV_indVar_X = {} self.RMSECV_indVar_X = {} self.cumValExplVarX_indVar = {} for ind in range(np.shape(self.PRESSCVarr_indVar_X)[1]): self.PRESSCV_indVar_X[ind] = self.PRESSCVarr_indVar_X[:,ind] self.MSECV_indVar_X[ind] = self.MSECVarr_indVar_X[:,ind] self.RMSECV_indVar_X[ind] = self.RMSECVarr_indVar_X[:,ind] self.cumValExplVarX_indVar[ind] = self.cumValExplVarXarr_indVar[:,ind] # ----------------------------------------------------------------- # ----------------------------------------------------------------- # Collect total PRESSCV across all variables in a dictionary. self.PRESSCV_total_dict_X = {} self.PRESSCV_total_list_X = np.sum(self.PRESSCVarr_indVar_X, axis=1) for ind, PRESSCV_X in enumerate(self.PRESSCV_total_list_X): self.PRESSCV_total_dict_X[ind] = PRESSCV_X # ----------------------------------------------------------------- # ----------------------------------------------------------------- # Collect total MSECV across all variables in a dictionary. Also, # compute total validated explained variance in X. self.MSECV_total_dict_X = {} self.MSECV_total_list_X = np.sum(self.MSECVarr_indVar_X, axis=1) / np.shape(self.arrX_input)[1] MSECV_0_X = self.MSECV_total_list_X[0] # Compute total validated explained variance in X self.XcumValExplVarList = [] if not self.Xstand: for ind, MSECV_X in enumerate(self.MSECV_total_list_X): perc = (MSECV_0_X - MSECV_X) / MSECV_0_X * 100 self.MSECV_total_dict_X[ind] = MSECV_X self.XcumValExplVarList.append(perc) else: self.XcumValExplVarArr = np.average(self.cumValExplVarXarr_indVar, axis=1) self.XcumValExplVarList = list(self.XcumValExplVarArr) # Construct list with total validated explained variance in X in # each component self.XvalExplVarList = [] for ind, item in enumerate(self.XcumValExplVarList): if ind == len(self.XcumValExplVarList)-1: break explVarComp = self.XcumValExplVarList[ind+1] - self.XcumValExplVarList[ind] self.XvalExplVarList.append(explVarComp) # ----------------------------------------------------------------- # ----------------------------------------------------------------- # Compute total RMSECV and store values in a dictionary and list. self.RMSECV_total_dict_X = {} self.RMSECV_total_list_X = np.sqrt(self.MSECV_total_list_X) for ind, RMSECV_X in enumerate(self.RMSECV_total_list_X): self.RMSECV_total_dict_X[ind] = RMSECV_X # ----------------------------------------------------------------- # ========== Computations for Y ========== # ----------------------------------------------------------------- # Compute PRESSCV (PRediction Error Sum of Squares) for cross # validation self.valYpredList = self.valYpredDict.values() # Collect all PRESS in a dictionary. Keys represent number of # component. self.PRESSdict_indVar = {} # First compute PRESSCV for zero components self.PRESSCV_0_indVar = np.sum(np.square(self.arrY_input-self.Y_train_means_list), axis=0) self.PRESSdict_indVar[0] = self.PRESSCV_0_indVar # Compute PRESSCV for each Yhat for 1, 2, 3, etc number of components # and compute explained variance for ind, Yhat in enumerate(self.valYpredList): diffY = self.arrY_input - Yhat PRESSCV_indVar = np.sum(np.square(diffY), axis=0) self.PRESSdict_indVar[ind+1] = PRESSCV_indVar # Now store all PRESSCV values into an array. Then compute MSECV and # RMSECV. self.PRESSCVarr_indVar = np.array(list(self.PRESSdict_indVar.values())) self.MSECVarr_indVar = self.PRESSCVarr_indVar / np.shape(self.arrY_input)[0] self.RMSECVarr_indVar = np.sqrt(self.MSECVarr_indVar) # ----------------------------------------------------------------- # ----------------------------------------------------------------- # Compute explained variance for each variable in Y using the # MSECV for each variable. Also collect PRESS, MSECV, RMSECV in # their respective dictionaries for each variable. Keys represent # now variables and NOT components as above with # self.PRESSdict_indVar self.cumValExplVarYarr_indVar = np.zeros(np.shape(self.MSECVarr_indVar)) MSECV_0_indVar = self.MSECVarr_indVar[0,:] for ind, MSECV_indVar in enumerate(self.MSECVarr_indVar): explVar = (MSECV_0_indVar - MSECV_indVar) / MSECV_0_indVar * 100 self.cumValExplVarYarr_indVar[ind] = explVar self.PRESSCV_indVar = {} self.MSECV_indVar = {} self.RMSECV_indVar = {} self.cumValExplVarY_indVar = {} for ind in range(np.shape(self.PRESSCVarr_indVar)[1]): self.PRESSCV_indVar[ind] = self.PRESSCVarr_indVar[:,ind] self.MSECV_indVar[ind] = self.MSECVarr_indVar[:,ind] self.RMSECV_indVar[ind] = self.RMSECVarr_indVar[:,ind] self.cumValExplVarY_indVar[ind] = self.cumValExplVarYarr_indVar[:,ind] # ----------------------------------------------------------------- # ----------------------------------------------------------------- # Collect total PRESSCV across all variables in a dictionary. self.PRESSCV_total_dict = {} self.PRESSCV_total_list = np.sum(self.PRESSCVarr_indVar, axis=1) for ind, PRESSCV in enumerate(self.PRESSCV_total_list): self.PRESSCV_total_dict[ind] = PRESSCV # ----------------------------------------------------------------- # ----------------------------------------------------------------- # Collect total MSECV across all variables in a dictionary. Also, # compute total validated explained variance in Y. self.MSECV_total_dict = {} self.MSECV_total_list = np.sum(self.MSECVarr_indVar, axis=1) / np.shape(self.arrY_input)[1] MSECV_0 = self.MSECV_total_list[0] # Compute total validated explained variance in Y self.YcumValExplVarList = [] if not self.Ystand: for ind, MSECV in enumerate(self.MSECV_total_list): perc = (MSECV_0 - MSECV) / MSECV_0 * 100 self.MSECV_total_dict[ind] = MSECV self.YcumValExplVarList.append(perc) else: self.YcumValExplVarArr = np.average(self.cumValExplVarYarr_indVar, axis=1) self.YcumValExplVarList = list(self.YcumValExplVarArr) # Construct list with total validated explained variance in Y in # each component self.YvalExplVarList = [] for ind, item in enumerate(self.YcumValExplVarList): if ind == len(self.YcumValExplVarList)-1: break explVarComp = self.YcumValExplVarList[ind+1] - self.YcumValExplVarList[ind] self.YvalExplVarList.append(explVarComp) # ----------------------------------------------------------------- # ----------------------------------------------------------------- # Compute total RMSECV and store values in a dictionary and list. self.RMSECV_total_dict = {} self.RMSECV_total_list = np.sqrt(self.MSECV_total_list) for ind, RMSECV in enumerate(self.RMSECV_total_list): self.RMSECV_total_dict[ind] = RMSECV # ----------------------------------------------------------------- def modelSettings(self): """ Returns a dictionary holding the settings under which NIPALS PCR was run. """ # Collect settings under which PCA was run. self.settings = {} self.settings['numComp'] = self.numPC self.settings['Xstand'] = self.Xstand self.settings['arrX'] = self.arrX_input self.settings['analysed arrX'] = self.arrX self.settings['arrY'] = self.arrY_input self.settings['analysed arrY'] = self.arrX return self.settings def X_means(self): """ Returns array holding column means of array X. """ return self.Xmeans.reshape(1,-1) def X_scores(self): """ Returns array holding scores of array X. First column holds scores for component 1, second column holds scores for component 2, etc. """ return self.arrT def X_loadings(self): """ Returns array holding loadings of array X. Rows represent variables and columns represent components. First column holds loadings for component 1, second column holds scores for component 2, etc. """ return self.arrP def X_corrLoadings(self): """ Returns array holding correlation loadings of array X. First column holds correlation loadings for component 1, second column holds correlation loadings for component 2, etc. """ # Creates empty matrix for correlation loadings arr_corrLoadings = np.zeros((np.shape(self.arrT)[1], np.shape(self.arrP)[0]), float) # Compute correlation loadings: # For each component in score matrix for PC in range(np.shape(self.arrT)[1]): PCscores = self.arrT[:, PC] # For each variable/attribute in original matrix (not meancentered) for var in range(np.shape(self.arrX)[1]): origVar = self.arrX[:, var] corrs = np.corrcoef(PCscores, origVar) arr_corrLoadings[PC, var] = corrs[0,1] self.arr_corrLoadings = np.transpose(arr_corrLoadings) return self.arr_corrLoadings def X_residuals(self): """ Returns a dictionary holding the residual arrays for array X after each computed component. Dictionary key represents order of component. """ return self.X_residualsDict def X_calExplVar(self): """ Returns a list holding the calibrated explained variance for each component. First number in list is for component 1, second number for component 2, etc. """ return self.XcalExplVarList def X_cumCalExplVar_indVar(self): """ Returns an array holding the cumulative calibrated explained variance for each variable in X after each component. First row represents zero components, second row represents one component, third row represents two components, etc. Columns represent variables. """ return self.cumCalExplVarXarr_indVar def X_cumCalExplVar(self): """ Returns a list holding the cumulative calibrated explained variance for array X after each component. """ return self.XcumCalExplVarList def X_predCal(self): """ Returns a dictionary holding the predicted arrays Xhat from calibration after each computed component. Dictionary key represents order of component. """ return self.calXpredDict def X_PRESSE_indVar(self): """ Returns array holding PRESSE for each individual variable in X acquired through calibration after each computed component. First row is PRESSE for zero components, second row for component 1, third row for component 2, etc. """ return self.PRESSEarr_indVar_X def X_PRESSE(self): """ Returns array holding PRESSE across all variables in X acquired through calibration after each computed component. First row is PRESSE for zero components, second row for component 1, third row for component 2, etc. """ return self.PRESSE_total_list_X def X_MSEE_indVar(self): """ Returns an array holding MSEE for each variable in array X acquired through calibration after each computed component. First row holds MSEE for zero components, second row for component 1, third row for component 2, etc. """ return self.MSEEarr_indVar_X def X_MSEE(self): """ Returns an array holding MSEE across all variables in X acquired through calibration after each computed component. First row is MSEE for zero components, second row for component 1, third row for component 2, etc. """ return self.MSEE_total_list_X def X_RMSEE_indVar(self): """ Returns an array holding RMSEE for each variable in array X acquired through calibration after each component. First row holds RMSEE for zero components, second row for component 1, third row for component 2, etc. """ return self.RMSEEarr_indVar_X def X_RMSEE(self): """ Returns an array holding RMSEE across all variables in X acquired through calibration after each computed component. First row is RMSEE for zero components, second row for component 1, third row for component 2, etc. """ return self.RMSEE_total_list_X def X_valExplVar(self): """ Returns a list holding the validated explained variance for X after each component. First number in list is for component 1, second number for component 2, third number for component 3, etc. """ return self.XvalExplVarList def X_cumValExplVar_indVar(self): """ Returns an array holding the cumulative validated explained variance for each variable in X after each component. First row represents zero components, second row represents component 1, third row for compnent 2, etc. Columns represent variables. """ return self.cumValExplVarXarr_indVar def X_cumValExplVar(self): """ Returns a list holding the cumulative validated explained variance for array X after each component. First number represents zero components, second number represents component 1, etc. """ return self.XcumValExplVarList def X_predVal(self): """ Returns dictionary holding arrays of predicted Xhat after each component from validation. Dictionary key represents order of component. """ return self.valXpredDict def X_PRESSCV_indVar(self): """ Returns array holding PRESSCV for each individual variable in X acquired through cross validation after each computed component. First row is PRESSCV for zero components, second row for component 1, third row for component 2, etc. """ return self.PRESSCVarr_indVar_X def X_PRESSCV(self): """ Returns an array holding PRESSCV across all variables in X acquired through cross validation after each computed component. First row is PRESSCV for zero components, second row for component 1, third row for component 2, etc. """ return self.PRESSCV_total_list_X def X_MSECV_indVar(self): """ Returns an arrary holding MSECV for each variable in X acquired through cross validation. First row is MSECV for zero components, second row for component 1, etc. """ return self.MSECVarr_indVar_X def X_MSECV(self): """ Returns an array holding MSECV across all variables in X acquired through cross validation after each computed component. First row is MSECV for zero components, second row for component 1, third row for component 2, etc. """ return self.MSECV_total_list_X def X_RMSECV_indVar(self): """ Returns an arrary holding RMSECV for each variable in X acquired through cross validation after each computed component. First row is RMSECV for zero components, second row for component 1, third row for component 2, etc. """ return self.RMSECVarr_indVar_X def X_RMSECV(self): """ Returns an array holding RMSECV across all variables in X acquired through cross validation after each computed component. First row is RMSECV for zero components, second row for component 1, third row for component 2, etc. """ return self.RMSECV_total_list_X def X_scores_predict(self, Xnew, numComp=None): """ Returns array of X scores from new X data using the exsisting model. Rows represent objects and columns represent components. """ if numComp == None: numComp = self.numPC assert numComp <= self.numPC, ValueError('Maximum numComp = ' + str(self.numPC)) assert numComp > -1, ValueError('numComp must be >= 0') # First pre-process new X data accordingly if self.Xstand: x_new = (Xnew - np.average(self.arrX_input, axis=0)) / np.std(self.arrX_input, ddof=1) else: x_new = (Xnew - np.average(self.arrX_input, axis=0)) # Compute the scores for new object projT = np.dot(x_new, self.arrP[:, 0:numComp]) return projT def Y_means(self): """ Returns array holding means of columns in array Y. """ return self.Ymeans.reshape(1,-1) def Y_loadings(self): """ Returns an array holding loadings C of array Y. Rows represent variables and columns represent components. First column for component 1, second columns for component 2, etc. """ return self.arrQ def Y_corrLoadings(self): """ Returns array holding correlation loadings of array X. First column holds correlation loadings for component 1, second column holds correlation loadings for component 2, etc. """ # Creates empty matrix for correlation loadings arr_YcorrLoadings = np.zeros((np.shape(self.arrT)[1], np.shape(self.arrQ)[0]), float) # Compute correlation loadings: # For each component in score matrix for PC in range(np.shape(self.arrT)[1]): PCscores = self.arrT[:, PC] # For each variable/attribute in original matrix (not meancentered) for var in range(np.shape(self.arrY)[1]): origVar = self.arrY[:, var] corrs = np.corrcoef(PCscores, origVar) arr_YcorrLoadings[PC, var] = corrs[0,1] self.arr_YcorrLoadings = np.transpose(arr_YcorrLoadings) return self.arr_YcorrLoadings def Y_residuals(self): """ Returns a dictionary holding residuals F of array Y after each component. Dictionary key represents order of component. """ # Create empty dictionary that will hold residuals Y_residualsDict = {} # Fill dictionary with residuals arrays from residuals list for ind, item in enumerate(self.Y_residualsList): Y_residualsDict[ind] = item return Y_residualsDict def Y_calExplVar(self): """ Returns a list holding the calibrated explained variance for each component. First number in list is for component 1, second number for component 2, etc. """ return self.YcalExplVarList def Y_cumCalExplVar_indVar(self): """ Returns an array holding the cumulative calibrated explained variance for each variable in Y after each component. First row represents zero components, second row represents one component, third row represents two components, etc. Columns represent variables. """ return self.cumCalExplVarYarr_indVar def Y_cumCalExplVar(self): """ Returns a list holding the cumulative calibrated explained variance for array X after each component. First number represents zero components, second number represents component 1, etc. """ return self.YcumCalExplVarList def Y_predCal(self): """ Returns dictionary holding arrays of predicted Yhat after each component from calibration. Dictionary key represents order of components. """ return self.calYpredDict def Y_PRESSE_indVar(self): """ Returns array holding PRESSE for each individual variable in Y acquired through calibration after each component. First row is PRESSE for zero components, second row for component 1, third row for component 2, etc. """ return self.PRESSEarr_indVar def Y_PRESSE(self): """ Returns array holding PRESSE across all variables in Y acquired through calibration after each computed component. First row is PRESSE for zero components, second row for component 1, third row for component 2, etc. """ return self.PRESSE_total_list def Y_MSEE_indVar(self): """ Returns an array holding MSEE for each variable in array Y acquired through calibration after each computed component. First row holds MSEE for zero components, second row for component 1, third row for component 2, etc. """ return self.MSEEarr_indVar def Y_MSEE(self): """ Returns an array holding MSEE across all variables in Y acquired through calibration after each computed component. First row is MSEE for zero components, second row for component 1, third row for component 2, etc. """ return self.MSEE_total_list def Y_RMSEE_indVar(self): """ Returns an array holding RMSEE for each variable in array Y acquired through calibration after each component. First row holds RMSEE for zero components, second row for component 1, third row for component 2, etc. """ return self.RMSEEarr_indVar def Y_RMSEE(self): """ Returns an array holding RMSEE across all variables in Y acquired through calibration after each computed component. First row is RMSEE for zero components, second row for component 1, third row for component 2, etc. """ return self.RMSEE_total_list def Y_valExplVar(self): """ Returns a list holding the validated explained variance for Y after each component. First number in list is for component 1, second number for component 2, third number for component 3, etc. """ return self.YvalExplVarList def Y_cumValExplVar_indVar(self): """ Returns an array holding the cumulative validated explained variance for each variable in Y after each component. First row represents zero components, second row represents component 1, third row for compnent 2, etc. Columns represent variables. """ return self.cumValExplVarYarr_indVar def Y_cumValExplVar(self): """ Returns a list holding the cumulative validated explained variance for array X after each component. First number represents zero components, second number represents component 1, etc. """ return self.YcumValExplVarList def Y_predVal(self): """ Returns dictionary holding arrays of predicted Yhat after each component from validation. Dictionary key represents order of component. """ return self.valYpredDict def Y_PRESSCV_indVar(self): """ Returns an array holding PRESSCV of each variable in array Y acquired through cross validation after each computed component. First row is PRESSCV for zero components, second row component 1, third row for component 2, etc. """ return self.PRESSCVarr_indVar def Y_PRESSCV(self): """ Returns an array holding PRESSCV across all variables in Y acquired through cross validation after each computed component. First row is PRESSCV for zero components, second row component 1, third row for component 2, etc. """ return self.PRESSCV_total_list def Y_MSECV_indVar(self): """ Returns an array holding MSECV of each variable in array Y acquired through cross validation after each computed component. First row is MSECV for zero components, second row component 1, third row for component 2, etc. """ return self.MSECVarr_indVar def Y_MSECV(self): """ Returns an array holding MSECV across all variables in Y acquired through cross validation after each computed component. First row is MSECV for zero components, second row component 1, third row for component 2, etc. """ return self.MSECV_total_list def Y_RMSECV_indVar(self): """ Returns an array holding RMSECV for each variable in array Y acquired through cross validation after each computed component. First row is RMSECV for zero components, second row component 1, third row for component 2, etc. """ return self.RMSECVarr_indVar def Y_RMSECV(self): """ Returns an array holding RMSECV across all variables in Y acquired through cross validation after each computed component. First row is RMSECV for zero components, second row component 1, third row for component 2, etc. """ return self.RMSECV_total_list def regressionCoefficients(self, numComp=1): """ Returns regression coefficients from the fitted model using all available samples and a chosen number of components. """ assert numComp <= self.numPC, ValueError('Maximum numComp = ' + str(self.numPC)) assert numComp > -1, ValueError('numComp must be >= 0') # B = P*Q' if self.Ystand: return np.dot(self.arrP[:,0:numComp], np.transpose(self.arrQ[:,0:numComp])) \ * np.std(self.arrY_input, ddof=1, axis=0).reshape(1,-1) else: return np.dot(self.arrP[:,0:numComp], np.transpose(self.arrQ[:,0:numComp])) def Y_predict(self, Xnew, numComp=1): """ Return predicted Yhat from new measurements X. """ assert numComp <= self.numPC, ValueError('Maximum numComp = ' + str(self.numPC)) assert numComp > -1, ValueError('numComp must be >= 0') # Return average if numComp == 0 if numComp == 0: Yhat = np.zeros(np.shape(self.arrY_input)) + np.average(self.arrY_input, axis=0) else: # First pre-process new X data accordingly if self.Xstand: x_new = (Xnew - np.average(self.arrX_input, axis=0)) / np.std(self.arrX_input, ddof=1, axis=0) else: x_new = (Xnew - np.average(self.arrX_input, axis=0)) # Compute the scores for new object projT = np.dot(x_new, self.arrP[:, 0:numComp]) # Compute processed responses y_pred_proc = np.dot(projT, np.transpose(self.arrQ[:, 0:numComp])) # Compute predicted values back to original scale if self.Ystand: Yhat = (y_pred_proc * np.std(self.arrY_input, ddof=1, axis=0).reshape(1,-1)) + np.average(self.arrY_input, axis=0) else: Yhat = y_pred_proc + np.average(self.arrY_input, axis=0) return Yhat def cvTrainAndTestData(self): """ Returns a list consisting of dictionaries holding training and test sets. """ return self.cvTrainAndTestDataList def corrLoadingsEllipses(self): """ Returns coordinates for the ellipses that represent 50% and 100% expl. variance in correlation loadings plot. """ # Create range for ellipses t = np.arange(0.0, 2*np.pi, 0.01) # Compuing the outer circle (100 % expl. variance) xcords100perc = np.cos(t) ycords100perc = np.sin(t) # Computing inner circle xcords50perc = 0.707 * np.cos(t) ycords50perc = 0.707 * np.sin(t) # Collect ellipse coordinates in dictionary ellipses = {} ellipses['x50perc'] = xcords50perc ellipses['y50perc'] = ycords50perc ellipses['x100perc'] = xcords100perc ellipses['y100perc'] = ycords100perc return ellipses