""" Tests for the choice_calcs.py file. """ import unittest import warnings from collections import OrderedDict import numpy as np import numpy.testing as npt import pandas as pd from scipy.sparse import csr_matrix from scipy.sparse import diags from scipy.sparse import block_diag import pylogit.asym_logit as asym import pylogit.conditional_logit as mnl import pylogit.choice_calcs as cc # Use the following to always show the warnings np.seterr(all='warn') warnings.simplefilter("always") class GenericTestCase(unittest.TestCase): """ Defines the common setUp method used for the different type of tests. """ def setUp(self): # The set up being used is one where there are two choice situations, # The first having three alternatives, and the second having only two # alternatives. There is one generic variable. Two alternative # specific constants and all three shape parameters are used. # Create the betas to be used during the tests self.fake_betas = np.array([-0.6]) # Create the fake outside intercepts to be used during the tests self.fake_intercepts = np.array([1, 0.5]) # Create names for the intercept parameters self.fake_intercept_names = ["ASC 1", "ASC 2"] # Record the position of the intercept that is not being estimated self.fake_intercept_ref_pos = 2 # Create the shape parameters to be used during the tests. Note that # these are the reparameterized shape parameters, thus they will be # exponentiated in the fit_mle process and various calculations. self.fake_shapes = np.array([-1, 1]) # Create names for the intercept parameters self.fake_shape_names = ["Shape 1", "Shape 2"] # Record the position of the shape parameter that is being constrained self.fake_shape_ref_pos = 2 # Calculate the 'natural' shape parameters self.natural_shapes = asym._convert_eta_to_c(self.fake_shapes, self.fake_shape_ref_pos) # Create an array of all model parameters self.fake_all_params = np.concatenate((self.fake_shapes, self.fake_intercepts, self.fake_betas)) # The mapping between rows and alternatives is given below. self.fake_rows_to_alts = csr_matrix(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1]])) # Create the mapping between rows and individuals self.fake_rows_to_obs = csr_matrix(np.array([[1, 0], [1, 0], [1, 0], [0, 1], [0, 1]])) # Create the fake design matrix with columns denoting X # The intercepts are not included because they are kept outside the # index in the scobit model. self.fake_design = np.array([[1], [2], [3], [1.5], [3.5]]) # Create the index array for this set of choice situations self.fake_index = self.fake_design.dot(self.fake_betas) # Create the needed dataframe for the Asymmetric Logit constructor self.fake_df = pd.DataFrame({"obs_id": [1, 1, 1, 2, 2], "alt_id": [1, 2, 3, 1, 3], "choice": [0, 1, 0, 0, 1], "x": self.fake_design[:, 0], "intercept": [1 for i in range(5)]}) # Record the various column names self.alt_id_col = "alt_id" self.obs_id_col = "obs_id" self.choice_col = "choice" # Store the choices as their own array self.choice_array = self.fake_df[self.choice_col].values # Create the index specification and name dictionaryfor the model self.fake_specification = OrderedDict() self.fake_names = OrderedDict() self.fake_specification["x"] = [[1, 2, 3]] self.fake_names["x"] = ["x (generic coefficient)"] # Bundle args and kwargs used to construct the Asymmetric Logit model. self.constructor_args = [self.fake_df, self.alt_id_col, self.obs_id_col, self.choice_col, self.fake_specification] # Create a variable for the kwargs being passed to the constructor self.constructor_kwargs = {"intercept_ref_pos": self.fake_intercept_ref_pos, "shape_ref_pos": self.fake_shape_ref_pos, "names": self.fake_names, "intercept_names": self.fake_intercept_names, "shape_names": self.fake_shape_names} # Initialize a basic Asymmetric Logit model whose coefficients will be # estimated. self.model_obj = asym.MNAL(*self.constructor_args, **self.constructor_kwargs) # Store a ridge penalty for use in calculations. self.ridge = 0.5 return None class ComputationalTests(GenericTestCase): """ Tests the computational functions to make sure that they return the expected results. """ # Store a utility transformation function for the tests def utility_transform(self, sys_utilities, alt_IDs, rows_to_alts, shape_params, intercept_params): return sys_utilities[:, None] def test_calc_asymptotic_covariance(self): """ Ensure that the correct Huber-White covariance matrix is calculated. """ ones_array = np.ones(5) # Create the hessian matrix for testing. It will be a 5 by 5 matrix. test_hessian = np.diag(2 * ones_array) # Create the approximation of the Fisher Information Matrix test_fisher_matrix = np.diag(ones_array) # Create the inverse of the hessian matrix. test_hess_inverse = np.diag(0.5 * ones_array) # Calculated the expected result expected_result = np.dot(test_hess_inverse, np.dot(test_fisher_matrix, test_hess_inverse)) # Alias the function being tested func = cc.calc_asymptotic_covariance # Perform the test. function_results = func(test_hessian, test_fisher_matrix) self.assertIsInstance(function_results, np.ndarray) self.assertEqual(function_results.shape, test_hessian.shape) npt.assert_allclose(expected_result, function_results) return None def test_log_likelihood(self): """ Ensure that we correctly calculate the log-likelihood, both with and without ridge penalties, and both with and without shape and intercept parameters. """ # Create a utility transformation function for testing def test_utility_transform(x, *args): return x # Calculate the index for each alternative for each individual test_index = self.fake_design.dot(self.fake_betas) # Exponentiate each index value exp_test_index = np.exp(test_index) # Calculate the denominator for each probability interim_dot_product = self.fake_rows_to_obs.T.dot(exp_test_index) test_denoms = self.fake_rows_to_obs.dot(interim_dot_product) # Calculate the probabilities for each individual prob_array = exp_test_index / test_denoms # Calculate what the log-likelihood should be choices = self.fake_df[self.choice_col].values expected_log_likelihood = np.dot(choices, np.log(prob_array)) # Create a set of intercepts, that are all zeros intercepts = np.zeros(2) # Combine all the 'parameters' test_all_params = np.concatenate([intercepts, self.fake_betas], axis=0) # Calculate what the log-likelihood should be with a ridge penalty penalty = self.ridge * (test_all_params**2).sum() expected_log_likelihood_penalized = expected_log_likelihood - penalty # Alias the function being tested func = cc.calc_log_likelihood # Create the arguments for the function being tested args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, choices, test_utility_transform] kwargs = {"intercept_params": intercepts, "shape_params": None} # Perform the tests function_results = func(*args, **kwargs) self.assertAlmostEqual(expected_log_likelihood, function_results) # Test the weighted log-likelihood capability weights = 2 * np.ones(self.fake_design.shape[0]) kwargs["weights"] = weights function_results_2 = func(*args, **kwargs) self.assertAlmostEqual(2 * expected_log_likelihood, function_results_2) kwargs["weights"] = None # Test the ridge regression calculations kwargs["ridge"] = self.ridge function_results_3 = func(*args, **kwargs) self.assertAlmostEqual(expected_log_likelihood_penalized, function_results_3) # Test the function again, this time without intercepts kwargs["intercept_params"] = None function_results_4 = func(*args, **kwargs) self.assertAlmostEqual(expected_log_likelihood_penalized, function_results_4) return None def test_array_size_error_in_calc_probabilities(self): """ Ensure that a helpful ValueError is raised when a person tries to calculate probabilities using BOTH a 2D coefficient array and a 3D design matrix. """ # Alias the function being tested func = cc.calc_probabilities # Create fake arguments for the function being tested. # Note these arguments are not valid in general, but suffice for # testing the functionality we care about in this function. args = [np.arange(9).reshape((3, 3)), np.arange(27).reshape((3, 3, 3)), None, None, None, None] # Note the error message that should be shown. msg_1 = "Cannot calculate probabilities with both 3D design matrix AND" msg_2 = " 2D coefficient array." msg = msg_1 + msg_2 self.assertRaisesRegexp(ValueError, msg, func, *args) return None def test_return_argument_error_in_calc_probabilities(self): """ Ensure that a helpful ValueError is raised when a person tries to calculate probabilities using BOTH a return_long_probs == False and chosen_row_to_obs being None. """ # Alias the function being tested func = cc.calc_probabilities # Create fake arguments for the function being tested. # Note these arguments are not valid in general, but suffice for # testing the functionality we care about in this function. args = [np.arange(9).reshape((3, 3)), np.arange(9).reshape((3, 3)), None, None, None, None] # Note the error message that should be shown. msg = "chosen_row_to_obs is None AND return_long_probs is False" self.assertRaisesRegexp(ValueError, msg, func, *args) return None def test_1D_calc_probabilities(self): """ Ensure that when using a 2D design matrix and 1D vector of parameters, that the calc_probabilities function returns the correct values. Note that this test will only verify the functionality under 'normal' conditions, where the values of the exponentiated indices do not go to zero nor to infinity. """ # Calculate the index vector expected_index = self.fake_design.dot(self.fake_betas) # Calculate exp(index) expected_exp_index = np.exp(expected_index) # Calculate the sum of exp(index) for each individual denoms = self.fake_rows_to_obs.T.dot(expected_exp_index) # Calculate the expected probabilities expected_probs = expected_exp_index / self.fake_rows_to_obs.dot(denoms) # Alias the function to be tested func = cc.calc_probabilities # Collect the arguments needed for this function args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform] kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} function_results = func(*args, **kwargs) # Perform the tests self.assertIsInstance(function_results, np.ndarray) self.assertEqual(len(function_results.shape), 1) self.assertEqual(function_results.shape, (self.fake_design.shape[0],)) npt.assert_allclose(function_results, expected_probs) return None def test_return_values_of_calc_probabilities(self): """ Ensure that the various configuration of return values can all be returned. """ # Calculate the index vector expected_index = self.fake_design.dot(self.fake_betas) # Calculate exp(index) expected_exp_index = np.exp(expected_index) # Calculate the sum of exp(index) for each individual denoms = self.fake_rows_to_obs.T.dot(expected_exp_index) # Calculate the expected probabilities expected_probs = expected_exp_index / self.fake_rows_to_obs.dot(denoms) # Extract the probabilities of the chosen alternatives for each # observaation chosen_indices = np.where(self.choice_array == 1) expected_chosen_probs = expected_probs[chosen_indices] # Alias the function to be tested func = cc.calc_probabilities # Create the chosen_row_to_obs mapping matrix choices_2d = self.choice_array[:, None] chosen_row_to_obs = self.fake_rows_to_obs.multiply(choices_2d) # Collect the arguments needed for this function args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform] # kwargs_1 should result in long_probs being returned. kwargs_1 = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} # kwargs_2 should result in (chosen_probs, long_probs being returned) kwargs_2 = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "chosen_row_to_obs": chosen_row_to_obs, "return_long_probs": True} # kwargs_3 should result in chosen_probs being returned. kwargs_3 = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "chosen_row_to_obs": chosen_row_to_obs, "return_long_probs": False} # Collect the expected results expected_results = [expected_probs, (expected_chosen_probs, expected_probs), expected_chosen_probs] # Perform the tests for pos, kwargs in enumerate([kwargs_1, kwargs_2, kwargs_3]): function_results = func(*args, **kwargs) if isinstance(function_results, tuple): expected_arrays = expected_results[pos] for array_pos, array in enumerate(function_results): current_expected_array = expected_arrays[array_pos] self.assertIsInstance(array, np.ndarray) self.assertEqual(array.shape, current_expected_array.shape) npt.assert_allclose(array, current_expected_array) else: expected_array = expected_results[pos] self.assertIsInstance(function_results, np.ndarray) self.assertEqual(function_results.shape, expected_array.shape) npt.assert_allclose(function_results, expected_array) return None def test_2D_calc_probabilities(self): """ Ensure that when using either a 2D design matrix and 2D vector of parameters, or a 3D design matrix and 1D vector of parameters, that the calc_probabilities function returns the correct values. Note that this test will only verify the functionality under 'normal' conditions, where the values of the exponentiated indices do not go to zero nor to infinity. """ # Designate a utility transform for this test utility_transform = mnl._mnl_utility_transform # Calculate the index vector expected_index = self.fake_design.dot(self.fake_betas) # Calculate exp(index) expected_exp_index = np.exp(expected_index) # Calculate the sum of exp(index) for each individual denoms = self.fake_rows_to_obs.T.dot(expected_exp_index) # Calculate the expected probabilities expected_probs = expected_exp_index / self.fake_rows_to_obs.dot(denoms) # Create the 2D vector of expected probs expected_probs_2d = np.concatenate([expected_probs[:, None], expected_probs[:, None]], axis=1) # Create the 2D coefficient vector betas_2d = np.concatenate([self.fake_betas[:, None], self.fake_betas[:, None]], axis=1) assert self.fake_design.dot(betas_2d).shape[1] > 1 # Create the 3D design matrix design_3d = np.concatenate([self.fake_design[:, None, :], self.fake_design[:, None, :]], axis=1) # Alias the function to be tested func = cc.calc_probabilities # Collect the arguments needed for this function args = [betas_2d, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, utility_transform] # The kwargs below mean that only the long format probabilities will # be returned. kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "chosen_row_to_obs": None, "return_long_probs": True} function_results_1 = func(*args, **kwargs) # Now test the functions with the various multidimensional argumnts args[0] = self.fake_betas args[1] = design_3d function_results_2 = func(*args, **kwargs) # Now try the results when calling for chosen_probs as well chosen_row_to_obs = self.fake_rows_to_obs.multiply( self.choice_array[:, None]) kwargs["chosen_row_to_obs"] = chosen_row_to_obs chosen_probs, function_results_3 = func(*args, **kwargs) # Perform the tests using a 2d coefficient array for function_results in [function_results_1, function_results_2, function_results_3]: self.assertIsInstance(function_results, np.ndarray) self.assertEqual(len(function_results.shape), 2) self.assertEqual(function_results.shape, (self.fake_design.shape[0], 2)) npt.assert_allclose(function_results, expected_probs_2d) chosen_idx = np.where(self.choice_array == 1)[0] self.assertIsInstance(chosen_probs, np.ndarray) self.assertEqual(len(chosen_probs.shape), 2) npt.assert_allclose(chosen_probs, expected_probs_2d[chosen_idx, :]) return None def test_calc_probabilities_robustness_to_under_overflow(self): """ Ensure that the calc_probabilities function correctly handles under- and overflow in the exponential of the systematic utilities. """ # Create a design array that will test the under- and over-flow # capabilities of the calc_probabilities function. extreme_design = np.array([[1], [800 / self.fake_betas[0]], [-800 / self.fake_betas[0]], [-800 / self.fake_betas[0]], [3]]) # Calculate the index vector expected_index = extreme_design.dot(self.fake_betas) # Calculate exp(index) expected_exp_index = np.exp(expected_index) # Guard against over and underflow expected_exp_index[1] = np.exp(cc.max_exponent_val) expected_exp_index[[2, 3]] = np.exp(cc.min_exponent_val) # Calculate the sum of exp(index) for each individual denoms = self.fake_rows_to_obs.T.dot(expected_exp_index) # Calculate the expected probabilities expected_probs = expected_exp_index / self.fake_rows_to_obs.dot(denoms) # Guard against underflow expected_probs[expected_probs == 0.0] = cc.min_comp_value # Alias the function to be tested func = cc.calc_probabilities # Collect the arguments needed for this function args = [self.fake_betas, extreme_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform] kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} function_results = func(*args, **kwargs) # Perform the tests self.assertIsInstance(function_results, np.ndarray) self.assertEqual(len(function_results.shape), 1) self.assertEqual(function_results.shape, (self.fake_design.shape[0],)) npt.assert_allclose(function_results, expected_probs) return None def test_calc_gradient_no_shapes_no_intercepts(self): """ Ensure that calc_gradient returns the correct values when there are no shape parameters and no intercept parameters. """ # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. dh_dv = diags(np.ones(self.fake_design.shape[0]), 0, format='csr') def transform_deriv_v(*args): return dh_dv # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters def transform_deriv_intercepts(*args): return None def transform_deriv_shapes(*args): return None # Collect the arguments needed to calculate the probabilities args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform] kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} # Calculate the required probabilities probs = cc.calc_probabilities(*args, **kwargs) # In this scenario, the gradient should be (Y- P)'(dh_dv * dv_db) # which simplifies to (Y- P)'(dh_dv * X) error_vec = (self.choice_array - probs)[None, :] expected_gradient = error_vec.dot(dh_dv.dot(self.fake_design)).ravel() # Alias the function being tested func = cc.calc_gradient # Collect the arguments for the function being tested gradient_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.choice_array, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, None, None, None, None] function_gradient = func(*gradient_args) # Perform the required tests self.assertIsInstance(function_gradient, np.ndarray) self.assertEqual(function_gradient.shape, (self.fake_betas.shape[0],)) npt.assert_allclose(function_gradient, expected_gradient) # Test the gradient function with the ridge argument gradient_args[-2] = self.ridge new_expected_gradient = (expected_gradient - 2 * self.ridge * self.fake_betas[0]) function_gradient_penalized = func(*gradient_args) self.assertIsInstance(function_gradient_penalized, np.ndarray) self.assertEqual(function_gradient_penalized.shape, (self.fake_betas.shape[0],)) npt.assert_allclose(function_gradient_penalized, new_expected_gradient) # Test the gradient function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) gradient_args[-1] = new_weights new_expected_gradient_weighted =\ 2 * expected_gradient - 2 * self.ridge * self.fake_betas[0] func_gradient_penalized_weighted = func(*gradient_args) self.assertIsInstance(func_gradient_penalized_weighted, np.ndarray) self.assertEqual(func_gradient_penalized_weighted.shape, (self.fake_betas.shape[0],)) npt.assert_allclose(func_gradient_penalized_weighted, new_expected_gradient_weighted) return None def test_calc_gradient_no_shapes(self): """ Ensure that calc_gradient returns the correct values when there are no shape parameters but there are intercept parameters. """ # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. dh_dv = diags(np.ones(self.fake_design.shape[0]), 0, format='csr') def transform_deriv_v(*args): return dh_dv # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters dh_d_intercept = self.fake_rows_to_alts[:, 1:] def transform_deriv_intercepts(*args): return dh_d_intercept def transform_deriv_shapes(*args): return None # Collect the arguments needed to calculate the probabilities args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform] kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} # Calculate the required probabilities probs = cc.calc_probabilities(*args, **kwargs) # In this scenario, the gradient should be (Y- P)'(dh_d_theta) # which simplifies to (Y- P)'[dh_d_intercept | dh_d_beta] # and finally to (Y- P)'[dh_d_intercept | dh_dv * X] error_vec = (self.choice_array - probs)[None, :] dh_d_beta = dh_dv.dot(self.fake_design) dh_d_theta = np.concatenate((dh_d_intercept.A, dh_d_beta), axis=1) expected_gradient = error_vec.dot(dh_d_theta).ravel() # Alias the function being tested func = cc.calc_gradient # Collect the arguments for the function being tested gradient_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.choice_array, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, self.fake_intercepts, None, None, None] function_gradient = func(*gradient_args) # Perform the required tests self.assertIsInstance(function_gradient, np.ndarray) self.assertEqual(function_gradient.shape, (self.fake_betas.shape[0] + self.fake_intercepts.shape[0],)) npt.assert_allclose(function_gradient, expected_gradient) # Test the gradient function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) gradient_args[-1] = new_weights expected_gradient_weighted = 2 * expected_gradient func_gradient_weighted = func(*gradient_args) self.assertIsInstance(func_gradient_weighted, np.ndarray) self.assertEqual(func_gradient_weighted.shape, (self.fake_betas.shape[0] + self.fake_intercepts.shape[0],)) npt.assert_allclose(func_gradient_weighted, expected_gradient_weighted) return None def test_calc_gradient_no_intercepts(self): """ Ensure that calc_gradient returns the correct values when there are no intercept parameters but there are shape parameters. """ # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. dh_dv = diags(np.ones(self.fake_design.shape[0]), 0, format='csr') def transform_deriv_v(*args): return dh_dv # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters def transform_deriv_intercepts(*args): return None fake_deriv = np.exp(self.fake_shapes)[None, :] dh_d_shape = self.fake_rows_to_alts[:, 1:].multiply(fake_deriv) def transform_deriv_shapes(*args): return dh_d_shape # Collect the arguments needed to calculate the probabilities args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform] kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} # Calculate the required probabilities probs = cc.calc_probabilities(*args, **kwargs) # In this scenario, the gradient should be (Y- P)'(dh_d_theta) # which simplifies to (Y- P)'[dh_d_shape | dh_d_beta] # and finally to (Y- P)'[dh_d_shape | dh_dv * X] error_vec = (self.choice_array - probs)[None, :] dh_d_beta = dh_dv.dot(self.fake_design) dh_d_theta = np.concatenate((dh_d_shape.A, dh_d_beta), axis=1) expected_gradient = error_vec.dot(dh_d_theta).ravel() # Alias the function being tested func = cc.calc_gradient # Collect the arguments for the function being tested gradient_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.choice_array, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, None, self.fake_shapes, None, None] function_gradient = func(*gradient_args) # Perform the required tests self.assertIsInstance(function_gradient, np.ndarray) self.assertEqual(function_gradient.shape, (self.fake_betas.shape[0] + self.fake_shapes.shape[0],)) npt.assert_allclose(function_gradient, expected_gradient) # Perform the tests with a weighted gradient new_weights = 2 * np.ones(self.fake_design.shape[0]) gradient_args[-1] = new_weights expected_gradient_weighted = 2 * expected_gradient func_gradient_weighted = func(*gradient_args) self.assertIsInstance(func_gradient_weighted, np.ndarray) self.assertEqual(func_gradient_weighted.shape, (self.fake_betas.shape[0] + self.fake_shapes.shape[0],)) npt.assert_allclose(func_gradient_weighted, expected_gradient_weighted) # Test the gradient function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) gradient_args[-1] = new_weights expected_gradient_weighted = 2 * expected_gradient func_gradient_weighted = func(*gradient_args) self.assertIsInstance(func_gradient_weighted, np.ndarray) self.assertEqual(func_gradient_weighted.shape, (self.fake_betas.shape[0] + self.fake_shapes.shape[0],)) npt.assert_allclose(func_gradient_weighted, expected_gradient_weighted) return None def test_calc_gradient_shapes_and_intercepts(self): """ Ensure that calc_gradient returns the correct values when there are shape and intercept parameters. """ # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. dh_dv = diags(np.ones(self.fake_design.shape[0]), 0, format='csr') def transform_deriv_v(*args): return dh_dv # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters dh_d_intercept = self.fake_rows_to_alts[:, 1:] def transform_deriv_intercepts(*args): return dh_d_intercept fake_deriv = np.exp(self.fake_shapes)[None, :] dh_d_shape = self.fake_rows_to_alts[:, 1:].multiply(fake_deriv) def transform_deriv_shapes(*args): return dh_d_shape # Collect the arguments needed to calculate the probabilities args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform] kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} # Calculate the required probabilities probs = cc.calc_probabilities(*args, **kwargs) # In this scenario, the gradient should be (Y- P)'(dh_d_theta) # which simplifies to (Y- P)'[dh_d_shape | dh_d_intercept | dh_d_beta] # and finally to (Y- P)'[dh_d_shape | dh_d_intercept | dh_dv * X] error_vec = (self.choice_array - probs)[None, :] dh_d_beta = dh_dv.dot(self.fake_design) dh_d_theta = np.concatenate((dh_d_shape.A, dh_d_intercept.A, dh_d_beta), axis=1) expected_gradient = error_vec.dot(dh_d_theta).ravel() # Alias the function being tested func = cc.calc_gradient # Collect the arguments for the function being tested gradient_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.choice_array, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, self.fake_intercepts, self.fake_shapes, None, None] function_gradient = func(*gradient_args) # Perform the required tests self.assertIsInstance(function_gradient, np.ndarray) self.assertEqual(function_gradient.shape, (self.fake_betas.shape[0] + self.fake_intercepts.shape[0] + self.fake_shapes.shape[0],)) npt.assert_allclose(function_gradient, expected_gradient) # Test the gradient function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) gradient_args[-1] = new_weights expected_gradient_weighted = 2 * expected_gradient func_gradient_weighted = func(*gradient_args) self.assertIsInstance(func_gradient_weighted, np.ndarray) self.assertEqual(func_gradient_weighted.shape, (self.fake_betas.shape[0] + self.fake_intercepts.shape[0] + self.fake_shapes.shape[0],)) npt.assert_allclose(func_gradient_weighted, expected_gradient_weighted) return None def test_create_matrix_block_indices(self): """ Ensure that create_matrix_block_indices returns the expected results. """ # Note that we have two observations, the first with three alternatives # and the second with two alternatives. expected_results = [np.array([0, 1, 2]), np.array([3, 4])] # Get the results of the function being tested results = cc.create_matrix_block_indices(self.fake_rows_to_obs) # Test that the two sets of results are equal self.assertIsInstance(results, list) self.assertTrue(all([isinstance(x, np.ndarray) for x in results])) npt.assert_allclose(expected_results[0], results[0]) npt.assert_allclose(expected_results[1], results[1]) return None def test_robust_outer_product(self): """ Ensure that robust_outer_product returns the expected results. Unfortunately, I cannot find a good case now where using the regular outer product gives incorrect results. However without a compelling reason to remove the function, I'll trust my earlier judgement in creating it in the first place. """ # Define a vector whose outer product we want to take x = np.array([1e-100, 0.01]) outer_product = np.outer(x, x) robust_outer_product = cc.robust_outer_product(x, x) # Perform the desired tests self.assertIsInstance(robust_outer_product, np.ndarray) self.assertEqual(robust_outer_product.shape, outer_product.shape) npt.assert_allclose(outer_product, robust_outer_product) return None def test_create_matrix_blocks(self): """ Ensure that create_matrix_blocks returns expected results when not having to correct for underflow. """ # Designate a utility transform for this test utility_transform = mnl._mnl_utility_transform # Collect the arguments needed for this function args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, utility_transform] kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} # Get the long-format probabilities long_probs = cc.calc_probabilities(*args, **kwargs) # Get the matrix-block indices matrix_indices = cc.create_matrix_block_indices(self.fake_rows_to_obs) # Create the matrix block for individual 1. matrix_block_1 = (np.diag(long_probs[:3]) - np.outer(long_probs[:3], long_probs[:3])) matrix_block_2 = (np.diag(long_probs[3:]) - np.outer(long_probs[3:], long_probs[3:])) # Create a list of the expected results expected_results = [matrix_block_1, matrix_block_2] # Get the function results func_results = cc.create_matrix_blocks(long_probs, matrix_indices) for pos, result in enumerate(func_results): self.assertIsInstance(result, np.ndarray) self.assertEqual(result.shape, expected_results[pos].shape) npt.assert_allclose(result, expected_results[pos]) return None def test_create_matrix_blocks_with_underflow(self): """ Ensure that create_matrix_blocks returns expected results when also having to correct for underflow. """ # Get the long-format probabilities long_probs = np.array([cc.min_comp_value, cc.min_comp_value, 1.0, cc.min_comp_value, 1.0]) # Get the matrix-block indices matrix_indices = cc.create_matrix_block_indices(self.fake_rows_to_obs) # Create the matrix block for individual 1. row_1 = [cc.min_comp_value, -cc.min_comp_value, -cc.min_comp_value] row_2 = [-cc.min_comp_value, cc.min_comp_value, -cc.min_comp_value] row_3 = [-cc.min_comp_value, -cc.min_comp_value, cc.min_comp_value] matrix_block_1 = np.array([row_1, row_2, row_3]) matrix_block_2 = (np.diag(long_probs[3:]) - np.outer(long_probs[3:], long_probs[3:])) # Assuming that no probabilities should actually be zero or one, # the underflow guard would set the last value to a very small, # positive number matrix_block_2[-1, -1] = cc.min_comp_value # Create a list of the expected results expected_results = [matrix_block_1, matrix_block_2] # Get the function results func_results = cc.create_matrix_blocks(long_probs, matrix_indices) for pos, result in enumerate(func_results): self.assertIsInstance(result, np.ndarray) self.assertEqual(result.shape, expected_results[pos].shape) npt.assert_allclose(result, expected_results[pos]) return None def test_calc_fisher_info_matrix_no_shapes_no_intercepts(self): """ Ensure that calc_fisher_info_matrix returns the expected values when there are no shape or intercept parameters. """ # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. def transform_deriv_v(sys_utilities, alt_IDs, rows_to_alts, shape_params): return diags(np.ones(sys_utilities.shape[0]), 0, format='csr') # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters def transform_deriv_intercepts(*args): return None def transform_deriv_shapes(*args): return None # Collect the arguments for the function being tested gradient_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.choice_array, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, None, None, None, None] gradient_args[1] = self.fake_design[:3, :] gradient_args[2] = self.fake_df[self.alt_id_col].values[:3] gradient_args[3] = self.fake_rows_to_obs[:3, :] gradient_args[4] = self.fake_rows_to_alts[:3, :] gradient_args[5] = self.choice_array[:3] gradient_1 = cc.calc_gradient(*gradient_args) gradient_args[1] = self.fake_design[3:, :] gradient_args[2] = self.fake_df[self.alt_id_col].values[3:] gradient_args[3] = self.fake_rows_to_obs[3:, :] gradient_args[4] = self.fake_rows_to_alts[3:, :] gradient_args[5] = self.choice_array[3:] gradient_2 = cc.calc_gradient(*gradient_args) # Calcuate the BHHH approximation to the Fisher Info Matrix expected_result = (np.outer(gradient_1, gradient_1) + np.outer(gradient_2, gradient_2)) # Alias the function being tested func = cc.calc_fisher_info_matrix # Get the results of the function being tested gradient_args[1] = self.fake_design gradient_args[2] = self.fake_df[self.alt_id_col].values gradient_args[3] = self.fake_rows_to_obs gradient_args[4] = self.fake_rows_to_alts gradient_args[5] = self.choice_array function_result = func(*gradient_args) # Perform the required tests self.assertIsInstance(function_result, np.ndarray) self.assertEqual(function_result.shape, expected_result.shape) npt.assert_allclose(function_result, expected_result) # Test the Fisher info matrix function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) gradient_args[-1] = new_weights expected_result_weighted = 2 * expected_result func_result_weighted = func(*gradient_args) self.assertIsInstance(func_result_weighted, np.ndarray) self.assertEqual(func_result_weighted.shape, expected_result_weighted.shape) npt.assert_allclose(func_result_weighted, expected_result_weighted) # Test the function with the ridge penalty expected_result_weighted -= 2 * self.ridge gradient_args[-2] = self.ridge function_result = func(*gradient_args) self.assertIsInstance(function_result, np.ndarray) self.assertEqual(function_result.shape, expected_result_weighted.shape) npt.assert_allclose(function_result, expected_result_weighted) return None def test_calc_fisher_info_matrix_no_intercepts(self): """ Ensure that calc_fisher_info_matrix returns the expected values when there are no intercept parameters. """ # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. def transform_deriv_v(sys_utilities, alt_IDs, rows_to_alts, shape_params): return diags(np.ones(sys_utilities.shape[0]), 0, format='csr') # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters def transform_deriv_intercepts(*args): return None fake_deriv = np.exp(self.fake_shapes)[None, :] def transform_deriv_shapes(sys_utilities, alt_IDs, rows_to_alts, shape_params): return rows_to_alts[:, 1:].multiply(fake_deriv) # Collect the arguments needed to calculate the gradients gradient_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.choice_array, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, None, self.fake_shapes, None, None] # Get the gradient, for each observation, separately. # Note this test expects that we only have two observations. gradient_args[1] = self.fake_design[:3, :] gradient_args[2] = self.fake_df[self.alt_id_col].values[:3] gradient_args[3] = self.fake_rows_to_obs[:3, :] gradient_args[4] = self.fake_rows_to_alts[:3, :] gradient_args[5] = self.choice_array[:3] gradient_1 = cc.calc_gradient(*gradient_args) gradient_args[1] = self.fake_design[3:, :] gradient_args[2] = self.fake_df[self.alt_id_col].values[3:] gradient_args[3] = self.fake_rows_to_obs[3:, :] gradient_args[4] = self.fake_rows_to_alts[3:, :] gradient_args[5] = self.choice_array[3:] gradient_2 = cc.calc_gradient(*gradient_args) # Calcuate the BHHH approximation to the Fisher Info Matrix expected_result = (np.outer(gradient_1, gradient_1) + np.outer(gradient_2, gradient_2)) # Alias the function being tested func = cc.calc_fisher_info_matrix # Get the results of the function being tested gradient_args[1] = self.fake_design gradient_args[2] = self.fake_df[self.alt_id_col].values gradient_args[3] = self.fake_rows_to_obs gradient_args[4] = self.fake_rows_to_alts gradient_args[5] = self.choice_array function_result = func(*gradient_args) # Perform the required tests self.assertIsInstance(function_result, np.ndarray) self.assertEqual(function_result.shape, expected_result.shape) npt.assert_allclose(function_result, expected_result) # Test the Fisher info matrix function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) gradient_args[-1] = new_weights expected_result_weighted = 2 * expected_result func_result_weighted = func(*gradient_args) self.assertIsInstance(func_result_weighted, np.ndarray) self.assertEqual(func_result_weighted.shape, expected_result_weighted.shape) npt.assert_allclose(func_result_weighted, expected_result_weighted) return None def test_calc_fisher_info_matrix_no_shapes(self): """ Ensure that calc_fisher_info_matrix returns the expected values when there are no shape parameters. """ # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. def transform_deriv_v(sys_utilities, alt_IDs, rows_to_alts, shape_params): return diags(np.ones(sys_utilities.shape[0]), 0, format='csr') # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters def transform_deriv_intercepts(sys_utilities, alt_IDs, rows_to_alts, shape_params): return rows_to_alts[:, 1:] def transform_deriv_shapes(*args): return None # Collect the arguments needed to calculate the gradients gradient_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.choice_array, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, self.fake_intercepts, None, None, None] # Get the gradient, for each observation, separately. # Note this test expects that we only have two observations. gradient_args[1] = self.fake_design[:3, :] gradient_args[2] = self.fake_df[self.alt_id_col].values[:3] gradient_args[3] = self.fake_rows_to_obs[:3, :] gradient_args[4] = self.fake_rows_to_alts[:3, :] gradient_args[5] = self.choice_array[:3] gradient_1 = cc.calc_gradient(*gradient_args) gradient_args[1] = self.fake_design[3:, :] gradient_args[2] = self.fake_df[self.alt_id_col].values[3:] gradient_args[3] = self.fake_rows_to_obs[3:, :] gradient_args[4] = self.fake_rows_to_alts[3:, :] gradient_args[5] = self.choice_array[3:] gradient_2 = cc.calc_gradient(*gradient_args) # Calcuate the BHHH approximation to the Fisher Info Matrix expected_result = (np.outer(gradient_1, gradient_1) + np.outer(gradient_2, gradient_2)) # Alias the function being tested func = cc.calc_fisher_info_matrix # Get the results of the function being tested gradient_args[1] = self.fake_design gradient_args[2] = self.fake_df[self.alt_id_col].values gradient_args[3] = self.fake_rows_to_obs gradient_args[4] = self.fake_rows_to_alts gradient_args[5] = self.choice_array function_result = func(*gradient_args) # Perform the required tests self.assertIsInstance(function_result, np.ndarray) self.assertEqual(function_result.shape, expected_result.shape) npt.assert_allclose(function_result, expected_result) # Test the Fisher info matrix function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) gradient_args[-1] = new_weights expected_result_weighted = 2 * expected_result func_result_weighted = func(*gradient_args) self.assertIsInstance(func_result_weighted, np.ndarray) self.assertEqual(func_result_weighted.shape, expected_result_weighted.shape) npt.assert_allclose(func_result_weighted, expected_result_weighted) return None def test_calc_fisher_info_matrix(self): """ Ensure that calc_fisher_info_matrix returns the expected values. """ # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. def transform_deriv_v(sys_utilities, alt_IDs, rows_to_alts, shape_params): return diags(np.ones(sys_utilities.shape[0]), 0, format='csr') # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters def transform_deriv_intercepts(sys_utilities, alt_IDs, rows_to_alts, shape_params): return rows_to_alts[:, 1:] fake_deriv = np.exp(self.fake_shapes)[None, :] def transform_deriv_shapes(sys_utilities, alt_IDs, rows_to_alts, shape_params): return rows_to_alts[:, 1:].multiply(fake_deriv) # Collect the arguments needed to calculate the gradients gradient_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.choice_array, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, self.fake_intercepts, self.fake_shapes, None, None] # Get the gradient, for each observation, separately. # Note this test expects that we only have two observations. gradient_args[1] = self.fake_design[:3, :] gradient_args[2] = self.fake_df[self.alt_id_col].values[:3] gradient_args[3] = self.fake_rows_to_obs[:3, :] gradient_args[4] = self.fake_rows_to_alts[:3, :] gradient_args[5] = self.choice_array[:3] gradient_1 = cc.calc_gradient(*gradient_args) gradient_args[1] = self.fake_design[3:, :] gradient_args[2] = self.fake_df[self.alt_id_col].values[3:] gradient_args[3] = self.fake_rows_to_obs[3:, :] gradient_args[4] = self.fake_rows_to_alts[3:, :] gradient_args[5] = self.choice_array[3:] gradient_2 = cc.calc_gradient(*gradient_args) # Calcuate the BHHH approximation to the Fisher Info Matrix expected_result = (np.outer(gradient_1, gradient_1) + np.outer(gradient_2, gradient_2)) # Alias the function being tested func = cc.calc_fisher_info_matrix # Get the results of the function being tested gradient_args[1] = self.fake_design gradient_args[2] = self.fake_df[self.alt_id_col].values gradient_args[3] = self.fake_rows_to_obs gradient_args[4] = self.fake_rows_to_alts gradient_args[5] = self.choice_array function_result = func(*gradient_args) # Perform the required tests self.assertIsInstance(function_result, np.ndarray) self.assertEqual(function_result.shape, expected_result.shape) npt.assert_allclose(function_result, expected_result) # Test the Fisher info matrix function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) gradient_args[-1] = new_weights expected_result_weighted = 2 * expected_result func_result_weighted = func(*gradient_args) self.assertIsInstance(func_result_weighted, np.ndarray) self.assertEqual(func_result_weighted.shape, expected_result_weighted.shape) npt.assert_allclose(func_result_weighted, expected_result_weighted) return None def test_calc_hessian_no_shapes_no_intercept(self): """ Ensure that the calc_hessian function returns expected results when there are no shape parameters and no intercept parameters. """ # Alias the design matrix design = self.fake_design # Get the matrix block indices for the test matrix_indices = cc.create_matrix_block_indices(self.fake_rows_to_obs) # Calculate the probabilities for this test. args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform] kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} probs = cc.calc_probabilities(*args, **kwargs) # Get the matrix blocks for dP_i_dH_i matrix_blocks = cc.create_matrix_blocks(probs, matrix_indices) # Create the dP_dH matrix that represents the derivative of the # long probabilities with respect to the array of transformed index # values / systematic utilities dP_dH = block_diag(matrix_blocks) # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. def transform_deriv_v(sys_utilities, alt_IDs, rows_to_alts, shape_params): return diags(np.ones(sys_utilities.shape[0]), 0, format='csr') # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters def transform_deriv_intercepts(*args): return None def transform_deriv_shapes(*args): return None # Collect the arguments for the hessian function being tested hessian_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, matrix_indices, None, None, None, None] # Calculate the expected result # Since we're essentially dealing with an MNL model in this test, # the expected answer is -X^T * dP_dH * X expected_result = (-1 * design.T.dot(dP_dH.dot(design))) # Alias the function being tested func = cc.calc_hessian # Get the results of the function being tested function_result = func(*hessian_args) # Perform the required tests self.assertIsInstance(function_result, np.ndarray) self.assertEqual(function_result.shape, expected_result.shape) npt.assert_allclose(function_result, expected_result) # Test the Hessian function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) hessian_args[-1] = new_weights expected_result_weighted = 2 * expected_result func_result_weighted = func(*hessian_args) self.assertIsInstance(func_result_weighted, np.ndarray) self.assertEqual(func_result_weighted.shape, expected_result_weighted.shape) npt.assert_allclose(func_result_weighted, expected_result_weighted) # Test the function with the ridge penalty expected_result_weighted -= 2 * self.ridge hessian_args[-2] = self.ridge function_result = func(*hessian_args) self.assertIsInstance(function_result, np.ndarray) self.assertEqual(function_result.shape, expected_result_weighted.shape) npt.assert_allclose(function_result, expected_result_weighted) return None def test_calc_hessian(self): """ Ensure that the calc_hessian function returns expected results when there are both shape parameters and intercept parameters. """ # Alias the design matrix design = self.fake_design # Get the matrix block indices for the test matrix_indices = cc.create_matrix_block_indices(self.fake_rows_to_obs) # Calculate the probabilities for this test. args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform] kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} probs = cc.calc_probabilities(*args, **kwargs) # Get the matrix blocks for dP_i_dH_i matrix_blocks = cc.create_matrix_blocks(probs, matrix_indices) # Create the dP_dH matrix that represents the derivative of the # long probabilities with respect to the array of transformed index # values / systematic utilities dP_dH = block_diag(matrix_blocks) # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. def transform_deriv_v(sys_utilities, alt_IDs, rows_to_alts, shape_params): return diags(np.ones(sys_utilities.shape[0]), 0, format='csr') # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters def transform_deriv_intercepts(sys_utilities, alt_IDs, rows_to_alts, intercept_params): return rows_to_alts[:, 1:] fake_deriv = np.exp(self.fake_shapes)[None, :] def transform_deriv_shapes(sys_utilities, alt_IDs, rows_to_alts, shape_params): return rows_to_alts[:, 1:].multiply(fake_deriv) # Collect the arguments for the hessian function being tested hessian_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, matrix_indices, self.fake_intercepts, self.fake_shapes, None, None] # Calculate the derivative of the transformation vector with respect # to the shape parameters. args = (design.dot(self.fake_betas), self.fake_df[self.alt_id_col].values, self.fake_rows_to_alts, self.fake_shapes) dh_d_shape = transform_deriv_shapes(*args) # Calculate the derivative of the transformation vector with respect # to the intercept parameters dh_d_intercept = self.fake_rows_to_alts[:, 1:] # Calculate the various matrices needed for the expected result # Note dH_dV is the Identity matrix in this test. # See the documentation pdf for a description of what each of these # matrixes are. # h_33 is -X^T * dP_dH * X. This is the hessian in the standard MNL h_33 = np.asarray(-1 * design.T.dot(dP_dH.dot(design))) # h_32 is -X^T * dH_dV^T * dP_dH * dH_d_intercept h_32 = np.asarray(-1 * design.T.dot(dP_dH.dot(dh_d_intercept.A))) # h_31 is -X^T * dH_dV^T * dP_dH * dH_d_shape h_31 = np.asarray(-1 * design.T.dot(dP_dH.dot(dh_d_shape.A))) # h_21 = -dH_d_intercept^T * dP_dH * dH_d_shape h_21 = np.asarray(-1 * dh_d_intercept.T.dot(dP_dH.dot(dh_d_shape.A))) # h_22 = -dH_d_intercept^T * dP_dH * dH_d_intercept h_22 = np.asarray(-1 * dh_d_intercept.T.dot(dP_dH.dot(dh_d_intercept.A))) # h_11 = -dH_d_shape^T * dP_dH * dH_d_shape h_11 = np.asarray(-1 * dh_d_shape.T.dot(dP_dH.dot(dh_d_shape.A))) # Create the final hessian top_row = np.concatenate((h_11, h_21.T, h_31.T), axis=1) middle_row = np.concatenate((h_21, h_22, h_32.T), axis=1) bottom_row = np.concatenate((h_31, h_32, h_33), axis=1) expected_result = np.concatenate((top_row, middle_row, bottom_row), axis=0) # Alias the function being tested func = cc.calc_hessian # Get the results of the function being tested function_result = func(*hessian_args) # Perform the required tests self.assertIsInstance(function_result, np.ndarray) self.assertEqual(function_result.shape, expected_result.shape) self.assertFalse(isinstance(function_result, np.matrixlib.defmatrix.matrix)) npt.assert_allclose(function_result, expected_result) # Test the Hessian function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) hessian_args[-1] = new_weights expected_result_weighted = 2 * expected_result func_result_weighted = func(*hessian_args) self.assertIsInstance(func_result_weighted, np.ndarray) self.assertEqual(func_result_weighted.shape, expected_result_weighted.shape) npt.assert_allclose(func_result_weighted, expected_result_weighted) # Test the function with the ridge penalty expected_result_weighted -= 2 * self.ridge hessian_args[-2] = self.ridge function_result = func(*hessian_args) self.assertIsInstance(function_result, np.ndarray) self.assertEqual(function_result.shape, expected_result_weighted.shape) self.assertFalse(isinstance(function_result, np.matrixlib.defmatrix.matrix)) npt.assert_allclose(function_result, expected_result_weighted) return None def test_calc_hessian_no_shapes(self): """ Ensure that the calc_hessian function returns expected results when there are no shape parameters. """ # Alias the design matrix design = self.fake_design # Get the matrix block indices for the test matrix_indices = cc.create_matrix_block_indices(self.fake_rows_to_obs) # Calculate the probabilities for this test. args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform] kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} probs = cc.calc_probabilities(*args, **kwargs) # Get the matrix blocks for dP_i_dH_i matrix_blocks = cc.create_matrix_blocks(probs, matrix_indices) # Create the dP_dH matrix that represents the derivative of the # long probabilities with respect to the array of transformed index # values / systematic utilities dP_dH = block_diag(matrix_blocks) # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. def transform_deriv_v(sys_utilities, alt_IDs, rows_to_alts, shape_params): return diags(np.ones(sys_utilities.shape[0]), 0, format='csr') # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters def transform_deriv_intercepts(sys_utilities, alt_IDs, rows_to_alts, intercept_params): return rows_to_alts[:, 1:] def transform_deriv_shapes(sys_utilities, alt_IDs, rows_to_alts, shape_params): return None # Collect the arguments for the hessian function being tested hessian_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, matrix_indices, self.fake_intercepts, None, None, None] # Calculate the derivative of the transformation vector with respect # to the intercept parameters dh_d_intercept = self.fake_rows_to_alts[:, 1:] # Calculate the various matrices needed for the expected result # Note dH_dV is the Identity matrix in this test. # See the documentation pdf for a description of what each of these # matrixes are. # h_33 is -X^T * dP_dH * X. This is the hessian in the standard MNL h_33 = np.asarray(-1 * design.T.dot(dP_dH.dot(design))) # h_32 is -X^T * dH_dV^T * dP_dH * dH_d_intercept h_32 = np.asarray(-1 * design.T.dot(dP_dH.dot(dh_d_intercept.A))) # h_22 = -dH_d_intercept^T * dP_dH * dH_d_intercept h_22 = np.asarray(-1 * dh_d_intercept.T.dot(dP_dH.dot(dh_d_intercept.A))) # Create the final hessian middle_row = np.concatenate((h_22, h_32.T), axis=1) bottom_row = np.concatenate((h_32, h_33), axis=1) expected_result = np.concatenate((middle_row, bottom_row), axis=0) # Alias the function being tested func = cc.calc_hessian # Get the results of the function being tested function_result = func(*hessian_args) # Perform the required tests self.assertIsInstance(function_result, np.ndarray) self.assertEqual(function_result.shape, expected_result.shape) self.assertFalse(isinstance(function_result, np.matrixlib.defmatrix.matrix)) npt.assert_allclose(function_result, expected_result) # Test the Hessian function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) hessian_args[-1] = new_weights expected_result_weighted = 2 * expected_result func_result_weighted = func(*hessian_args) self.assertIsInstance(func_result_weighted, np.ndarray) self.assertEqual(func_result_weighted.shape, expected_result_weighted.shape) npt.assert_allclose(func_result_weighted, expected_result_weighted) return None def test_calc_hessian_no_intercepts(self): """ Ensure that the calc_hessian function returns expected results when there are no intercept parameters. """ # Alias the design matrix design = self.fake_design # Get the matrix block indices for the test matrix_indices = cc.create_matrix_block_indices(self.fake_rows_to_obs) # Calculate the probabilities for this test. args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform] kwargs = {"intercept_params": self.fake_intercepts, "shape_params": self.fake_shapes, "return_long_probs": True} probs = cc.calc_probabilities(*args, **kwargs) # Get the matrix blocks for dP_i_dH_i matrix_blocks = cc.create_matrix_blocks(probs, matrix_indices) # Create the dP_dH matrix that represents the derivative of the # long probabilities with respect to the array of transformed index # values / systematic utilities dP_dH = block_diag(matrix_blocks) # Designate a function that calculates the parital derivative of the # transformed index values, with respect to the index. def transform_deriv_v(sys_utilities, alt_IDs, rows_to_alts, shape_params): return diags(np.ones(sys_utilities.shape[0]), 0, format='csr') # Designate functions that calculate the partial derivative of the # transformed index values, with respect to shape and index parameters def transform_deriv_intercepts(sys_utilities, alt_IDs, rows_to_alts, intercept_params): return None fake_deriv = np.exp(self.fake_shapes)[None, :] def transform_deriv_shapes(sys_utilities, alt_IDs, rows_to_alts, shape_params): return rows_to_alts[:, 1:].multiply(fake_deriv) # Collect the arguments for the hessian function being tested hessian_args = [self.fake_betas, self.fake_design, self.fake_df[self.alt_id_col].values, self.fake_rows_to_obs, self.fake_rows_to_alts, self.utility_transform, transform_deriv_shapes, transform_deriv_v, transform_deriv_intercepts, matrix_indices, None, self.fake_shapes, None, None] # Calculate the derivative of the transformation vector with respect # to the shape parameters. args = (design.dot(self.fake_betas), self.fake_df[self.alt_id_col].values, self.fake_rows_to_alts, self.fake_shapes) dh_d_shape = transform_deriv_shapes(*args) # Calculate the various matrices needed for the expected result # Note dH_dV is the Identity matrix in this test. # See the documentation pdf for a description of what each of these # matrixes are. # h_33 is -X^T * dP_dH * X. This is the hessian in the standard MNL h_33 = np.asarray(-1 * design.T.dot(dP_dH.dot(design))) # h_31 is -X^T * dH_dV^T * dP_dH * dH_d_shape h_31 = np.asarray(-1 * design.T.dot(dP_dH.dot(dh_d_shape.A))) # h_11 = -dH_d_shape^T * dP_dH * dH_d_shape h_11 = np.asarray(-1 * dh_d_shape.T.dot(dP_dH.dot(dh_d_shape.A))) # Create the final hessian top_row = np.concatenate((h_11, h_31.T), axis=1) bottom_row = np.concatenate((h_31, h_33), axis=1) expected_result = np.concatenate((top_row, bottom_row), axis=0) # Alias the function being tested func = cc.calc_hessian # Get the results of the function being tested function_result = func(*hessian_args) # Perform the required tests self.assertIsInstance(function_result, np.ndarray) self.assertEqual(function_result.shape, expected_result.shape) npt.assert_allclose(function_result, expected_result) # Test the Hessian function with weights new_weights = 2 * np.ones(self.fake_design.shape[0]) hessian_args[-1] = new_weights expected_result_weighted = 2 * expected_result func_result_weighted = func(*hessian_args) self.assertIsInstance(func_result_weighted, np.ndarray) self.assertEqual(func_result_weighted.shape, expected_result_weighted.shape) npt.assert_allclose(func_result_weighted, expected_result_weighted) return None