import unittest import numpy as np from disco.core import result_iterator import datasets class Tests_Regression(unittest.TestCase): @classmethod def setUpClass(self): import chunk_testdata from disco import ddfs ddfs = ddfs.DDFS() if not ddfs.exists("test:ex3"): print "Chunking test datasets to DDFS..." chunk_testdata.chunk_testdata() def test_lwlr(self): # python -m unittest tests_regression.Tests_Regression.test_lwlr import locally_weighted_linear_regression as lwlr1 from discomll.regression import locally_weighted_linear_regression as lwlr2 x_train, y_train, x_test, y_test = datasets.regression_data() train_data, test_data = datasets.regression_data_discomll() lwlr1 = lwlr1.Locally_Weighted_Linear_Regression() taus = [1, 10, 25] sorted_indices = np.argsort([str(el) for el in x_test[:, 1].tolist()]) for tau in taus: thetas1, estimation1 = lwlr1.fit(x_train, y_train, x_test, tau=tau) thetas1, estimation1 = np.array(thetas1)[sorted_indices], np.array(estimation1)[sorted_indices] results = lwlr2.fit_predict(train_data, test_data, tau=tau) thetas2, estimation2 = [], [] for x_id, (est, thetas) in result_iterator(results): estimation2.append(est) thetas2.append(thetas) self.assertTrue(np.allclose(thetas1, thetas2, atol=1e-8)) self.assertTrue(np.allclose(estimation1, estimation2, atol=1e-3)) def test_lin_reg(self): # python -m unittest tests_regression.Tests_Regression.test_lin_reg from sklearn import linear_model from discomll.regression import linear_regression x_train, y_train, x_test, y_test = datasets.ex3() train_data, test_data = datasets.ex3_discomll() lin_reg = linear_model.LinearRegression() # Create linear regression object lin_reg.fit(x_train, y_train) # Train the model using the training sets thetas1 = [lin_reg.intercept_] + lin_reg.coef_[1:].tolist() prediction1 = lin_reg.predict(x_test) thetas_url = linear_regression.fit(train_data) thetas2 = [v for k, v in result_iterator(thetas_url["linreg_fitmodel"])] results = linear_regression.predict(test_data, thetas_url) prediction2 = [v[0] for k, v in result_iterator(results)] self.assertTrue(np.allclose(thetas1, thetas2)) self.assertTrue(np.allclose(prediction1, prediction2)) if __name__ == '__main__': unittest.main()