import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import ui

print("PART 1 单变量线性回归")
ui.split_line1()

path = 'data/ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])
print("查看前 5 行数据")
print(data.head())
ui.split_line2()
print("查看数据集统计信息")
print(data.describe())
ui.split_line2()
print("绘制数据分布")
data.plot(kind='scatter', x='Population', y='Profit', figsize=(12,8))
plt.show()
ui.split_line2()
print("设置代价函数")

def computeCost(X, y, theta):
    # Halved mean squared error: J(theta) = 1/(2m) * sum((X * theta.T - y)^2)
    inner = np.power((X * theta.T - y), 2)
    return np.sum(inner) / (2 * len(X))
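
# Quick sanity check (illustrative only, not part of the exercise): for a
# single sample x = [1, 2] with y = 5 and theta = [1, 1], the prediction is
# 1*1 + 2*1 = 3, so the cost is (3 - 5)^2 / (2 * 1) = 2.
print(computeCost(np.matrix([[1.0, 2.0]]), np.matrix([[5.0]]), np.matrix([[1.0, 1.0]])))  # -> 2.0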

ui.split_line2()
print("在训练集中添加一列,便于我们以向量化的方法计算 Cost 和梯度")
data.insert(0, 'Ones', 1)
print("进行变量初始化")
cols = data.shape[1]
print("X 是所有的行,去掉最后一列")
X = data.iloc[:, 0:cols-1]
print("X 的前五行")
print(X.head())
print("y 是最后一列")
y = data.iloc[:, cols-1:cols]
print("y 的前五行")
print(y.head())
ui.split_line2()
print("我们的代价函数需要传入 numpy 矩阵,先进行转换,并初始化 theta")
X = np.matrix(X.values)
y = np.matrix(y.values)
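# Note: np.matrix is kept here to match the original exercise, but NumPy now
# recommends plain ndarrays with the @ operator for new code (e.g. X @ theta.T
# instead of X * theta.T on matrices).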
print("theta 是一个 (1,2) 矩阵")
theta = np.matrix(np.array([0, 0]))
print("X, y, theta 的维度")
print(X.shape, theta.shape, y.shape)
print("计算初始状态的 Cost")
print(computeCost(X, y, theta))
ui.split_line2()
print("批量梯度下降函数")

def gradientDescent(X, y, theta, alpha, iters):
    temp = np.matrix(np.zeros(theta.shape))   # staging area for the simultaneous update
    parameters = int(theta.ravel().shape[1])  # number of parameters to fit
    cost = np.zeros(iters)                    # cost history, one entry per iteration

    for i in range(iters):
        error = (X * theta.T) - y  # (m, 1) residuals under the current theta

        for j in range(parameters):
            # Gradient step for theta_j: subtract alpha/m times dJ/dtheta_j
            term = np.multiply(error, X[:, j])
            temp[0, j] = theta[0, j] - ((alpha / len(X)) * np.sum(term))

        theta = temp
        cost[i] = computeCost(X, y, theta)
    return theta, cost
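
# A fully vectorized sketch of the same update (assuming np.matrix operands as
# above; equivalent to the inner loop, updating all parameters at once):
def gradientDescentVectorized(X, y, theta, alpha, iters):
    cost = np.zeros(iters)
    for i in range(iters):
        error = (X * theta.T) - y                         # (m, 1) residuals
        theta = theta - (alpha / len(X)) * (error.T * X)  # (1, n) simultaneous step
        cost[i] = computeCost(X, y, theta)
    return theta, cost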

print("初始化学习率与迭代次数")
alpha = 0.01
iters = 1000
print("运行梯度下降算法")
g, cost = gradientDescent(X, y, theta, alpha, iters)
print("最后得到的 theta 为", g)
print("再次计算代价函数")
print(computeCost(X, y, g))
ui.split_line2()
print("绘制线性模型及数据,查看拟合情况")
x = np.linspace(data.Population.min(), data.Population.max(), 100)
f = g[0, 0] + (g[0, 1] * x)

fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()
print("绘制 Cost 下降的趋势")
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(np.arange(iters), cost, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()

ui.split_line1()
print("PART 2 多变量线性回归")
ui.split_line1()

path = 'data/ex1data2.txt'
data2 = pd.read_csv(path, header=None, names=['Size', 'Bedrooms', 'Price'])
print("查看数据前 5 行")
print(data2.head())
print("进行特征归一化,查看归一化后的数据")
data2 = (data2 - data2.mean()) / data2.std()
print(data2.head())
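# (Sketch) The pre-normalization mean and std are not saved here; to score new
# raw samples later you would need to keep them first, e.g.
#   mu, sigma = data2.mean(), data2.std()   # before overwriting data2
#   x_new_scaled = (x_new - mu) / sigma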
print("对新数据应用前面一样的套路")
data2.insert(0, 'Ones', 1)
# set X (training data) and y (target variable)
cols = data2.shape[1]
X2 = data2.iloc[:,0:cols-1]
y2 = data2.iloc[:,cols-1:cols]

# convert to matrices and initialize theta
X2 = np.matrix(X2.values)
y2 = np.matrix(y2.values)
theta2 = np.matrix(np.array([0,0,0]))

# perform linear regression on the data set
g2, cost2 = gradientDescent(X2, y2, theta2, alpha, iters)

# get and print the cost (error) of the model
print(computeCost(X2, y2, g2))
print("查看训练过程")
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(np.arange(iters), cost2, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()

ui.split_line1()
print("PART 3 用 Scikit Learn 回归一次")
ui.split_line1()
from sklearn import linear_model
model = linear_model.LinearRegression()
model.fit(X, y)
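# Sanity check (illustrative): the fitted intercept and coefficients should be
# close to the theta found by gradient descent above. fit_intercept=True is
# sklearn's default, so the constant 'Ones' column just gets a ~0 coefficient.
print("Intercept and coefficients:", model.intercept_, model.coef_)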
print("scikit-learn 预测表现")
x = np.array(X[:, 1].A1)
f = model.predict(X).flatten()

fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()

ui.split_line1()
print("PART 4 直接解方程得到最佳 theta")
ui.split_line1()
print("注意:如果特征数量较大, normal equation 因为计算复杂度较高,所以无法使用")

def normalEqn(X, y):
    # X.T @ X is equivalent to X.T.dot(X); solves theta = (X^T X)^-1 X^T y
    theta = np.linalg.inv(X.T @ X) @ X.T @ y
    return theta
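
# A more numerically robust sketch would use the pseudo-inverse (which also
# tolerates a singular X.T @ X) or a least-squares solver:
#   theta = np.linalg.pinv(X.T @ X) @ X.T @ y
#   theta, *_ = np.linalg.lstsq(X, y, rcond=None)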

final_theta2 = normalEqn(X, y)
print("直接计算得到的 theta", final_theta2)
print("第一步中梯度下降得到的 theta 为", g)
print("可以看到还是有一定差别的")