import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from matplotlib import animation
import pandas as pd
from IPython.display import HTML
import time 
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from scipy.optimize import minimize
from collections import namedtuple 
# Global plotting defaults: ggplot style sheet and a 10x6 inch default figure.
plt.style.use('ggplot')
plt.rcParams["figure.figsize"] = (10, 6)

# Solve the Solow growth model with a gradient-descent-based method.
# Parameter container: fields A, s, α, δ (in this order).
SolowParameters = namedtuple("SolowParameters", ["A", "s", "α", "δ"])


def create_solow_params(A=2.0, s=0.3, α=0.3, δ=0.4):
    """Return a SolowParameters tuple, filling unspecified fields with defaults."""
    return SolowParameters(A, s, α, δ)
# Law of motion for capital.
def g(k, params):
    """Return next-period capital A*s*k**α + (1 - δ)*k for current capital k.

    `params` must unpack into exactly four values (A, s, α, δ).
    """
    tfp, saving_rate, capital_share, depreciation = params
    new_investment = tfp * saving_rate * k**capital_share
    surviving_capital = (1 - depreciation) * k
    return new_investment + surviving_capital
# Closed-form steady state.
def exact_fixed_point(params):
    """Return ((s*A)/δ)**(1/(1 - α)), the analytical steady state.

    `params` must unpack into exactly four values (A, s, α, δ).
    """
    A, s, α, δ = params
    base = (s * A) / δ
    exponent = 1/(1 - α)
    return base**exponent

# Plot the law of motion for capital together with the 45-degree line.
def plot_45(params, ax, fontsize=14):
    """Draw g(k), the 45-degree line and the analytical fixed point on `ax`.

    Parameters
    ----------
    params : SolowParameters
        Read through the attributes A, s, α, δ and passed on to `g` and
        `exact_fixed_point`.
    ax : matplotlib axes
        Axes that receives the curves, annotation, legend, title and labels.
    fontsize : int, optional
        Font size for the legend, title, tick labels and axis labels.
    """
    k_min, k_max = 0.0, 3.0
    k_grid = np.linspace(k_min, k_max, 1200)
    # Law of motion. The label must be wrapped in $...$ so matplotlib renders
    # it as math; the previous literal was duplicated text without delimiters.
    lb = r"$g(k) = sAk^{\alpha} + (1 - \delta)k$"
    ax.plot(k_grid, g(k_grid, params),  lw=2, alpha=0.6, label=lb)
    ax.plot(k_grid, k_grid, "k--", lw=1, alpha=0.7, label="45-degree")
    # Mark and annotate the fixed point.
    kstar = exact_fixed_point(params)
    fps = (kstar,)
    ax.plot(fps, fps, "go", ms=10, alpha=0.6)
    ax.annotate(r"$k^* = (sA / \delta)^{\frac{1}{1-\alpha}}$",
             xy=(kstar, kstar),
             xycoords="data",
             xytext=(20, -20),
             textcoords="offset points",
             fontsize=fontsize)
    ax.legend(loc="upper left", frameon=False, fontsize=fontsize)
    ax.set_title(r"Parameters: A = {:.2f}, s = {:.2f}, α = {:.2f}, δ = {:.2f}".format(params.A, params.s, params.α, params.δ), fontsize=fontsize)
    ax.set_yticks((0, 1, 2, 3))
    ax.set_yticklabels((0.0, 1.0, 2.0, 3.0), fontsize=fontsize)
    ax.set_ylim(0, 3)
    ax.set_xlabel("$k_t$", fontsize=fontsize)
    ax.set_ylabel("$k_{t+1}$", fontsize=fontsize)

# Show the law of motion and its fixed point for two parameterizations,
# one per panel.
params1 = create_solow_params()
params2 = create_solow_params(α=0.05, δ=0.5)
fig, axs = plt.subplots(1, 2, figsize=(12, 5))
for panel, parameterization in zip(axs, (params1, params2)):
    plot_45(parameterization, panel)
plt.tight_layout()
plt.show()

# Solve the Solow model by gradient descent
## Step 0: define the gradient (no Hessian is defined in this block)
def Gradg(k, params):
    """Return d/dk of the squared residual (s*A*k**α - δ*k)**2.

    `params` must unpack into exactly four values (A, s, α, δ).
    """
    A, s, α, δ = params
    residual = s*A*k**α - δ*k
    residual_slope = s*A*α*k**(α-1) - δ
    return 2*residual*residual_slope
## Step 1: iterate the gradient-descent update
## Initialization
# `params` and `stepsize` were previously read before being defined anywhere
# above this point, which raised NameError when the file ran top to bottom.
params = create_solow_params()  # baseline parameterization
stepsize = 1.0  # the original update k - Gradg(k) is a unit step
k0 = 1  # initial value
tol = 1e-5  # convergence tolerance on |k_new - k|
## Gradient descent on the squared residual (g(k) - k)**2, whose derivative
## is what Gradg returns.
k = k0
k_list = [k]
funv_list = [(g(k, params) - k)**2]
while True:
    k_new = k - stepsize * Gradg(k, params)
    k_list.append(k_new)
    funv_list.append((g(k_new, params) - k_new)**2)
    if abs(k_new - k) < tol:
        break
    k = k_new

print("True Steady State: k* = {:.4f}".format(exact_fixed_point(params)))
print('Gradient Descent:')
print('Converged to k = {:.4f}'.format(k_new))
print('Minimal Functional Value ={:.4f}'.format(funv_list[-1]))
print('Number of iterations =', len(funv_list))
# Plot the iteration path of k (left) and of the objective value (right).
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(k_list, 'o-', color='blue', label='Parameter Value Path: initial={:3g}, stepsize={:3g}'.format(k0, stepsize))
plt.xlabel('iterations')
plt.ylabel('$k^{(t)}$')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(funv_list, 'o-', color='red')
plt.xlabel('iterations')
plt.ylabel('$(g(k^{(t)}) - k^{(t)})^2$')
plt.tight_layout()
plt.show()

# Generic shape of the iterative loops used in this file. This is
# illustrative pseudo-code only: `condition` is not defined anywhere, so
# executing it would raise NameError. It is therefore kept as a comment.
#
#     while True:
#         if condition:
#             break  # leave the loop
#         # code block

# Gradient descent on a one-dimensional test function (a cubic, despite the
# original heading calling it quadratic).
# The function and its derivatives
def f(x):
    """Return x**3 + x**2 - 1."""
    cubic_term = x**3
    square_term = x**2
    return cubic_term + square_term - 1
def df(x):
    """Return 3*x**2 + 2*x, the first derivative of f."""
    quadratic_part = 3*x**2
    linear_part = 2*x
    return quadratic_part + linear_part
def d2f(x):
    """Return 6*x + 2, the second derivative of f."""
    curvature = 6*x + 2
    return curvature
# Step 0: plot the function on [-2, 2]
x = np.linspace(-2, 2, 100)
y = f(x)
plt.plot(x, y)
# The formula is wrapped in $...$ so it renders as math; the previous title
# contained the formula twice (plain text followed by raw LaTeX).
plt.title('Graph of $f(x)=x^3+x^2-1$')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.show()

# Gradient descent with a fixed step size and different starting points
# Experiment 1: step size = 0.2, initial point = 1.5
stepsize = 0.2  # learning rate
x0 = 1.5  # initial point
tol = 1e-5  # convergence tolerance on |x_new - x|
# Solve the optimization problem by iteration
x = x0
funv_list = [f(x)]
x_list = [x]
start_time = time.time()  # record the start time
while True:
    x_new = x - stepsize * df(x)
    x_list.append(x_new)
    funv_list.append(f(x_new))
    if abs(x_new - x) < tol:
        break
    x = x_new
end_time = time.time()  # record the end time
print("Running Time: {:.4f} 秒".format(end_time - start_time))
print('Converged to x =', x_new)
print('Minimal Functional Value =', f(x_new))
# NOTE: len(funv_list) also counts the starting point.
print('Number of iterations =', len(funv_list))
# Plot the iteration paths of x and of the function value
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(x_list, 'o-', color='blue',
         label='Parameter Value Path: initial={:3g}, stepsize={:3g}'.format(x0, stepsize))
plt.xlabel('iterations')
plt.ylabel('$x^{(k)}$')  # math label; was garbled duplicated text
plt.legend()
plt.title('Gradient Descent Path of X')
plt.subplot(1, 2, 2)
plt.plot(funv_list, 'o-', color='red')
plt.xlabel('iterations')
plt.ylabel('$f(x^{(k)})$')  # math label; was garbled duplicated text
plt.title('Gradient Descent Path of Function Value')
plt.tight_layout()
plt.show()

# Animate the iteration path (x, f(x)) on top of the graph of f
fig, ax1 = plt.subplots(1, 1)
x = np.linspace(-1.5, 1.5, 100)
y = f(x)
ax1.plot(x, y)
line1, = ax1.plot([], [], 'o-', color='blue')
ax1.set_xlabel('x')
ax1.set_ylabel('$f(x)$')  # math label; was the garbled text 'f(x)f(x)'
ax1.set_title('Gradient Descent Path of X')
def update(frame):
    """Show the first frame+1 iterates and return the artists to redraw."""
    line1.set_data(x_list[:frame+1], funv_list[:frame+1])
    # With blit=True the callback must return an iterable of artists. The
    # previous version also returned `line2`, which is never defined and
    # raised NameError on the first frame.
    return (line1,)

ani = FuncAnimation(fig, update, frames=len(x_list), interval=300, blit=True)
plt.close(fig)  # avoid showing the static figure a second time
HTML(ani.to_jshtml())

# Experiment 2: step size = 0.3, initial point = 1.5
stepsize = 0.3  # learning rate
x0 = 1.5  # initial point
tol = 1e-5  # convergence tolerance on |x_new - x|
# Solve the optimization problem by iteration, capped at Max_iter updates
x = x0
funv_list = [f(x)]
x_list = [x]
Max_iter = 5
for i in range(Max_iter):
    x_new = x - stepsize * df(x)
    x_list.append(x_new)
    funv_list.append(f(x_new))
    if abs(x_new - x) < tol:
        break
    x = x_new
# NOTE: these messages are printed even when the loop stops at Max_iter
# without meeting the tolerance.
print('Converged to x =', x_new)
print('Minimal Functional Value =', f(x_new))
print('Number of iterations =', len(funv_list))
# Plot the iteration paths of x and of the function value
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(x_list, 'o-', color='blue',
         label='Parameter Value Path: initial={:3g}, stepsize={:3g}'.format(x0, stepsize))
plt.xlabel('iterations')
plt.ylabel('$x^{(k)}$')  # math label; was garbled duplicated text
plt.legend()
plt.title('Exploding Gradient Descent Path of X')
plt.subplot(1, 2, 2)
plt.plot(funv_list, 'o-', color='red')
plt.xlabel('iterations')
plt.ylabel('$f(x^{(k)})$')  # math label; was garbled duplicated text
plt.title('Exploding Gradient Descent Path of Function Value')
plt.tight_layout()
plt.show()

# Experiment 4: step size = 0.002, initial point = 1.5
# (the original heading said 0.001; the code uses 0.002)
stepsize = 0.002  # learning rate
x0 = 1.5  # initial point
tol = 1e-5  # convergence tolerance on |x_new - x|
# Solve the optimization problem by iteration
x = x0
funv_list = [f(x)]
x_list = [x]
start_time = time.time()  # record the start time
while True:
    x_new = x - stepsize * df(x)
    x_list.append(x_new)
    funv_list.append(f(x_new))
    if abs(x_new - x) < tol:
        break
    x = x_new
end_time = time.time()  # record the end time
print("Running Time: {:.4f} 秒".format(end_time - start_time))
print('Converged to x =', x_new)
print('Minimal Functional Value =', f(x_new))
print('Number of iterations =', len(funv_list))
# Plot the iteration paths of x and of the function value
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(x_list, 'o-', color='blue',
         label='Parameter Value Path: initial={:3g}, stepsize={:3g}'.format(x0, stepsize))
plt.xlabel('iterations')
plt.ylabel('$x^{(k)}$')  # math label; was garbled duplicated text
plt.legend()
plt.title('Gradient Descent Path of X')
plt.subplot(1, 2, 2)
plt.plot(funv_list, 'o-', color='red')
plt.xlabel('iterations')
plt.ylabel('$f(x^{(k)})$')  # math label; was garbled duplicated text
plt.title('Gradient Descent Path of Function Value')
plt.tight_layout()
plt.show()

# Experiment 5: step size = 0.1, initial point = -1
# (the original heading said 1.5; the code starts from -1)
stepsize = 0.1  # learning rate
x0 = -1  # initial point
tol = 1e-5  # convergence tolerance on |x_new - x|
# Solve the optimization problem by iteration, capped at max_iter updates
x = x0
funv_list = [f(x)]
x_list = [x]
start_time = time.time()  # record the start time
max_iter = 10
for i in range(max_iter):
    x_new = x - stepsize * df(x)
    x_list.append(x_new)
    funv_list.append(f(x_new))
    if abs(x_new - x) < tol:
        break
    x = x_new
end_time = time.time()  # record the end time
print("Running Time: {:.4f} 秒".format(end_time - start_time))
# NOTE: these messages are printed even when the loop stops at max_iter
# without meeting the tolerance.
print('Converged to x =', x_new)
print('Minimal Functional Value =', f(x_new))
print('Number of iterations =', len(funv_list))
# Plot the iteration paths of x and of the function value
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(x_list, 'o-', color='blue',
         label='Parameter Value Path: initial={:3g}, stepsize={:3g}'.format(x0, stepsize))
plt.xlabel('iterations')
plt.ylabel('$x^{(k)}$')  # math label; was garbled duplicated text
plt.legend()
plt.title('Gradient Descent Path of X')
plt.subplot(1, 2, 2)
plt.plot(funv_list, 'o-', color='red')
plt.xlabel('iterations')
plt.ylabel('$f(x^{(k)})$')  # math label; was garbled duplicated text
plt.title('Gradient Descent Path of Function Value')
plt.tight_layout()
plt.show()

# Experiment 6: Newton's method, initial point = 1.5
x0 = 1.5  # initial point
tol = 1e-5  # convergence tolerance on |x_new - x|
# Solve the optimization problem by iteration
x = x0
funv_list = [f(x)]
x_list = [x]
start_time = time.time()  # record the start time
while True:
    x_new = x - 1/d2f(x)* df(x)  # Newton update: gradient scaled by 1/f''(x)
    x_list.append(x_new)
    funv_list.append(f(x_new))
    if abs(x_new - x) < tol:
        break
    x = x_new
end_time = time.time()  # record the end time
print("Running Time: {:.4f} 秒".format(end_time - start_time))
print('Converged to x =', x_new)
print('Minimal Functional Value =', f(x_new))
print('Number of iterations =', len(funv_list))
# Plot the iteration paths of x and of the function value.
# Newton's method has no step size, so the legend no longer prints the stale
# `stepsize` left over from the previous experiment, and the titles name the
# method actually used.
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(x_list, 'o-', color='blue',
         label='Parameter Value Path: initial={:3g}'.format(x0))
plt.xlabel('iterations')
plt.ylabel('$x^{(k)}$')  # math label; was garbled duplicated text
plt.legend()
plt.title('Newton Method Path of X')
plt.subplot(1, 2, 2)
plt.plot(funv_list, 'o-', color='red')
plt.xlabel('iterations')
plt.ylabel('$f(x^{(k)})$')  # math label; was garbled duplicated text
plt.title('Newton Method Path of Function Value')
plt.tight_layout()
plt.show()

# Experiment 7: Newton's method, initial point = -1.5
# (per the original heading, this run stops at a local maximum)
x0 = -1.5  # initial point
tol = 1e-5  # convergence tolerance on |x_new - x|
# Solve the optimization problem by iteration
x = x0
funv_list = [f(x)]
x_list = [x]
start_time = time.time()  # record the start time
while True:
    x_new = x - 1/d2f(x)* df(x)  # Newton update: gradient scaled by 1/f''(x)
    x_list.append(x_new)
    funv_list.append(f(x_new))
    if abs(x_new - x) < tol:
        break
    x = x_new
end_time = time.time()  # record the end time
print("Running Time: {:.4f} 秒".format(end_time - start_time))
print('Converged to x =', x_new)
print('Minimal Functional Value =', f(x_new))
print('Number of iterations =', len(funv_list))
# Plot the iteration paths of x and of the function value.
# Newton's method has no step size, so the legend no longer prints the stale
# `stepsize` left over from an earlier experiment.
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(x_list, 'o-', color='blue',
         label='Parameter Value Path: initial={:3g}'.format(x0))
plt.xlabel('iterations')
plt.ylabel('$x^{(k)}$')  # math label; was garbled duplicated text
plt.legend()
plt.title('Newton Method Path of X')
plt.subplot(1, 2, 2)
plt.plot(funv_list, 'o-', color='red')
plt.xlabel('iterations')
plt.ylabel('$f(x^{(k)})$')  # math label; was garbled duplicated text
plt.title('Newton Method Path of Function Value')
plt.tight_layout()
plt.show()

# Data generating process: y = true_b + true_w * x + noise
true_b, true_w = 1, 2
N = 100  # sample size
# Draw the sample. The seed and the order of the two draws fix the data.
np.random.seed(42)
x = np.random.rand(N, 1)
epsilon = .1 * np.random.randn(N, 1)
y = true_b + true_w * x + epsilon
# Scatter plot of x against y
plt.figure(figsize=(6, 4))
plt.scatter(x, y)
plt.gca().set(xlabel="x", ylabel="y", title="Scatter plot of x and y")
plt.show()

# Optimization with gradient descent
# Step 0: initialize the parameters (one standard-normal draw each, seed 42)
np.random.seed(42)
b = np.random.randn(1)
w = np.random.randn(1)
print('initialized parameter values:', b, w)
# Step 1: compute and visualize the model prediction
yhat = b + w * x # model prediction at the sample points
x_range = np.linspace(0, 1, 101) # evaluation grid of 101 points on [0, 1]
yhat_range = b + w * x_range # prediction on the grid
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_ylim([0, 3])
ax.scatter(x, y, color='blue') # the data set
ax.plot(x_range, yhat_range, label='Model\'s predictions', c='red', linestyle='--')
ax.annotate('b = {:.4f} w = {:.4f}'.format(b[0], w[0]), xy=(.2, .55), c='red')
ax.legend(loc=0)
fig.tight_layout()

# Visualize the MSE (loss function) surface
# Build a 101x101 grid of (b, w) values spanning true value +/- 3
b_range = np.linspace(true_b - 3, true_b + 3, 101)
w_range = np.linspace(true_w - 3, true_w + 3, 101)
bs, ws = np.meshgrid(b_range, w_range)  # coordinate matrices for b and w
print('shape', bs.shape, ws.shape)
for grid_label, grid_values in (('bs:', bs), ('ws:', ws)):
    print('---------------------------------------')
    print(grid_label, grid_values)

# Compute the MSE for every (b, w) pair on the grid.
# Broadcasting x to shape (N, 1, 1) against the grids gives one prediction
# surface per observation. This replaces np.apply_along_axis with a lambda,
# which looped over the rows of x in Python and produced the same array.
all_predictions = bs + ws * x[:, :, np.newaxis]
print("predictions", all_predictions.shape)
all_labels = y.reshape(-1, 1, 1)
print('transformed data shape:', all_labels.shape)
all_errors = (all_predictions - all_labels)
print('errors shape:', all_errors.shape)
all_losses = (all_errors ** 2).mean(axis=0)  # average over observations
print('losses shape:', all_losses.shape)

# Plot the MSE surface over (b, w)
figure = plt.figure(figsize=(12, 6))
# Panel 1: 3-D surface with contours drawn at offset -1
ax1 = figure.add_subplot(1, 2, 1, projection='3d')
ax1.set_xlabel('b')
ax1.set_ylabel('w')
ax1.set_title('Loss Surface')
surf = ax1.plot_surface(bs, ws, all_losses, rstride=1, cstride=1, alpha=.5, cmap=plt.cm.jet, linewidth=0, antialiased=True)
ax1.contour(bs[0, :], ws[:, 0], all_losses, 10, offset=-1, cmap=plt.cm.jet)
ax1.view_init(30, -60)
# Panel 2: contour plot with the regression minimum and the random start
regression = LinearRegression()
regression.fit(x, y)
b_minimum, w_minimum = regression.intercept_[0], regression.coef_[0][0] # minimum taken from the fitted regression
ax2 = figure.add_subplot(1, 2, 2)
ax2.set_xlabel('b')
ax2.set_ylabel('w')
ax2.set_title('Loss Surface')
CS = ax2.contour(bs[0, :], ws[:, 0], all_losses, cmap=plt.cm.jet)
ax2.clabel(CS, inline=1, fontsize=10)
ax2.scatter(b_minimum, w_minimum, c='k') # the minimum
ax2.scatter(b, w, c='k') # the random starting point
# NOTE(review): the two annotation positions below are hard-coded, not
# derived from (b, w) or (b_minimum, w_minimum).
ax2.annotate('Random Start', xy=(-.2, 0.05), c='k')
ax2.annotate('Minimum', xy=(.5, 2.2), c='k')
figure.tight_layout()

# Experiment 1: random start, step size = 0.5
# (the original heading said 0.2; the code uses 0.5)
# Step 2: define the gradient and the Hessian of the MSE
stepsize = 0.5 # learning rate
b0 = b[0] # initial intercept (the random draw above)
w0 = w[0] # initial slope (the random draw above)
tol = 1e-5 # convergence tolerance
def mse_gradient(b, w):
    """Return the partial derivatives of mean((b + w*x - y)**2) w.r.t. b and w.

    Uses the module-level arrays `x` and `y`; returns a (partial_b, partial_w)
    tuple.
    """
    partial_b = (2 * (b + w * x - y)).mean()
    partial_w = (2 * (b + w * x - y) * x).mean()
    return partial_b, partial_w
# Second derivatives of the MSE; they depend on x only, not on (b, w).
partial2_b = 2
partial_bw = (2 * x).mean()
partial2_w = (2 * (x ** 2)).mean()
mse_hessian = np.array([[partial2_b, partial_bw], [partial_bw, partial2_w]])
# Step 3: solve the optimization problem by iteration
theta0 = np.array([b0, w0])  # starting point (b0, w0)
funv_list = [np.mean((b0 + w0 * x - y) ** 2)]
theta_list = [theta0]
theta = theta0
start_time = time.time()  # record the start time
converged = False
while not converged:
    gradient = mse_gradient(theta[0], theta[1])
    theta_new = theta - stepsize * np.array(gradient)
    theta_list.append(theta_new)
    funv_list.append(np.mean((theta_new[0] + theta_new[1] * x - y) ** 2))
    converged = np.linalg.norm(theta_new - theta) < tol
    if not converged:
        theta = theta_new
end_time = time.time()  # record the end time
print("Running Time: {:.4f} 秒".format(end_time - start_time))
print('Converged to (b,w) =({:.4f},{:.4f})'.format(theta_new[0], theta_new[1]))
print('Minimal Functional Value =', funv_list[-1])
print('Number of iterations =', len(funv_list))
# Plot the iteration paths of b, w and of the function value
b_path = [point[0] for point in theta_list]
w_path = [point[1] for point in theta_list]
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(b_path, 'o-', color='blue', label='b (true=1): initial={:.3g}, stepsize={:3g}'.format(b0, stepsize))
plt.plot(w_path, 'o-', color='green', label='w (true=2): initial={:.3g}, stepsize={:3g}'.format(w0, stepsize))
plt.xlabel('iterations')
plt.ylabel('$(b^{(k)},w^{(k)})$')
plt.legend()
plt.title('Gradient Descent Path of (b,w)')
plt.subplot(1, 2, 2)
plt.plot(funv_list, 'o-', color='red')
plt.xlabel('iterations')
plt.ylabel('$MSE(b^{(k)},w^{(k)})$')
plt.title('Gradient Descent Path of Function Value')
plt.tight_layout()
plt.show()

# Experiment 2: start in a neighborhood of the minimum. `stepsize` and `tol`
# are not reset here, so the values set for the previous experiment are used.
# Step 2: choose the starting point
b0 = 0.8  # initial intercept
w0 = 1.8  # initial slope
# Step 3: solve the optimization problem by iteration
theta0 = np.array([b0, w0])  # starting point (b0, w0)
funv_list = [np.mean((b0 + w0 * x - y) ** 2)]
theta_list = [theta0]
theta = theta0
start_time = time.time()  # record the start time
while True:
    gradient = mse_gradient(theta[0], theta[1])
    theta_new = theta - stepsize * np.array(gradient)
    theta_list.append(theta_new)
    funv_list.append(np.mean((theta_new[0] + theta_new[1] * x - y) ** 2))
    if np.linalg.norm(theta_new - theta) < tol:
        break
    theta = theta_new
end_time = time.time()  # record the end time
print("Running Time: {:.4f} 秒".format(end_time - start_time))
print('Converged to (b,w) =({:.4f},{:.4f})'.format(theta_new[0], theta_new[1]))
print('Minimal Functional Value =', funv_list[-1])
print('Number of iterations =', len(funv_list))
# Plot the iteration paths of b, w and of the function value
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot([theta[0] for theta in theta_list], 'o-', color='blue', label='b (true=1): initial={:.3g}, stepsize={:3g}'.format(b0, stepsize))
plt.plot([theta[1] for theta in theta_list], 'o-', color='green', label='w (true=2): initial={:.3g}, stepsize={:3g}'.format(w0, stepsize))
plt.xlabel('iterations')
plt.ylabel('$(b^{(k)},w^{(k)})$')  # math label; was garbled duplicated text
plt.legend()
plt.title('Gradient Descent Path of (b,w)')
plt.subplot(1, 2, 2)
plt.plot(funv_list, 'o-', color='red')
plt.xlabel('iterations')
plt.ylabel('$MSE(b^{(k)},w^{(k)})$')  # math label; was garbled duplicated text
plt.title('Gradient Descent Path of Function Value')
plt.tight_layout()
plt.show()

# Experiment 3: Newton's method from the random start (no step size is used)
# Step 2: choose the starting point
b0 = b[0]  # initial intercept (the random draw above)
w0 = w[0]  # initial slope (the random draw above)
tol = 1e-5  # convergence tolerance
# Step 3: solve the optimization problem by iteration
theta0 = np.array([b0, w0])  # starting point (b0, w0)
funv_list = [np.mean((b0 + w0 * x - y) ** 2)]
theta_list = [theta0]
theta = theta0
start_time = time.time()  # record the start time
while True:
    gradient = np.array(mse_gradient(theta[0], theta[1]))
    # Newton update: solve H @ step = gradient rather than forming the
    # explicit inverse of the Hessian.
    theta_new = theta - np.linalg.solve(mse_hessian, gradient)
    theta_list.append(theta_new)
    funv_list.append(np.mean((theta_new[0] + theta_new[1] * x - y) ** 2))
    if np.linalg.norm(theta_new - theta) < tol:
        break
    theta = theta_new
end_time = time.time()  # record the end time
print("Running Time: {:.4f} 秒".format(end_time - start_time))
print('Converged to (b,w) =({:.4f},{:.4f})'.format(theta_new[0], theta_new[1]))
print('Minimal Functional Value =', funv_list[-1])
print('Number of iterations =', len(funv_list))
# Plot the iteration paths of b, w and of the function value.
# The legends no longer print `stepsize`: Newton's method does not use it,
# and the value shown was left over from the gradient-descent runs.
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot([theta[0] for theta in theta_list], 'o-', color='blue', label='b (true=1): initial={:.3g}'.format(b0))
plt.plot([theta[1] for theta in theta_list], 'o-', color='green', label='w (true=2): initial={:.3g}'.format(w0))
plt.xlabel('iterations')
plt.ylabel('$(b^{(k)},w^{(k)})$')  # math label; was garbled duplicated text
plt.legend()
plt.title('Newton-Method Path of (b,w)')
plt.subplot(1, 2, 2)
plt.plot(funv_list, 'o-', color='red')
plt.xlabel('iterations')
plt.ylabel('$MSE(b^{(k)},w^{(k)})$')
plt.title('Newton-Method Path of Function Value')
plt.tight_layout()
plt.show()

# 使用 Scipy.optimize 求解回归问题 
from scipy.optimize import minimize
def mse_loss(theta):
    """Return mean((b + w*x - y)**2) for theta = (b, w), using module-level x, y."""
    intercept, slope = theta
    residuals = intercept + slope * x - y
    return np.mean(residuals ** 2)
def mse_gradient_fun(theta):
    """Return the MSE gradient at theta = (b, w) as a length-2 array.

    Uses the module-level arrays `x` and `y`.
    """
    b, w = theta
    doubled_residuals = 2 * (b + w * x - y)
    partial_b = doubled_residuals.mean()
    partial_w = (doubled_residuals * x).mean()
    return np.array([partial_b, partial_w])
def mse_hessian_fun(theta):
    """Return the module-level `mse_hessian`; `theta` is accepted but not used."""
    hessian = mse_hessian
    return hessian
# Step 1: initial guess
theta0 = np.array([b0, w0])  # starting point (b0, w0)
# Step 2: run scipy's Newton-CG optimizer with the analytic gradient and Hessian
newton_cg_options = {'xtol': 1e-5, 'disp': True}
result = minimize(
    mse_loss,
    theta0,
    method='Newton-CG',
    jac=mse_gradient_fun,
    hess=mse_hessian_fun,
    options=newton_cg_options,
)
b_hat, w_hat = result.x[0], result.x[1]
print('Converged to (b,w) =({:.4f},{:.4f})'.format(b_hat, w_hat))
print('Regression Coefficients (b,w) =({:.4f},{:.4f})'.format(b_minimum, w_minimum))

# Example: numerical differentiation of cos(x) on [0, 2*pi)
h = 0.1  # grid spacing
x = np.arange(0, 2*np.pi, h)  # grid
y = np.cos(x)  # function values
forward_diff = np.diff(y)/h  # forward differences, located at x[:-1]
backward_diff = (y[1:]-y[:-1])/h  # backward differences, located at x[1:]
central_diff = (y[2:]-y[:-2])/(2*h)  # central differences, located at x[1:-1]
x_fdiff = x[:-1]  # grid of the forward differences
x_bdiff = x[1:]  # grid of the backward differences
x_cdiff = x[1:-1]  # grid of the central differences
# Exact derivative -sin(x) on each scheme's own grid. The backward error was
# previously measured against the forward grid, which just repeated the
# forward error because the two difference arrays are numerically identical.
exact_solution = -np.sin(x_fdiff)
exact_backward = -np.sin(x_bdiff)
exact_central = -np.sin(x_cdiff)
# Plot the approximations against the exact derivative
plt.figure(figsize = (12, 6))
plt.plot(x_fdiff, forward_diff, '--', label = 'Forward-difference approximation')
plt.plot(x_bdiff, backward_diff, '-.',label = 'Backward-difference approximation')
plt.plot(x_cdiff, central_diff, ':', label = 'Central-difference approximation')
plt.plot(x_fdiff, exact_solution, label = 'Exact solution')
plt.legend()
plt.show()
# Maximum absolute error of each scheme
print('Maximum Error: ForwardDiff={:.4f}, BackwardDiff={:.4f}, CentralDiff={:.4f}'.format(
    np.max(np.abs(forward_diff - exact_solution)),
    np.max(np.abs(backward_diff - exact_backward)),
    np.max(np.abs(central_diff - exact_central))))

# Compare the accuracy of forward and central differences as h is halved
h = 1
iterations = 20
step_size = []
# One [forward_error, central_error] pair per step size
max_error = []
for i in range(iterations):
    h /= 2
    step_size.append(h)
    x = np.arange(0, 2 * np.pi, h)
    y = np.cos(x)
    forward_diff = np.diff(y)/h  # forward differences, located at x[:-1]
    central_diff = (y[2:]-y[:-2])/(2*h)  # central differences, located at x[1:-1]
    x_fdiff = x[:-1]  # grid of the forward differences
    x_cdiff = x[1:-1]  # grid of the central differences
    exact_solution = -np.sin(x_fdiff)  # exact derivative on the forward grid
    # np.max/np.abs reduce inside numpy. The builtin max() walked the array
    # element by element in Python, which is very slow on the finest grids
    # (h reaches 2**-20).
    max_error.append([np.max(np.abs(exact_solution - forward_diff)),
                      np.max(np.abs(exact_solution[1:] - central_diff))])
# Log-log plot of the maximum error against the step size
plt.figure(figsize = (10, 6))
plt.loglog(step_size, [r[0] for r in max_error], 'o-', label='Forward Difference')
plt.loglog(step_size, [r[1] for r in max_error], '*-.', label='Central Difference')
plt.xlabel('Step size (h)')
plt.ylabel('Maximum Error')
plt.legend()
plt.title('Step Size and Accuracy of Numerical Differentiation')
plt.show()

import autograd.numpy as au
from autograd import elementwise_grad

def f(x):
    """Objective function cos(x), written with autograd's numpy wrapper."""
    return au.cos(x)


grad_f = elementwise_grad(f)  # elementwise derivative of f
# Evaluate once on the central-difference grid left over from the accuracy
# loop above, together with the matching slice of the exact derivative.
autograd_diff = grad_f(x_cdiff)
exact_on_cdiff = exact_solution[1:]
# Plot the approximations against the exact derivative
plt.figure(figsize = (12, 6))
plt.plot(x_cdiff, autograd_diff, '--', label = 'Autograd approximation')
plt.plot(x_cdiff, central_diff, ':', label = 'Central-difference approximation')
plt.plot(x_cdiff, exact_on_cdiff, label = 'Exact solution')
plt.legend()
plt.show()
# Maximum absolute errors
autograd_error = np.max(np.abs(autograd_diff - exact_on_cdiff))
central_error = np.max(np.abs(central_diff - exact_on_cdiff))
print('Maximum Error: AutoDiff={:.5f}, CentralDiff={:.5f}'.format(
    autograd_error,
    central_error))

# Numerical integration rules, illustrated with f(x) = sin(x) on [0, pi].
# The errors below are measured against the value 2.
# NOTE: this rebinds the module-level names a, b, n, h, x and f (f becomes an
# array of samples, no longer a function).
a = 0
b = np.pi
n = 11  # number of grid points; Simpson's rule below needs n to be odd
h = (b - a) / (n - 1)
x = np.linspace(a, b, n)
f = np.sin(x)
# Left-endpoint Riemann sum
I_riemannL = h * sum(f[:-1])
err_riemannL = 2 - I_riemannL
# Right-endpoint Riemann sum
I_riemannR = h * sum(f[1::])
err_riemannR = 2 - I_riemannR
# Midpoint rule
I_mid = h * sum(np.sin((x[:-1] + x[1:])/2))
err_mid = 2 - I_mid
# Trapezoidal rule
I_trap = (h/2)*(f[0] + 2 * sum(f[1:n-1]) + f[n-1])
err_trap = 2 - I_trap
# Simpson's rule: weight 4 on odd interior nodes and weight 2 on even
# *interior* nodes. The even slice used to start at index 0, which counted
# f[0] three times; that was invisible here only because sin(0) = 0.
I_simp = (h/3) * (f[0] + 2*sum(f[2:n-1:2]) + 4*sum(f[1:n-1:2]) + f[n-1])
err_simp = 2 - I_simp
print('Start_point, End_point, Mid_point, Trape, Simpson \n ({:.4f}, {:.4f}, {:.4f}, {:.4f}, {:.4f})'.format(I_riemannL, I_riemannR, I_mid, I_trap, I_simp))
# The last two fields used '{:4f}' (width 4, default precision) instead of
# '{:.4f}', so they printed with six decimals unlike the others.
print("Start_Error, End_Error, Mid_Error, Trape_Error, Simpson_Error \n ({:.4f}, {:.4f}, {:.4f}, {:.4f}, {:.4f})".format(err_riemannL, err_riemannR, err_mid, err_trap, err_simp))

# Generic fixed-point iteration x_{k+1} = T(x_k). This is illustrative
# pseudo-code only: neither `T` nor `norm` is defined in this file, so
# executing it would raise NameError. It is therefore kept as a comment.
#
#     x = x0
#     for i in range(max_iter):
#         x_new = T(x)
#         if norm(x_new - x) < tol:
#             break
#         x = x_new

# Find the steady state by direct iteration
def compute_iterates(k_0, f, params, n=25):
    """Return the length-n series k_0, f(k_0, params), f(f(k_0, params), params), ...

    `f` is called as f(k, params) once after each stored value, so n times in
    total.
    """
    series = []
    current = k_0
    for _ in range(n):
        series.append(current)
        current = f(current, params)
    return series
# Iterate the law of motion from a low and a high initial capital stock and
# compare both paths with the analytical steady state.
params = create_solow_params()
k_0 = 0.25
k_series = compute_iterates(k_0, g, params)
k_0 = 3.0
k_series1 = compute_iterates(k_0, g, params)
k_star = exact_fixed_point(params)
fig, ax = plt.subplots()
ax.plot(k_series, 'o')
ax.plot(k_series1, 'v', color='blue')
ax.plot([k_star] * len(k_series), 'k--')  # steady-state reference line
ax.set_xlabel("time")
# Math labels; the previous strings were the garbled text "ktk_t".
ax.set_ylabel("$k_t$")
ax.set_title("Time Series of $k_t$ using Direct Iterations")
ax.set_ylim(0, 3)
plt.show()

### Fixed-point iteration for the price indices of a gravity model
# Parameterization
n = 10  # number of countries
GravityParams = namedtuple("GravityParams", ('σ', 'Theta', 'T'))
np.random.seed(42)
rand_y = np.random.rand(n, 1)
# exp(rand_y) normalized to sum to one, shape (n, 1)
Theta0 =1/np.exp(rand_y).sum() * np.exp(rand_y)
# n x n matrix drawn uniformly from [2, 4), with ones on the diagonal
T0 = np.random.uniform(2, 4, size=(n,n))
T0[np.diag_indices(n)] = 1
def create_gravity_params(σ=2, Theta=Theta0, T=T0):
    """Return a GravityParams tuple built from the given σ, Theta and T.

    The defaults are the module-level arrays Theta0 and T0; they are shared,
    not copied. The previous version always stored T0 and silently ignored
    the `T` argument.
    """
    return GravityParams(σ=σ, Theta=Theta, T=T)
def price_iteration(params, tol = 1e-5, max_iter=1000):
    """Iterate P = (T**(1-σ) @ (Theta * P**(σ-1)))**(1/(1-σ)) with damping.

    Parameters
    ----------
    params : tuple
        Unpacked as (σ, Theta, T). The matrix product below requires T to be
        n x n and Theta to broadcast against the (n, 1) price vector, where
        n = len(Theta).
    tol : float, optional
        The iteration stops once the Euclidean norm of the difference between
        the new map value and the current iterate falls below this value.
    max_iter : int, optional
        Maximum number of updates.

    Returns
    -------
    (P, Plist)
        On convergence P is the last map value. Otherwise a warning is printed
        and P is the last damped iterate. Plist holds the initial guess
        followed by every map value computed.
    """
    σ, Theta, T = params
    n = len(Theta)
    P = np.ones((n, 1))  # initial guess: a column of ones
    Plist = [P.copy()]  # history of the price index
    for _ in range(max_iter):
        P_new = ((T**(1-σ)) @ (Theta * P**(σ - 1)))**(1/(1-σ))
        Plist.append(P_new.copy())
        if np.linalg.norm(P_new - P) < tol:
            print("Converged to price index after {} iterations.".format(len(Plist)))
            return P_new, Plist
        P = 0.5*P_new + 0.5*P  # damped update: average of new and old values
    # Reached when the loop ends without convergence, including max_iter == 0,
    # for which the previous version fell through and returned None.
    print("Warning: Maximum iterations reached without convergence.")
    return P, Plist

# Solve for the price index by fixed-point iteration (default gravity parameters)
params = create_gravity_params()
_price_solution = price_iteration(params)
Pindex, Plist = _price_solution
print(Pindex)

#求解投入分配（input allocations） 
from scipy.stats import pareto 
# Parameter container for the two-product input allocation problem.
mp_params = namedtuple(
    "mp_params",
    ['βl_1', 'βl_2', 'βk_1', 'βk_2', 'κ_1', 'κ_2', 'v_1', 'v_2'],
)


def mp_input_params(βl_1=0.2, βl_2=0.2, βk_1=0.3, βk_2=0.2, κ_1=0.9, κ_2=0.8, v_1=1, v_2=1):
    """Return an mp_params tuple, filling unspecified fields with defaults."""
    return mp_params(βl_1, βl_2, βk_1, βk_2, κ_1, κ_2, v_1, v_2)
def mp_data(params, n=1000):
    """Simulate firm-level data and return it as the list [R, K, L, M].

    R has shape (n, 2): Pareto draws plus 1, with the second column shifted up
    by a further 2. K, L and M each have shape (n, 1) and are exponentials of
    normal draws. `params` must unpack into eight values, but none of them
    enters the simulation. The global numpy seed is reset to 42 on every call.
    """
    (βl_1, βl_2, βk_1, βk_2, κ_1, κ_2, v_1, v_2) = params
    np.random.seed(42)  # fixed seed for reproducibility
    # Revenues of the two products
    R = pareto.rvs(3., scale=1., size=(n, 2)) + 1
    R[:, 1] += 2.0  # shift the second product's revenue upwards
    # Capital, labor and materials, drawn in this order from log-normals with
    # (mean, std) of the underlying normal given below.
    lognormal_specs = ((1, 2), (0, 2), (2, 5))
    K, L, M = (
        np.exp(np.random.normal(mean, std, (n, 1)))
        for mean, std in lognormal_specs
    )
    return [R, K, L, M]

def _mp_output_elasticities(k_log, l_log, m_log, βk, βl, βm, κ, v):
    """Return (γ_k, γ_l, γ_m) for one product from its log input allocations."""
    ρ = 1 - 1/κ
    k_pow = np.exp(k_log)**ρ
    l_pow = np.exp(l_log)**ρ
    m_pow = np.exp(m_log)**ρ
    denom = βk*k_pow + βl*l_pow + βm*m_pow
    γ_k = v*βk*k_pow/denom
    γ_l = v*βl*l_pow/denom
    # Materials take the remainder, so the three elasticities sum to v.
    return γ_k, γ_l, v - γ_k - γ_l


def _mp_split_log(total, R, γ):
    """Return the log of `total` split across both products by weights R[:, j]*γ[j]."""
    r_1, r_2 = R[:, 0:1], R[:, 1:2]
    denom = r_1*γ[0] + r_2*γ[1]
    return np.log([total*r_1*γ[0]/denom, total*r_2*γ[1]/denom])


def mp_input_allocation(params, data, tol=1e-5, max_iter=1000):
    """Iterate on the log input allocations of a two-product firm.

    Parameters
    ----------
    params : tuple
        Unpacked as (βl_1, βl_2, βk_1, βk_2, κ_1, κ_2, v_1, v_2). The
        materials weights are derived as 1 - βl_j - βk_j.
    data : sequence
        Unpacked as (R, K, L, M). R is indexed by columns 0 and 1, one per
        product. K, L and M are assumed to be (n, 1) arrays as produced by
        mp_data -- TODO confirm for other callers.
    tol : float, optional
        Convergence requires the norm of the change in each of the labor,
        capital and materials allocations to fall below this value.
    max_iter : int, optional
        Maximum number of updates.

    Returns
    -------
    (Lopt, Kopt, Mopt, Llist, Klist, Mlist)
        Final log allocations (index 0 is product 1, index 1 is product 2)
        and the history of every iterate, starting with the initial guess. If
        max_iter updates are performed without convergence, a warning is
        printed and the last iterate is returned; with max_iter == 0 that is
        the initial guess (the previous version returned None in that case).
    """
    βl_1, βl_2, βk_1, βk_2, κ_1, κ_2, v_1, v_2 = params
    βm_1 = 1.0 - βl_1 - βk_1  # materials weight, product 1
    βm_2 = 1.0 - βl_2 - βk_2  # materials weight, product 2
    R, K, L, M = data
    # Initial guess: half of each log total for both products.
    Lopt = [0.5*np.log(L), 0.5*np.log(L)]
    Kopt = [0.5*np.log(K), 0.5*np.log(K)]
    Mopt = [0.5*np.log(M), 0.5*np.log(M)]
    Llist = [Lopt.copy()]
    Klist = [Kopt.copy()]
    Mlist = [Mopt.copy()]

    for _ in range(max_iter):
        # Output elasticities of each product at the current allocation
        γ_1 = _mp_output_elasticities(Kopt[0], Lopt[0], Mopt[0], βk_1, βl_1, βm_1, κ_1, v_1)
        γ_2 = _mp_output_elasticities(Kopt[1], Lopt[1], Mopt[1], βk_2, βl_2, βm_2, κ_2, v_2)
        γ_k, γ_l, γ_m = zip(γ_1, γ_2)  # regroup by input: (product 1, product 2)
        # Updated log allocations of each input
        Kopt_new = _mp_split_log(K, R, γ_k)
        Lopt_new = _mp_split_log(L, R, γ_l)
        Mopt_new = _mp_split_log(M, R, γ_m)
        Llist.append(Lopt_new.copy())
        Klist.append(Kopt_new.copy())
        Mlist.append(Mopt_new.copy())

        if (np.linalg.norm(Lopt_new - Lopt) < tol
                and np.linalg.norm(Kopt_new - Kopt) < tol
                and np.linalg.norm(Mopt_new - Mopt) < tol):
            print("Converged to input allocations after {} iterations.".format(len(Llist)))
            return Lopt_new, Kopt_new, Mopt_new, Llist, Klist, Mlist
        Lopt, Kopt, Mopt = Lopt_new, Kopt_new, Mopt_new

    print("Warning: Maximum iterations reached without convergence.")
    return Lopt, Kopt, Mopt, Llist, Klist, Mlist

# Solve the input allocation problem by iteration
params = mp_input_params()
data = mp_data(params, n =10_000)
# `%time` is an IPython magic and a SyntaxError in a plain .py file, so the
# call is timed with time.time() like the other experiments in this file.
start_time = time.time()  # record the start time
Lopt, Kopt, Mopt, Llist, Klist, Mlist = mp_input_allocation(params, data)
end_time = time.time()  # record the end time
print("Running Time: {:.4f} 秒".format(end_time - start_time))

# Visualize the input allocations: log allocation of each product against the
# log of the firm's total input. The legend labels are wrapped in $...$ so
# they render as math; they were garbled duplicates such as 'k1k_1'.
plt.figure(figsize=(10, 5))
plt.subplot(1, 3, 1)
plt.scatter(np.log(data[1][:,0:1]), Kopt[0], alpha=0.5, s=2,  label='$k_1$', color='blue')
plt.scatter(np.log(data[1][:,0:1]), Kopt[1], alpha=0.5, s=2, label='$k_2$', color='green')
plt.title('Capital Allocation')
plt.xlabel('log(K)')
plt.legend()
plt.ylabel('Capital Allocation')
plt.subplot(1, 3, 2)
plt.scatter(np.log(data[2][:,0:1]), Lopt[0], alpha=0.5, s=2, label='$l_1$', color='blue')
plt.scatter(np.log(data[2][:,0:1]), Lopt[1], alpha=0.5, s=2, label='$l_2$', color='green')
plt.title('Labor Allocation')
plt.legend()
plt.xlabel('log(L)')
plt.ylabel('Labor Allocation')
plt.subplot(1, 3, 3)
plt.scatter(np.log(data[3][:,0:1]), Mopt[0], alpha=0.5, s=2, label='$m_1$', color='blue')
plt.scatter(np.log(data[3][:,0:1]), Mopt[1], alpha=0.5, s=2, label='$m_2$', color='green')
plt.title('Materials Allocation')
plt.xlabel('log(M)')
plt.ylabel('Materials Allocation')
plt.tight_layout()
plt.legend()
plt.show()

# Solve the problem by iterating on input shares
def mp_input_shares(params, data, tol=1e-5, max_iter=1000):
    """Solve for two-product input shares by undamped fixed-point iteration.

    Each pass maps the current shares to output elasticities and the
    elasticities back to new shares; the new shares replace the old ones
    without any damping.

    Parameters
    ----------
    params : sequence of 8 numbers
        ``(βl_1, βl_2, βk_1, βk_2, κ_1, κ_2, v_1, v_2)``. The materials
        weights are derived as ``βm_j = 1 - βl_j - βk_j``.
    data : tuple
        ``(R, K, L, M)``. ``R`` is read column-wise as ``R[:, 0:1]`` and
        ``R[:, 1:2]`` (one column per product). ``K``, ``L`` and ``M`` are
        multiplied element-wise with ``(n, 1)`` share arrays, so they are
        assumed to be ``(n, 1)`` columns -- TODO confirm against ``mp_data``.
    tol : float
        A pass counts as converged when the norm of the change in each of the
        six share arrays is below ``tol``.
    max_iter : int
        Maximum number of passes.

    Returns
    -------
    tuple
        ``(Lopt, Kopt, Mopt, Llist, Klist, Mlist)``. The first three are
        two-element lists of ``(n, 1)`` share arrays (product 1, product 2);
        the ``*list`` entries hold the initial guess followed by every iterate.
    """
    βl_1, βl_2, βk_1, βk_2, κ_1, κ_2, v_1, v_2 = params
    βm_1 = 1.0 - βl_1 - βk_1  # materials weight, product 1
    βm_2 = 1.0 - βl_2 - βk_2  # materials weight, product 2
    R, K, L, M = data
    n = len(K)  # number of firms
    r_1, r_2 = R[:, 0:1], R[:, 1:2]
    products = ((βk_1, βl_1, βm_1, κ_1, v_1), (βk_2, βl_2, βm_2, κ_2, v_2))

    def equal_split():
        # Initial guess: every input is split 50/50 between the two products.
        return [np.full((n, 1), 0.5), np.full((n, 1), 0.5)]

    def shares_from(γ):
        # Share of product j is R_j*γ_j over the sum across both products.
        w_1 = r_1*γ[0]
        w_2 = r_2*γ[1]
        total = w_1 + w_2
        return [w_1/total, w_2/total]

    Lopt, Kopt, Mopt = equal_split(), equal_split(), equal_split()
    Llist, Klist, Mlist = [list(Lopt)], [list(Kopt)], [list(Mopt)]

    for i in range(max_iter):
        # Output elasticities of each input, product by product
        γ_k, γ_l, γ_m = [], [], []
        for j, (βk, βl, βm, κ, v) in enumerate(products):
            ρ = 1 - 1/κ
            k_pow = (Kopt[j]*K)**ρ
            l_pow = (Lopt[j]*L)**ρ
            m_pow = (Mopt[j]*M)**ρ
            aggregate = βk*k_pow + βl*l_pow + βm*m_pow
            γ_k.append(v*βk*k_pow/aggregate)
            γ_l.append(v*βl*l_pow/aggregate)
            γ_m.append(v - γ_k[j] - γ_l[j])

        # Input allocation shares implied by those elasticities
        Kopt_new = shares_from(γ_k)
        Lopt_new = shares_from(γ_l)
        Mopt_new = shares_from(γ_m)
        Llist.append(list(Lopt_new))
        Klist.append(list(Kopt_new))
        Mlist.append(list(Mopt_new))

        updates = ((Lopt_new, Lopt), (Kopt_new, Kopt), (Mopt_new, Mopt))
        if all(np.linalg.norm(new[j] - old[j]) < tol
               for new, old in updates for j in (0, 1)):
            print("Converged to input allocations after {} iterations.".format(len(Llist)))
            return Lopt_new, Kopt_new, Mopt_new, Llist, Klist, Mlist

        Lopt, Kopt, Mopt = Lopt_new, Kopt_new, Mopt_new

        if i + 1 == max_iter:
            print("Warning: Maximum iterations reached without convergence.")
            return Lopt_new, Kopt_new, Mopt_new, Llist, Klist, Mlist

# Solve the input allocation problem by iterating on shares
# (`%time` is an IPython line magic; requires IPython/Jupyter).
%time Lopt, Kopt, Mopt, Llist, Klist, Mlist = mp_input_shares(params, data)

# Visualize the input allocation returned by the share iteration: one panel
# per input (capital, labor, materials), product 1 in blue and product 2 in
# green, plotted against the log of the first column of the matching `data` entry.
# Legend labels are mathtext (`$...$`) so the subscripts render properly.
plt.figure(figsize=(10, 5))
plt.subplot(1, 3, 1)
plt.scatter(np.log(data[1][:,0:1]), Kopt[0], alpha=0.5, s=2,  label=r'$k_1$', color='blue')
plt.scatter(np.log(data[1][:,0:1]), Kopt[1], alpha=0.5, s=2, label=r'$k_2$', color='green')
plt.title('Capital Allocation')
plt.xlabel('log(K)')
plt.legend()
plt.ylabel('Capital Allocation')
plt.subplot(1, 3, 2)
plt.scatter(np.log(data[2][:,0:1]), Lopt[0], alpha=0.5, s=2, label=r'$l_1$', color='blue')
plt.scatter(np.log(data[2][:,0:1]), Lopt[1], alpha=0.5, s=2, label=r'$l_2$', color='green')
plt.title('Labor Allocation')
plt.legend()
plt.xlabel('log(L)')
plt.ylabel('Labor Allocation')
plt.subplot(1, 3, 3)
plt.scatter(np.log(data[3][:,0:1]), Mopt[0], alpha=0.5, s=2, label=r'$m_1$', color='blue')
plt.scatter(np.log(data[3][:,0:1]), Mopt[1], alpha=0.5, s=2, label=r'$m_2$', color='green')
plt.title('Materials Allocation')
plt.xlabel('log(M)')
plt.ylabel('Materials Allocation')
plt.tight_layout()
plt.legend()
plt.show()

## Experiment with different kappa values: time both raw solvers on the same data
## (`%time` is an IPython line magic; requires IPython/Jupyter).
params = mp_input_params(κ_1=0.5, κ_2=3) 
data = mp_data(params, n =1000)
%time Lopt, Kopt, Mopt, Llist, Klist, Mlist = mp_input_allocation(params, data)
%time Lopt, Kopt, Mopt, Llist, Klist, Mlist = mp_input_shares(params, data)

def mp_input_share2(params, data, tol=1e-5, max_iter=1000):
    """Solve for two-product input shares by damped fixed-point iteration.

    Same share/elasticity mapping as the undamped share iteration, but each
    pass moves the current shares only a fraction ``α`` of the way towards the
    newly computed ones, with ``α = 0.99 * min(1, 0.5κ_1/|κ_1-1|, 0.5κ_2/|κ_2-1|)``.

    Parameters
    ----------
    params : sequence of 8 numbers
        ``(βl_1, βl_2, βk_1, βk_2, κ_1, κ_2, v_1, v_2)``. The materials
        weights are derived as ``βm_j = 1 - βl_j - βk_j``.
    data : tuple
        ``(R, K, L, M)``. ``R`` is read column-wise as ``R[:, 0:1]`` and
        ``R[:, 1:2]``. ``K``, ``L`` and ``M`` are multiplied element-wise with
        ``(n, 1)`` share arrays, so they are assumed to be ``(n, 1)`` columns
        -- TODO confirm against ``mp_data``.
    tol : float
        Converged when the norm of (new shares - current damped shares) is
        below ``tol`` for each of the six share arrays.
    max_iter : int
        Maximum number of passes.

    Returns
    -------
    tuple
        ``(Lopt_new, Kopt_new, Mopt_new, Llist, Klist, Mlist)``: the last
        *undamped* shares (two-element lists of ``(n, 1)`` arrays) and the
        history of undamped iterates, starting with the initial guess.
    """
    βl_1, βl_2, βk_1, βk_2, κ_1, κ_2, v_1, v_2 = params
    βm_1 = 1.0 - βl_1 - βk_1  # materials weight, product 1
    βm_2 = 1.0 - βl_2 - βk_2  # materials weight, product 2
    R, K, L, M = data
    n = len(K)  # number of firms
    r_1, r_2 = R[:, 0:1], R[:, 1:2]
    products = ((βk_1, βl_1, βm_1, κ_1, v_1), (βk_2, βl_2, βm_2, κ_2, v_2))

    # Initial guess: every input is split 50/50 between the two products.
    Lopt = [0.5*np.ones((n, 1)), 0.5*np.ones((n, 1))]
    Kopt = [0.5*np.ones((n, 1)), 0.5*np.ones((n, 1))]
    Mopt = [0.5*np.ones((n, 1)), 0.5*np.ones((n, 1))]
    Llist = [Lopt.copy()]
    Klist = [Kopt.copy()]
    Mlist = [Mopt.copy()]

    # Step size based on κ. It does not change between passes, so compute it
    # once. The bound 0.5κ/|κ-1| grows without limit as κ -> 1, so a κ equal
    # to 1 imposes no restriction and is skipped instead of dividing by zero.
    step_bounds = [1.0] + [0.5*κ/abs(κ - 1) for κ in (κ_1, κ_2) if κ != 1]
    α = 0.99 * min(step_bounds)

    def shares_from(γ):
        # Share of product j is R_j*γ_j over the sum across both products.
        denom = r_1*γ[0] + r_2*γ[1]
        return [r_1*γ[0]/denom, r_2*γ[1]/denom]

    def damped(new, old):
        return [α*new[0] + (1-α)*old[0], α*new[1] + (1-α)*old[1]]

    for i in range(max_iter):
        # Output elasticities of each input, product by product
        γ_k, γ_l, γ_m = [], [], []
        for j, (βk, βl, βm, κ, v) in enumerate(products):
            ρ = 1 - 1/κ
            k_pow = (Kopt[j]*K)**ρ
            l_pow = (Lopt[j]*L)**ρ
            m_pow = (Mopt[j]*M)**ρ
            aggregate = βk*k_pow + βl*l_pow + βm*m_pow
            γ_k.append(v*βk*k_pow/aggregate)
            γ_l.append(v*βl*l_pow/aggregate)
            γ_m.append(v - γ_k[j] - γ_l[j])

        # Input allocation shares implied by those elasticities
        Kopt_new = shares_from(γ_k)
        Lopt_new = shares_from(γ_l)
        Mopt_new = shares_from(γ_m)
        Llist.append(Lopt_new.copy())
        Klist.append(Kopt_new.copy())
        Mlist.append(Mopt_new.copy())

        if (np.linalg.norm(Lopt_new[0] - Lopt[0]) < tol and
            np.linalg.norm(Lopt_new[1] - Lopt[1]) < tol and
            np.linalg.norm(Kopt_new[0] - Kopt[0]) < tol and
            np.linalg.norm(Kopt_new[1] - Kopt[1]) < tol and
            np.linalg.norm(Mopt_new[0] - Mopt[0]) < tol and
            np.linalg.norm(Mopt_new[1] - Mopt[1]) < tol):
            print("Converged to input allocations after {} iterations.".format(len(Llist)))
            return Lopt_new, Kopt_new, Mopt_new, Llist, Klist, Mlist

        # Damped update: convex combination of the new and current shares
        Lopt = damped(Lopt_new, Lopt)
        Kopt = damped(Kopt_new, Kopt)
        Mopt = damped(Mopt_new, Mopt)

        if i == max_iter - 1:
            print("Warning: Maximum iterations reached without convergence.")
            return Lopt_new, Kopt_new, Mopt_new, Llist, Klist, Mlist
        
def mp_input_allocation2(params, data, tol=1e-5, max_iter=1000):
    """Solve for two-product log input allocations by damped fixed-point iteration.

    The iterates are the *logs* of the amount of each input assigned to each
    product. Each pass moves the current logs a fraction ``α`` of the way
    towards the newly computed ones, with
    ``α = 0.99 * min(1, 0.5κ_1/|κ_1-1|, 0.5κ_2/|κ_2-1|)``.

    Parameters
    ----------
    params : sequence of 8 numbers
        ``(βl_1, βl_2, βk_1, βk_2, κ_1, κ_2, v_1, v_2)``. The materials
        weights are derived as ``βm_j = 1 - βl_j - βk_j``.
    data : tuple
        ``(R, K, L, M)``. ``R`` is read column-wise as ``R[:, 0:1]`` and
        ``R[:, 1:2]``. ``K``, ``L`` and ``M`` must be strictly positive (their
        logs are taken) and are assumed to be ``(n, 1)`` columns -- TODO
        confirm against ``mp_data``.
    tol : float
        Converged when the norm of (new logs - current damped logs) is below
        ``tol`` for labor, capital and materials.
    max_iter : int
        Maximum number of passes.

    Returns
    -------
    tuple
        ``(Lopt, Kopt, Mopt, Llist, Klist, Mlist)``. On convergence the first
        three are the last undamped iterates (arrays stacking product 1 and
        product 2 along axis 0); when ``max_iter`` is exhausted they are the
        last damped iterates (two-element lists). The ``*list`` entries hold
        the initial guess followed by every undamped iterate.
    """
    βl_1, βl_2, βk_1, βk_2, κ_1, κ_2, v_1, v_2 = params
    βm_1 = 1.0 - βl_1 - βk_1  # materials weight, product 1
    βm_2 = 1.0 - βl_2 - βk_2  # materials weight, product 2
    R, K, L, M = data
    r_1, r_2 = R[:, 0:1], R[:, 1:2]
    products = ((βk_1, βl_1, βm_1, κ_1, v_1), (βk_2, βl_2, βm_2, κ_2, v_2))

    # Initial guess in logs.
    # NOTE(review): 0.5*log(X) is log(sqrt(X)), not log(X/2); it only serves as
    # a starting point, but confirm this is the intended guess.
    Lopt = [0.5*np.log(L), 0.5*np.log(L)]
    Kopt = [0.5*np.log(K), 0.5*np.log(K)]
    Mopt = [0.5*np.log(M), 0.5*np.log(M)]
    Llist = [Lopt.copy()]
    Klist = [Kopt.copy()]
    Mlist = [Mopt.copy()]

    # Step size based on κ. It does not change between passes, so compute it
    # once. The bound 0.5κ/|κ-1| grows without limit as κ -> 1, so a κ equal
    # to 1 imposes no restriction and is skipped instead of dividing by zero.
    step_bounds = [1.0] + [0.5*κ/abs(κ - 1) for κ in (κ_1, κ_2) if κ != 1]
    α = 0.99 * min(step_bounds)

    def log_allocation(total, γ):
        # Product j receives the fraction R_j*γ_j / (R_1*γ_1 + R_2*γ_2) of `total`.
        denom = r_1*γ[0] + r_2*γ[1]
        return np.log([total*r_1*γ[0]/denom, total*r_2*γ[1]/denom])

    def damped(new, old):
        return [α*new[0] + (1-α)*old[0], α*new[1] + (1-α)*old[1]]

    for i in range(max_iter):
        # Output elasticities of each input, product by product
        γ_k, γ_l, γ_m = [], [], []
        for j, (βk, βl, βm, κ, v) in enumerate(products):
            ρ = 1 - 1/κ
            k_pow = np.exp(Kopt[j])**ρ
            l_pow = np.exp(Lopt[j])**ρ
            m_pow = np.exp(Mopt[j])**ρ
            aggregate = βk*k_pow + βl*l_pow + βm*m_pow
            γ_k.append(v*βk*k_pow/aggregate)
            γ_l.append(v*βl*l_pow/aggregate)
            γ_m.append(v - γ_k[j] - γ_l[j])

        # Log input allocations implied by those elasticities
        Kopt_new = log_allocation(K, γ_k)
        Lopt_new = log_allocation(L, γ_l)
        Mopt_new = log_allocation(M, γ_m)
        Llist.append(Lopt_new.copy())
        Klist.append(Kopt_new.copy())
        Mlist.append(Mopt_new.copy())

        if (np.linalg.norm(Lopt_new - Lopt) < tol
                and np.linalg.norm(Kopt_new - Kopt) < tol
                and np.linalg.norm(Mopt_new - Mopt) < tol):
            print("Converged to input allocations after {} iterations.".format(len(Llist)))
            return Lopt_new, Kopt_new, Mopt_new, Llist, Klist, Mlist

        # Damped update: convex combination of the new and current logs
        Lopt = damped(Lopt_new, Lopt)
        Kopt = damped(Kopt_new, Kopt)
        Mopt = damped(Mopt_new, Mopt)

        if i == max_iter - 1:
            print("Warning: Maximum iterations reached without convergence.")
            return Lopt, Kopt, Mopt, Llist, Klist, Mlist

import time

# Benchmark the four solvers over a grid of (κ_1, κ_2) values, recording for
# each one the number of stored iterates, whether it converged, and wall time.
#   1: mp_input_share2       (damped share iteration)
#   2: mp_input_shares       (raw share iteration)
#   3: mp_input_allocation2  (damped log-input iteration)
#   4: mp_input_allocation   (raw log-input iteration)
# The solvers are called with their default max_iter (1000 for the solvers
# whose signatures are visible here; assumed to be the same for
# mp_input_allocation -- TODO confirm).
# Llist holds the initial guess plus one entry per pass, so a run that stopped
# early has at most MAX_ITER entries. A run that converges on the very last
# pass cannot be told apart from a non-converged one by length alone.
MAX_ITER = 1000
kappa1_list = np.arange(0.1, 6, 0.2)
kappa2_list = np.arange(0.1, 6, 0.2)
results = []
for κ_1 in kappa1_list:
    for κ_2 in kappa2_list:
        params = mp_input_params(κ_1=κ_1, κ_2=κ_2)
        data = mp_data(params, n=1000)

        start = time.perf_counter()
        Lopt, Kopt, Mopt, Llist, Klist, Mlist = mp_input_share2(params, data)
        elapsed1 = time.perf_counter() - start
        iter1 = len(Llist)
        converged1 = iter1 <= MAX_ITER

        start = time.perf_counter()
        Lopt, Kopt, Mopt, Llist, Klist, Mlist = mp_input_shares(params, data)
        elapsed2 = time.perf_counter() - start
        iter2 = len(Llist)
        converged2 = iter2 <= MAX_ITER

        start = time.perf_counter()
        Lopt, Kopt, Mopt, Llist, Klist, Mlist = mp_input_allocation2(params, data)
        elapsed3 = time.perf_counter() - start
        iter3 = len(Llist)
        converged3 = iter3 <= MAX_ITER

        start = time.perf_counter()
        Lopt, Kopt, Mopt, Llist, Klist, Mlist = mp_input_allocation(params, data)
        elapsed4 = time.perf_counter() - start
        iter4 = len(Llist)
        converged4 = iter4 <= MAX_ITER

        results.append({
            'κ_1': κ_1,
            'κ_2': κ_2,
            'iterations1': iter1,
            'iterations2': iter2,
            'iterations3': iter3,
            'iterations4': iter4,
            'converged3': converged3,
            'converged1': converged1,
            'converged2': converged2,
            'converged4': converged4,
            'time1': elapsed1,
            'time2': elapsed2,
            'time3': elapsed3,
            'time4': elapsed4
        })

import pandas as pd 
import seaborn as sns

# Scatter the (κ_1, κ_2) grid once per solver, colouring and shaping each point
# by whether that solver converged. Axis labels are mathtext so κ renders with
# its subscript.
df_results = pd.DataFrame(results)
plt.figure(figsize=(12, 6))
# (subplot position, convergence column, panel title), in the original drawing order
_panels = [
    (2, 'converged1', 'Modified Input Share Iteration'),
    (1, 'converged2', 'Raw Input Share Iteration'),
    (4, 'converged3', 'Modified Logged Inputs Iteration'),
    (3, 'converged4', 'Raw Logged Inputs Iteration'),
]
for _position, _flag, _title in _panels:
    plt.subplot(2, 2, _position)
    sns.scatterplot(
        data=df_results,
        x='κ_1',
        y='κ_2',
        hue=_flag,
        style=_flag,
        markers={True: 'o', False: 'v'},  # both markers are filled styles
        palette={True: 'blue', False: 'red'}
    )
    plt.title(_title)
    plt.xlabel(r'$\kappa_1$')
    plt.ylabel(r'$\kappa_2$')
    plt.legend(title='Converged', loc='upper right')
plt.tight_layout()

plt.show()

# MLE example for the normal distribution
# Parameterization
normal_params = namedtuple("normal_params", ('mu', 'sigma'))
def create_normal_params(mu=1.5, sigma=3.5):
    """Return a ``normal_params`` tuple holding ``mu`` and ``sigma`` (defaults 1.5 and 3.5)."""
    return normal_params(mu, sigma)
# Generate data
def generate_data(mu, sigma, n=1000):
    """Draw ``n`` samples from ``np.random.normal(mu, sigma)``.

    The global NumPy random state is reseeded with 42 on every call, so the
    same arguments always produce the same sample (and any other code using
    ``np.random`` afterwards continues from that reseeded state).
    """
    np.random.seed(42)  # fixed seed for reproducibility
    return np.random.normal(loc=mu, scale=sigma, size=n)
# Log-likelihood function
def log_likelihood(params, data):
    """Return the *negative* normal log-likelihood of ``data`` at ``params = (mu, sigma)``.

    The sign is flipped so the value can be passed to a minimizer. A
    non-positive ``sigma`` yields ``np.inf``.
    """
    mu, sigma = params
    if sigma <= 0:  # sigma must be positive
        return np.inf
    n_obs = len(data)
    sq_dev = np.sum((data - mu)**2)
    # Negative of: -n/2*log(2π) - n*log(σ) - Σ(x-μ)² / (2σ²)
    return n_obs/2 * np.log(2 * np.pi) + n_obs * np.log(sigma) + sq_dev / (2 * sigma**2)

# Solve the MLE with the scipy.optimize library: simulate a sample from the
# true parameters, then minimize the negative log-likelihood from a nearby start.
from scipy.optimize import minimize
params_true = create_normal_params(mu=1, sigma=3)
data = generate_data(params_true.mu, params_true.sigma, n=5000)
initial_params = create_normal_params(mu=0.8, sigma=2)
mle_res = minimize(
    log_likelihood, 
    x0=np.array([initial_params.mu, initial_params.sigma]), 
    args=(data,),
    method='L-BFGS-B',
    options={'disp': True, 'maxiter': 10000}, 
    bounds=((None, None), (1e-6, None))  # sigma must be positive
)
print(mle_res)
print("True parameters (mu, sigma)=({:.4f}, {:.4f})".format(params_true.mu, params_true.sigma))
print("Estimated (mu, sigma)=({:.4f}, {:.4f})".format(mle_res.x[0], mle_res.x[1]))

# GMM estimation for the normal distribution
def gmm_objective(params, data):
    """Objective function with identity weighting.

    Returns the sum of squares of the two moment gaps
    ``mean(data) - mu`` and ``var(data) - sigma**2`` at ``params = (mu, sigma)``.
    A non-positive ``sigma`` yields ``np.inf``.
    """
    mu, sigma = params
    if sigma <= 0:  # sigma must be positive
        return np.inf
    moments = np.array([np.mean(data) - mu, np.var(data) - sigma**2])
    return np.sum(moments**2)
def gmm_weight(params, data):
    """Weighting matrix for GMM, evaluated at ``params = (mu, sigma)``.

    Builds the per-observation moment contributions ``x - mu`` and
    ``(x - mu)**2 - sigma**2``, takes their sample covariance divided by the
    number of observations, and returns the pseudo-inverse of that matrix.
    """
    mu, sigma = params
    n_obs = len(data)
    centered = data - mu
    # One row per moment, one column per observation (np.cov's default layout)
    contributions = np.stack([centered, centered**2 - sigma**2], axis=0)
    Omega = 1/n_obs * np.cov(contributions)
    return np.linalg.pinv(Omega)  # pseudo-inverse is more robust than inv
def gmm_iter_objective(params, data, W):
    """Objective function with a general weighting matrix.

    Returns the quadratic form ``m' W m`` where ``m`` stacks the moment gaps
    ``mean(data) - mu`` and ``var(data) - sigma**2`` at ``params = (mu, sigma)``.
    A non-positive ``sigma`` yields ``np.inf``.
    """
    mu, sigma = params
    if sigma <= 0:  # sigma must be positive
        return np.inf
    moments = np.array([np.mean(data) - mu, np.var(data) - sigma**2])
    return moments @ W @ moments

# Two-step GMM estimation
params_init = create_normal_params(mu=0.8, sigma=2)
x0 = np.array([params_init.mu, params_init.sigma])
# Step 1: use the identity weighting matrix
gmm_res1 = minimize(
    gmm_objective, 
    x0=x0, 
    args=(data,), 
    method='L-BFGS-B', 
    options={'disp': True, 'maxiter': 10000}, 
    bounds=((None, None), (1e-8, None))  # sigma must be positive
)
print("First step GMM results:", gmm_res1)

# Step 2: compute the weighting matrix at the first-step estimate and re-estimate,
# starting the optimizer from the first-step solution
W = gmm_weight(gmm_res1.x, data)
print("Estimated Second-Step Weighting Matrix:\n", W)
gmm_res2 = minimize(
    gmm_iter_objective, 
    x0=gmm_res1.x, 
    args=(data, W), 
    method='L-BFGS-B', 
    options={'disp': True, 'maxiter': 10000}, 
    bounds=((None, None), (1e-8, None))  # sigma must be positive
)
print("Second step GMM results:", gmm_res2)
print("Weighting Matrix:\n", gmm_weight(gmm_res2.x, data))

# Inference for the GMM estimator
### Compute the derivative of the moment conditions with respect to the parameters
# NOTE: this import rebinds the module-level name `np` to autograd's NumPy
# wrapper for everything that runs after this line.
import autograd.numpy as np
from autograd import jacobian
def gmm_moments(params):
    """Return the moment gaps ``[mean(data) - mu, var(data) - sigma**2]``.

    Takes only ``params = (mu, sigma)``; the sample is read from the
    module-level ``data`` variable.
    """
    mu, sigma = params
    n = len(data)  # not used below
    m1 = np.mean(data) - mu
    m2 = np.var(data) - sigma**2
    moments = np.array([m1, m2])
    return moments
# Use autograd to build the Jacobian of the moment conditions
gmm_jacobian = jacobian(gmm_moments)  # Jacobian with respect to the first argument (params)
gmm_jacobian_val = gmm_jacobian(gmm_res2.x)  # evaluate the Jacobian at the estimated parameters
print(gmm_jacobian_val)

def gmm_vc(data, params):
    """Estimate the variance-covariance matrix of the GMM estimate ``params``.

    Uses ``(G' W G)^{-1}``, where ``G`` is the Jacobian of the moment
    conditions at ``params`` (from ``gmm_jacobian``) and ``W`` is the
    weighting matrix from ``gmm_weight`` evaluated at the same ``params``.

    ``gmm_weight`` returns the pseudo-inverse of (1/n) times the covariance of
    the per-observation moment contributions, i.e. the inverse variance of the
    sample-mean moments. The 1/n scaling is therefore already inside ``W``;
    multiplying by 1/n again would shrink every standard error by sqrt(n).
    """
    # Evaluate the weighting matrix at the estimate being assessed, not at a
    # fixed global estimate.
    W = gmm_weight(params, data)
    gmm_delta = gmm_jacobian(params)  # Jacobian at the estimated parameters
    CV = np.linalg.inv(gmm_delta.T @ W @ gmm_delta)
    return CV
# Compute the variance-covariance matrix at the two-step GMM estimate
theta_vc = gmm_vc(data, gmm_res2.x)
# Build a table of the estimated parameters and their standard errors
def gmm_results_table(params, vc_matrix):
    """Return a DataFrame with one row per parameter (``mu``, ``sigma``).

    Columns are ``Parameter``, ``Estimate`` and ``Std. Error``; the standard
    errors are the square roots of the diagonal of ``vc_matrix``.
    """
    mu, sigma = params
    variances = np.diag(vc_matrix)
    table = {
        'Parameter': ['mu', 'sigma'],
        'Estimate': [mu, sigma],
        'Std. Error': np.sqrt(variances),
    }
    return pd.DataFrame(table)

# Tabulate and print the two-step GMM estimates with their standard errors
results_table = gmm_results_table(gmm_res2.x, theta_vc)
print("------Two-Step GMM Results-------")
print("---------------------------------")
print(results_table)

# Iterative GMM estimation
def iterative_gmm_objective(params, data, W):
    """Objective function for iterative GMM.

    Computes the same quadratic form as ``gmm_iter_objective``: ``m' W m``
    with ``m = [mean(data) - mu, var(data) - sigma**2]``. A non-positive
    ``sigma`` yields ``np.inf``.
    """
    mu, sigma = params
    if sigma <= 0:  # sigma must be positive
        return np.inf
    gap_mean = np.mean(data) - mu
    gap_var = np.var(data) - sigma**2
    moments = np.array([gap_mean, gap_var])
    return moments @ W @ moments

Tol = 1e-8
def iterative_gmm_estimation(data, params_init, tol=Tol, max_iter=1000):
    """Iterative GMM estimation with a convergence check on the weighting matrix.

    Starting from ``(params_init.mu, params_init.sigma)``, alternates between
    minimizing ``iterative_gmm_objective`` for the current weighting matrix
    and recomputing the weighting matrix (``gmm_weight``) at the new estimate.
    Stops when the norm of the change in the weighting matrix is below ``tol``
    or after ``max_iter`` rounds, and returns the parameter array.
    """
    theta = np.array([params_init.mu, params_init.sigma])
    weight = gmm_weight(theta, data)
    for round_no in range(1, max_iter + 1):
        fit = minimize(
            iterative_gmm_objective,
            x0=theta,
            args=(data, weight),
            method='L-BFGS-B',
            options={'disp': True, 'maxiter': 10000},
            bounds=((None, None), (1e-8, None)),  # sigma must be positive
        )
        theta_next = fit.x
        weight_next = gmm_weight(theta_next, data)
        if np.linalg.norm(weight_next - weight) < tol:
            print(f"Converged after {round_no} iterations.")
            return theta_next
        # Carry the new estimate and weighting matrix into the next round
        theta, weight = theta_next, weight_next
    print("Warning: Maximum iterations reached without convergence.")
    return theta

# Iterative GMM estimation results
iterative_params = iterative_gmm_estimation(data, params_init)
# Compute the variance-covariance matrix for iterative GMM
theta_vc_iterative = gmm_vc(data, iterative_params)
# Build the table of iterative GMM estimates and their standard errors
iterative_results_table = gmm_results_table(iterative_params, theta_vc_iterative)
print("------Iterative GMM Results-------")
print("---------------------------------")
print(iterative_results_table)

# Estimate the parameters of the normal distribution with SMM
def smm_objective(params, data):
    """SMM objective: squared gaps between simulated and sample moments.

    Simulates 10000 normal draws at ``params = (mu, sigma)`` and returns the
    sum of the squared differences in mean and in variance relative to
    ``data``. The global NumPy seed is reset to 42 on every call, so the
    objective is a deterministic function of ``params``.
    """
    mu, sigma = params
    np.random.seed(42)  # fixed seed for reproducibility
    n_sim = 10000  # number of simulated data points
    draws = np.random.normal(mu, sigma, n_sim)
    gap_mean = np.mean(draws) - np.mean(data)
    gap_var = np.var(draws) - np.var(data)
    return gap_mean**2 + gap_var**2
# SMM 估计
smm_res = minimize(
    smm_objective, 
    x0=np.array([params_init.mu, params_init.sigma]), 
    args=(data,), 
    method='L-BFGS-B', 
    options={'disp': True, 'maxiter': 10000}, 
    bounds=((None, None), (1e-8, None))  # sigma 必须为正
)
print("SMM results:", smm_res)
# 计算 SMM 的方差-协方差矩阵（variance-covariance matrix）

# Define the function f(x) = x^2
def f(x):
    """Return ``x`` raised to the power 2 (element-wise for arrays)."""
    return pow(x, 2)
# Generate the values of k
k_vals = np.arange(0, 30)
# Example 1: x_k = (-1)^k * (1 + 2^{-k})
x_k = (-1)**k_vals * (1 + 2.0**(-k_vals))  # float base 2.0 avoids a ValueError from a negative integer exponent
# Example 2: x_k = 1 + 2^{-k}
x_k1 = 1 + 2.0**(-k_vals)  
# Evaluate f along both sequences
f_k = f(x_k)
f_k1 = f(x_k1)
# Plot x_k for Example 1. Labels use mathtext (`$...$`) so the formulas render.
plt.subplot(1, 2, 1)
plt.plot(k_vals, x_k, 'o-', label=r'$x_k = (-1)^k(1 + 2^{-k})$')
plt.axhline(y=1, color='gray', linestyle='--', linewidth=1)
plt.axhline(y=-1, color='gray', linestyle='--', linewidth=1)
plt.title(r'Example 1: Sequence $x_k$')
plt.xlabel(r'Iteration $k$')
plt.ylabel(r'$x_k$')
plt.grid(True)
plt.legend()
# Plot f(x_k) for Example 2
# NOTE(review): this panel plots f(x_k) while its title says "Sequence x_k";
# confirm which of the two is intended.
plt.subplot(1, 2, 2)
plt.plot(k_vals, f_k1, 'o-', color='blue', label=r'$x_k = 1+2^{-k}$')
plt.axhline(y=1, color='gray', linestyle='--')
plt.title(r'Example 2: Sequence $x_k$')
plt.xlabel(r'Iteration $k$')
plt.ylabel(r'$f(x_k)$')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()

| 特征（Features） | 约化式模型（Reduced-form Model） | 结构模型（Structural Model） |
| --- | --- | --- |
| 数据生成过程（Data Generation Process） | $Y_i = f(X_i) + \varepsilon_i$ | $X_i = \operatorname*{arg\,max}_{z} \{U(Y_i, z)\}$ |
| 估计目的（Estimation Purpose） | $X$ 对 $Y$ 的因果效应（causal effect） | 模型的结构参数（structural parameters） |
| 估计方法（Estimation Method） | OLS、IV、RDD、Matching | SMM、MLE、MCMC |
| 优点（Advantages） | 易于实现、解释和理解 | 更灵活，可纳入更多信息，可用于反事实分析（counterfactual analysis） |
| 缺点（Disadvantages） | 机制？误设（mis-specification）？一般均衡效应（general equilibrium effects）？ | 通常更复杂，需要更多假设，存在过拟合问题（overfitting problem） |

路线图（Roadmap）¶

具有经济学应用的算法（Algorithms with Economics Applications）¶

方程组求解（Solving System of Equations）¶

牛顿法（Newton's Method）¶

牛顿法的扩展（Extensions of Newton's Method）¶

割线法（Secant Method）¶

布罗伊登法（Broyden's Method，可选）¶

经济学应用 I：Solow 增长模型（Solow Growth Model）¶

梯度下降算法（Gradient Descent Algorithms）¶

梯度下降算法（Gradient Descent Algorithm，续）¶

扩展 1：随机梯度下降（Stochastic Gradient Descent, SGD）¶

🧮 扩展 2：牛顿法（Newton’s Method）¶

扩展 3：约束优化的拉格朗日方法（Lagrangian Methods for Constrained Optimization）¶

扩展 4：次梯度方法（Subgradient Methods）¶

编程准备：Python 中的循环（Do Loops in Python）¶

示例：单变量函数的梯度下降与牛顿法（Gradient Descent & Newton's Method）¶

示例：线性回归的梯度下降（Gradient Descent for Linear Regression）¶

线性回归的梯度与 Hessian 矩阵（Gradient and Hessian）¶

Python 中的优化：Scipy.optimize¶

数值微分与数值积分（Numerical Differentiation and Integration）¶

泰勒级数展开与有限差分近似（Taylor Series Expansion and Finite Difference Approximation）¶

Python 中的数值微分：Autograd¶

数值积分（Numerical Integration）¶

梯形法则（Trapezoidal Rule）¶

Simpson 法则（Simpson's Rule）¶

经济学应用 II：需求估计的 BLP 方法（BLP Method for Demand Estimation）¶

原始不动点迭代（Raw Fixed-Point-Based Iterations）¶

不动点（Fixed Points）¶

压缩映射（Contraction Mapping）¶

Banach 不动点定理（Banach Fixed Point Theorem）¶

压缩的基于导数的条件（Derivative-Based Condition for Contraction）¶

算子范数示例（Examples of Operator Norms）¶

不动点迭代算法（Fixed-Point Iteration Algorithm）¶

回到应用 I：Solow 增长模型（Revisit Application I: the Solow Growth Model）¶

直接迭代的收敛性（Convergence of Direct Iteration）¶

应用 III：BLP 需求估计中的市场份额迭代（Market Share Iteration in BLP Demand Estimation）¶

应用 IV：引力模型估计（Gravity Model Estimation）¶

求解价格系统（Solving the System of Prices）¶

应用 V：多产品企业投入分配求解（Solving Input Allocations of Multi-Product Firms）¶

计算问题与基于压缩映射的迭代（Computational Problem and Contraction Mapping-based Iteration）¶

修正的不动点迭代算法（Modified Fixed-Point Iteration Algorithm）¶

经济学中的结构估计简介（Introduction to Structural Estimation in Economics）¶

约化式方法与结构方法初识（Reduced-form and Structural Methods）¶

约化式方法（Reduced-form Methods）：¶

结构方法（Structural Methods）：¶

连接模型与数据：比较（Connecting Model to Data: A Comparison）¶

结构估计的常规流程（Routine Procedures of Structural Estimation）¶

结构估计中的基本概念（Basic Concepts in Structural Estimation）¶

结构估计中的估计方法（Estimation Methods in Structural Estimation）¶

极大似然估计（Maximum Likelihood Estimation, MLE）¶

广义矩量法（Generalized Method of Moments, GMM）¶

模拟矩量法（Simulated Method of Moments, SMM）¶

极大似然估计（Maximum Likelihood Estimation, MLE）¶

MLE 示例：正态分布（Normal Distribution）¶

广义矩量法（Generalized Method of Moments, GMM）¶

GMM 估计量与两步 GMM（GMM Estimator and Two-Step GMM）¶

加权矩阵的选择（Choice of Weighting Matrix）¶

两步 GMM 估计量（Two-Step GMM Estimator）¶

GMM 中的推断（Inference in GMM）¶

GMM 示例：正态分布（Normal Distribution）¶

模拟矩量法（Simulated Method of Moments, SMM）¶

基于矩估计中的识别（Identification in Moments-Based Estimation）¶

附录（Appendix）¶

1. Broyden 方法中的雅可比矩阵（Jacobian Matrix in Broyden Method）¶

第 1 步：向量化问题（Vectorizing the Problem）¶

第 2 步：使用拉格朗日乘子（Use Lagrange Multipliers）¶

第 3 步：对 $A$ 求导（Take Derivative w.r.t. $A$）¶

第 4 步：代入约束（Plug into Constraint）¶

第 5 步：更新的最终表达式（Final Expression for Update）¶

最终更新规则（Final Update Rule）¶

2. 基于梯度下降方法的收敛性质（Convergence Properties of Gradient Descent-based Method）¶

最陡下降方向（The Steepest Descent Direction）¶

示例 1 分析（Analysis of Example 1）¶

示例 2 分析（Analysis of Example 2）¶

参考文献（References）¶

Python 中的优化：`Scipy.optimize`¶

Python 中的数值微分：`Autograd`¶