import numpy as np
import matplotlib.pyplot as plt
# Simulation: selection bias in a naive treated-vs-untreated comparison.
#
# A single confounder (health motivation) drives both who goes to the gym and
# the untreated BMI outcome, so the raw difference in group means mixes the
# treatment effect with the pre-existing difference between the groups.

np.random.seed(42)  # fixed seed so the printed numbers are reproducible
n = 500

# Potential outcomes are driven by "health motivation" (higher = healthier).
health_motive = np.random.normal(0, 1, n)

# Treatment assignment: noisy threshold on motivation, so more motivated
# people are more likely to go to the gym (boolean mask of length n).
gym = (health_motive + np.random.normal(0, 0.5, n)) > 0.5

# Untreated potential outcome (BMI): the stronger the motivation, the lower.
y0 = 25 - 2 * health_motive + np.random.normal(0, 1, n)

# Treated potential outcome: the true effect of the gym is -1 BMI unit
# for every individual.
y1 = y0 - 1.0

# Only one potential outcome per individual is observed.
y_obs = np.where(gym, y1, y0)

# Naive estimator: difference of observed group means.
naive = y_obs[gym].mean() - y_obs[~gym].mean()

print(f"真实 ATE = {(y1 - y0).mean():.2f}")
print(f"朴素均值差 = {naive:.2f} ← 选择偏差拉大了估计量")

# Output recorded with the source (labels translated to English):
#   true ATE = -1.00
#   naive mean difference = -4.00  <- selection bias inflates the estimate


