import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
def simulate_nested_split_rd(n, eta, rho, lam, zeta, 
                             max_iter=100, tol=1e-8):
    """
    Simulate the fixed‐point (A, y) for given parameters.
    Args:
      n       : number of firms
      eta     : array‐like, length‐n vector of productivities η_i
      rho     : float, benefit parameter
      lam     : float, spill‐out parameter λ
      zeta    : float, link cost ζ
      max_iter: maximum number of updates to A,y
      tol     : convergence tolerance on max |y_new - y_old| 
    Returns:
      A       : (n×n) adjacency matrix of the network
      y       : length‐n vector of R&D efforts
    """
    η = np.asarray(eta).reshape(n)
    # initialize A to no links
    A = np.zeros((n,n), dtype=int)
    # initial guess for y: just the stand‐alone η
    y = η.copy() 
    for it in range(max_iter):
        # --- 1) given A, solve for y:  (I - B) y = η ---
        B = np.zeros((n,n))
        # off‐diagonal entries
        B += rho * A
        B -= lam * (np.ones((n,n)) - np.eye(n))
        # zero out diagonal
        np.fill_diagonal(B, 0.0)  
        # solve (I - B) y = η
        M = np.eye(n) - B
        y_new = np.linalg.solve(M, η)  
        # --- 2) update adjacency A by threshold rule ---
        thresh = zeta / rho
        # outer product y_i * y_j
        Yprod = np.outer(y_new, y_new)
        A_new = (Yprod > thresh).astype(int)
        np.fill_diagonal(A_new, 0)   # no self‐links
        # check convergence
        if np.max(np.abs(y_new - y)) < tol and np.array_equal(A_new, A):
            break 
        y = y_new
        A = A_new
    return A, y

def plot_adjacency(A, ax=None):
    """Plot adjacency matrix as a heatmap."""
    if ax is None:
        fig, ax = plt.subplots()
    im = ax.matshow(A, cmap='Blues')
    ax.set_xlabel('j')
    ax.set_ylabel('i')
    ax.set_xticks(np.arange(-0.5, A.shape[1], 1), minor=True)
    ax.set_yticks(np.arange(-0.5, A.shape[0], 1), minor=True)
    ax.grid(which='minor', color='gray', linestyle='-', linewidth=0.5)
    ax.tick_params(which='minor', bottom=False, left=False)
    return ax

if __name__ == "__main__":
    # === parameters from Figure 4 ===
    n   = 10
    lam = 0.06
    rho = 0.05 # in paper, rho = 0.02 
    eta = [1.00, 0.71, 0.58, 0.50, 0.45,
           0.41, 0.38, 0.35, 0.33, 0.32]
    zetas = [0.005, 0.0075, 0.015]
    # Simulate for each ζ and plot
    fig, axes = plt.subplots(1, len(zetas), figsize=(12,4))
    for ax, ζ in zip(axes, zetas):
        A, y = simulate_nested_split_rd(n, eta, rho, lam, ζ)
        ax = plot_adjacency(A, ax=ax)
        ax.set_title(fr'$\zeta={ζ}$')
    
    plt.suptitle("Stepwise Adjacency Matrices of the Nested‐Split Graph")
    plt.tight_layout(rect=[0,0,1,0.95])
    plt.show()


# socially optimal rd effort 
def simulate_nested_split_opt_rd(n, eta, rho, lam, zeta, psi,
                             max_iter=100, tol=1e-8):
    η = 1/psi * np.asarray(eta).reshape(n)
    # initialize A to no links
    A = np.zeros((n,n), dtype=int)
    # initial guess for y: just the stand‐alone η
    y = η.copy() 
    for it in range(max_iter):
        # --- 1) given A, solve for y:  (I - B) y = η ---
        B = np.zeros((n,n))
        # off‐diagonal entries
        B += 1/psi * rho * A
        B -= 1/psi * lam * (np.ones((n,n)) - np.eye(n))
        # zero out diagonal
        np.fill_diagonal(B, 0.0)  
        # solve (I - B) y = η
        M = np.eye(n) - B
        y_new = np.linalg.solve(M, η)  
        # --- 2) update adjacency A by threshold rule ---
        thresh = zeta / rho
        # outer product y_i * y_j
        Yprod = np.outer(y_new, y_new)
        A_new = (Yprod > thresh).astype(int)
        np.fill_diagonal(A_new, 0)   # no self‐links
        # check convergence
        if np.max(np.abs(y_new - y)) < tol and np.array_equal(A_new, A):
            break 
        y = y_new
        A = A_new
    return A, y
## Firm productivity and R&D effort
n   = 10
lam = 0.06
rho = 0.05 # in paper, rho = 0.02 
psi = 0.90 # weight in consumer welfare 
eta = [1.00, 0.71, 0.58, 0.50, 0.45,
       0.41, 0.38, 0.35, 0.33, 0.32]
zeta = 0.005
Aopt, yopt = simulate_nested_split_opt_rd(n, eta, rho, lam, zeta, psi)
A, y = simulate_nested_split_rd(n, eta, rho, lam, zeta)
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
# figure1: R&D effort vs. productivity
axes[0].scatter(eta, y, color='blue', s=80)
axes[0].scatter(eta, yopt, color='blue', s=80, marker='*')
axes[0].legend(['Equilibrium R&D effort', 'Optimal R&D effort'])
axes[0].set_xlabel(r"Firm productivity $\eta_i$")
axes[0].set_ylabel(r"R&D effort $y_i$")
axes[0].set_title(r"R&D effort vs. productivity")
axes[0].grid(True)
# figure2: Degree vs. productivity
axes[1].scatter(eta, np.sum(A, axis=1), color='red', s = 80)
axes[1].scatter(eta, np.sum(Aopt, axis=1), color='red', s = 80, marker='*')
axes[1].legend(['Equilibrium degree', 'Optimal degree'])
axes[1].set_xlabel(r"Firm productivity $\eta_i$")
axes[1].set_ylabel("Degree (number of links)")
axes[1].set_title("Degree vs. productivity")
axes[1].grid(True)
plt.tight_layout()
plt.show()


# Bayesian inference using Metropolis-Hastings with Gaussian Random Walk 
# Generate synthetic data
np.random.seed(1)
true_mu = 5.0
sigma = 2.0   # known standard deviation
n_samples = 50
data = np.random.normal(loc=true_mu, scale=sigma, size=n_samples)
tau = 10.0
# Log-posterior (up to a constant)
def log_post(mu, data, sigma, tau):
    ll = -0.5 * np.sum((data - mu)**2) / sigma**2  # log-likelihood
    lp = -0.5 * mu**2 / tau**2                    # log-prior
    return ll + lp
# Metropolis-Hastings sampler
def metropolis_hastings(log_post, data, sigma, tau, initial_mu, proposal_std, n_iters):
    mu_current = initial_mu
    logp_current = log_post(mu_current, data, sigma, tau)
    samples = np.zeros(n_iters)
    for i in range(n_iters):
        mu_proposal = np.random.normal(mu_current, proposal_std) #Gaussian proposal 
        logp_proposal = log_post(mu_proposal, data, sigma, tau)  
        # acceptance probability
        log_alpha = logp_proposal - logp_current
        if np.log(np.random.rand()) < log_alpha:
            mu_current = mu_proposal
            logp_current = logp_proposal
        samples[i] = mu_current
    return samples
# Run the sampler
n_iters = 10000
proposal_std = 0.5
initial_mu = 0.0
samples = metropolis_hastings(
    log_posterior, data, sigma, tau, initial_mu, proposal_std, n_iters
)
# Discard burn-in
burn_in = 2000 #dropping first 2000 samples
posterior_samples = samples[burn_in:]
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Trace plot
axes[0].plot(samples, alpha=0.5, color='blue')
axes[0].axvline(burn_in, color='red', linestyle='--', label='Burn-in cutoff')
axes[0].set_title(r'Trace of $\mu$ samples')
axes[0].set_xlabel(r'Iteration')
axes[0].set_ylabel(r'$\mu$')
axes[0].legend()

# Posterior histogram
axes[1].hist(posterior_samples, bins=30, density=True, alpha=0.5, color='blue', edgecolor='black')
axes[1].axvline(true_mu, color='black', linestyle='-', label=r'True $\mu$')
axes[1].set_title(r'Posterior distribution of $\mu$ (post burn-in)')
axes[1].set_xlabel(r'$\mu$')
axes[1].set_ylabel('Density')
axes[1].legend()

plt.tight_layout()
plt.show()

# Print summary
print("-------------------------------------")
print(f"True value    :  {true_mu:.3f}")
print(f"Posterior mean estimate: {np.mean(posterior_samples):.3f}")
print(f"Posterior std estimate: ({np.std(posterior_samples):.3f})")
print("-------------------------------------")

-------------------------------------
True value    :  5.000
Posterior mean estimate: 4.949
Posterior std estimate: (0.284)
-------------------------------------

Features	Reduced-form Model	Structural Model
Data Generation Process	$Y_i = f(X_i) + \varepsilon_i$	$X_i = argmax_{z} \{U(Y_i, z)\}$
Estimation Purpose	Causal effect of $X$ on $Y$	Structural parameters of the model
Estimation Method	OLS, IV, RDD, Matching	GMM, MLE, MCMC
Advantages	Easy to implement, interpret, and understand	More flexible, can incorporate more information, can be used for counterfactual analysis
Disadvantages	mechanisms? mis-specification? General equilibrium effects?	Usually more complicated, requires more assumptions, overfitting problem

Endogenous Technology Spillovers in R&D Collaboration Networks¶

Roadmap¶

A Gentle Introduction to Structural Modeling and Estimation¶

Connecting Model with Data: A Comparison¶

The Paper by Hsieh, Konig and Liu (2023) in a Nutshell¶

Theory¶

Theory (cont'd)¶

Theory (cont'd)¶

Theory (cont'd)¶

Empirical Analysis¶

Estimation Method: Bayesian MCMC (Markov Chain Monte Carlo)¶

Two Core Algorithms for MCMC¶

1. Metropolis–Hastings¶

2. Gibbs Sampling¶

Practical Tips¶

Extensions and Takeaways¶