From a24731fd8d5122209c54cc6e05112b8432e68b3b Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Fri, 6 Feb 2026 22:40:10 +1100 Subject: [PATCH 01/19] update --- lectures/_static/quant-econ.bib | 24 + lectures/_toc.yml | 1 + lectures/chow_business_cycles.md | 1162 ++++++++++++++++++++++++++++++ 3 files changed, 1187 insertions(+) create mode 100644 lectures/chow_business_cycles.md diff --git a/lectures/_static/quant-econ.bib b/lectures/_static/quant-econ.bib index 218573589..55b678f94 100644 --- a/lectures/_static/quant-econ.bib +++ b/lectures/_static/quant-econ.bib @@ -2733,3 +2733,27 @@ @article{Meghir2004 year={2004}, publisher={Wiley Online Library} } + +@article{Chow1968, + title={The Acceleration Principle and the Nature of Business Cycles}, + author={Chow, Gregory C.}, + journal={The Quarterly Journal of Economics}, + volume={82}, + number={3}, + pages={403--418}, + year={1968}, + month={aug}, + publisher={Oxford University Press} +} + +@article{ChowLevitan1969, + title={Nature of Business Cycles Implicit in a Linear Economic Model}, + author={Chow, Gregory C. and Levitan, Richard E.}, + journal={The Quarterly Journal of Economics}, + volume={83}, + number={3}, + pages={504--517}, + year={1969}, + month={aug}, + publisher={Oxford University Press} +} diff --git a/lectures/_toc.yml b/lectures/_toc.yml index 8d63e5906..aeaab36b5 100644 --- a/lectures/_toc.yml +++ b/lectures/_toc.yml @@ -56,6 +56,7 @@ parts: - file: inventory_dynamics - file: linear_models - file: samuelson + - file: chow_business_cycles - file: kesten_processes - file: wealth_dynamics - file: kalman diff --git a/lectures/chow_business_cycles.md b/lectures/chow_business_cycles.md new file mode 100644 index 000000000..9e99b0478 --- /dev/null +++ b/lectures/chow_business_cycles.md @@ -0,0 +1,1162 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.17.2 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(chow_business_cycles)= + +```{raw} jupyter +
+<div id="qe-notebook-header" align="right" style="text-align:right;">
+        <a href="https://quantecon.org/" title="quantecon.org">
+                <img style="width:250px;display:inline;" width="250px" src="https://assets.quantecon.org/img/qe-menubar-logo.svg" alt="QuantEcon">
+        </a>
+</div>
+``` + +# The Acceleration Principle and the Nature of Business Cycles + +```{contents} Contents +:depth: 2 +``` + +## Overview + +This lecture studies two classic papers by Gregory Chow on business cycles in linear dynamic models: + +- {cite}`Chow1968`: why acceleration-type investment behavior matters for oscillations, and how to read stochastic dynamics through autocovariances and spectral densities +- {cite}`ChowLevitan1969`: how those tools look when applied to a calibrated macroeconometric model of the U.S. economy + +These papers sit right at the intersection of three themes in this lecture series: + +- The multiplier–accelerator mechanism in {doc}`samuelson` +- Linear stochastic difference equations and autocovariances in {doc}`linear_models` +- Eigenmodes of multivariate dynamics in {doc}`var_dmd` +- Fourier ideas in {doc}`eig_circulant` (and, for empirical estimation, the advanced lecture [Estimation of Spectra](https://python-advanced.quantecon.org/estspec.html#)) + +We will keep coming back to three ideas: + +- In deterministic models, oscillations correspond to complex eigenvalues of a transition matrix. +- In stochastic models, a "cycle" shows up as a local peak in a (univariate) spectral density. +- Spectral peaks depend on eigenvalues, but also on how shocks enter (the covariance matrix $V$) and on how observables load on eigenmodes. + +## A linear system with shocks + +Both papers analyze (or reduce to) a first-order linear stochastic system + +```{math} +:label: chow_var1 + +y_t = A y_{t-1} + u_t, +\qquad +\mathbb E[u_t] = 0, +\qquad +\mathbb E[u_t u_t^\top] = V, +\qquad +\mathbb E[u_t u_{t-k}^\top] = 0 \ (k \neq 0). +``` + +When the eigenvalues of $A$ are strictly inside the unit circle, the process is (covariance) stationary and its autocovariances exist. + +In the notation of {doc}`linear_models`, this is the same stability condition that guarantees a unique solution to a discrete Lyapunov equation. + +Define the lag-$k$ autocovariance matrices + +```{math} +:label: chow_autocov_def + +\Gamma_k := \mathbb E[y_t y_{t-k}^\top] . +``` + +Standard calculations (also derived in {cite}`Chow1968`) give the recursion + +```{math} +:label: chow_autocov_rec + +\Gamma_k = A \Gamma_{k-1}, \quad k \ge 1, +\qquad\text{and}\qquad +\Gamma_0 = A \Gamma_0 A^\top + V. +``` + +The second equation is the discrete Lyapunov equation for $\Gamma_0$. + +## From autocovariances to spectra + +Chow’s key step is to translate the autocovariance sequence $\{\Gamma_k\}$ into a frequency-domain object. + +The **spectral density matrix** is the Fourier transform of $\Gamma_k$: + +```{math} +:label: chow_spectral_def + +F(\omega) := \frac{1}{2\pi} \sum_{k=-\infty}^{\infty} \Gamma_k e^{-i \omega k}, +\qquad \omega \in [0, \pi]. +``` + +For the VAR(1) system {eq}`chow_var1`, this sum has a closed form + +```{math} +:label: chow_spectral_closed + +F(\omega) += \frac{1}{2\pi} +\left(I - A e^{-i\omega}\right)^{-1} +V +\left(I - A^\top e^{i\omega}\right)^{-1}. +``` + +Intuitively, $F(\omega)$ tells us how much variation in $y_t$ is associated with cycles of (angular) frequency $\omega$. + +The corresponding cycle length is + +```{math} +:label: chow_period + +T(\omega) = \frac{2\pi}{\omega}. +``` + +The advanced lecture {doc}`advanced:estspec` explains how to estimate $F(\omega)$ from data. + +Here we focus on the model-implied spectrum. + +We will use the following imports and helper functions throughout the lecture. 
+ +```{code-cell} ipython3 +import numpy as np +import matplotlib.pyplot as plt + +def spectral_density_var1(A, V, ω_grid): + """Spectral density matrix for VAR(1): y_t = A y_{t-1} + u_t.""" + A, V = np.asarray(A), np.asarray(V) + n = A.shape[0] + I = np.eye(n) + F = np.empty((len(ω_grid), n, n), dtype=complex) + for k, ω in enumerate(ω_grid): + H = np.linalg.inv(I - np.exp(-1j * ω) * A) + F[k] = (H @ V @ H.conj().T) / (2 * np.pi) + return F + +def spectrum_of_linear_combination(F, b): + """Spectrum of x_t = b'y_t given the spectral matrix F(ω).""" + b = np.asarray(b).reshape(-1, 1) + return np.array([np.real((b.T @ F[k] @ b).item()) for k in range(F.shape[0])]) + +def simulate_var1(A, V, T, burn=200, seed=1234): + """Simulate y_t = A y_{t-1} + u_t with u_t ~ N(0, V).""" + rng = np.random.default_rng(seed) + A, V = np.asarray(A), np.asarray(V) + n = A.shape[0] + chol = np.linalg.cholesky(V) + y = np.zeros((T + burn, n)) + for t in range(1, T + burn): + y[t] = A @ y[t - 1] + chol @ rng.standard_normal(n) + return y[burn:] + +def sample_autocorrelation(x, max_lag): + """Sample autocorrelation of a 1d array from lag 0 to max_lag.""" + x = np.asarray(x) + x = x - x.mean() + denom = np.dot(x, x) + acf = np.empty(max_lag + 1) + for k in range(max_lag + 1): + acf[k] = np.dot(x[:-k] if k else x, x[k:]) / denom + return acf +``` + +## Deterministic propagation and acceleration + +Chow {cite}`Chow1968` begins with a clean deterministic question: + +> If you build a macro model using only standard demand equations with simple distributed lags, can the system generate sustained oscillations without acceleration? + +He shows that, under natural sign restrictions, the answer is no. + +### A demand system without acceleration + +Consider a system where each component $y_{it}$ responds to aggregate output $Y_t$ and its own lag: + +```{math} +:label: chow_simple_demand + +y_{it} = a_i Y_t + b_i y_{i,t-1}, +\qquad +Y_t = \sum_i y_{it}, +\qquad +a_i > 0,\; b_i > 0. +``` + +Chow shows that the implied transition matrix has real characteristic roots, and that if $\sum_i a_i < 1$ these roots are also positive. + +In that case, solutions are linear combinations of decaying exponentials without persistent sign-switching components, so there are no “business-cycle-like” oscillations driven purely by internal propagation. + +### What acceleration changes + +For investment (and some durables), Chow argues that a more relevant starting point is a *stock adjustment* equation (demand for a stock), e.g. + +```{math} +:label: chow_stock_adj + +s_{it} = \alpha_i Y_t + \beta_i s_{i,t-1}. +``` + +If flow investment is proportional to the change in the desired stock, differencing introduces terms in $\Delta Y_t$. + +That "acceleration" structure creates negative coefficients (in lagged levels), which makes complex roots possible. + +This connects directly to {doc}`samuelson`, where acceleration is the key ingredient that can generate damped or persistent oscillations in a deterministic second-order difference equation. + +To see the mechanism with minimal algebra, take the multiplier–accelerator law of motion + +```{math} +Y_t = c Y_{t-1} + v (Y_{t-1} - Y_{t-2}), +``` + +and rewrite it as a first-order system in $(Y_t, Y_{t-1})$. 
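In companion form (a short worked restatement of the law of motion above; placing the shock in the first equation is the convention we adopt for the stochastic version in the code below), the system is

```{math}
\begin{bmatrix} Y_t \\ Y_{t-1} \end{bmatrix}
=
\begin{bmatrix} c + v & -v \\ 1 & 0 \end{bmatrix}
\begin{bmatrix} Y_{t-1} \\ Y_{t-2} \end{bmatrix}
+
\begin{bmatrix} u_t \\ 0 \end{bmatrix},
```

and the characteristic equation $\lambda^2 - (c+v)\lambda + v = 0$ has complex roots exactly when $(c+v)^2 < 4v$, which is the case that produces oscillations.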
+ +```{code-cell} ipython3 +def samuelson_transition(c, v): + return np.array([[c + v, -v], [1.0, 0.0]]) + +c = 0.6 +v_values = (0.0, 0.8) +A_list = [samuelson_transition(c, v) for v in v_values] + +for v, A in zip(v_values, A_list): + eig = np.linalg.eigvals(A) + print(f"v={v:.1f}, eigenvalues={eig}") + +# impulse responses from a one-time unit shock in Y +T = 40 +s0 = np.array([1.0, 0.0]) +irfs = [] +for A in A_list: + s = s0.copy() + path = np.empty(T + 1) + for t in range(T + 1): + path[t] = s[0] + s = A @ s + irfs.append(path) + +# model-implied spectra for the stochastic version with shocks in the Y equation +freq = np.linspace(1e-4, 0.5, 2500) # cycles/period +ω_grid = 2 * np.pi * freq +V = np.array([[1.0, 0.0], [0.0, 0.0]]) + +spectra = [] +for A in A_list: + F = spectral_density_var1(A, V, ω_grid) + f11 = np.real(F[:, 0, 0]) + spectra.append(f11 / np.trapz(f11, freq)) + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + +axes[0].plot(range(T + 1), irfs[0], lw=1.8, label="no acceleration") +axes[0].plot(range(T + 1), irfs[1], lw=1.8, label="with acceleration") +axes[0].axhline(0.0, lw=0.8) +axes[0].set_xlabel("time") +axes[0].set_ylabel(r"$Y_t$") +axes[0].legend(frameon=False) + +axes[1].plot(freq, spectra[0], lw=1.8, label="no acceleration") +axes[1].plot(freq, spectra[1], lw=1.8, label="with acceleration") +axes[1].set_xlabel(r"frequency $\omega/2\pi$") +axes[1].set_ylabel("normalized spectrum") +axes[1].set_xlim([0.0, 0.5]) +axes[1].legend(frameon=False) + +plt.tight_layout() +plt.show() +``` + +The left panel shows that acceleration creates oscillatory impulse responses. + +The right panel shows the corresponding spectral signature: a peak at interior frequencies. + +### How the accelerator shifts the spectral peak + +As we increase the accelerator $v$, the complex eigenvalues rotate further from the real axis, shifting the spectral peak to higher frequencies. + +```{code-cell} ipython3 +v_grid = np.linspace(0.2, 1.2, 6) +c = 0.6 +freq_fine = np.linspace(1e-4, 0.5, 2000) +ω_fine = 2 * np.pi * freq_fine +V_acc = np.array([[1.0, 0.0], [0.0, 0.0]]) + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + +for v in v_grid: + A = samuelson_transition(c, v) + eig = np.linalg.eigvals(A) + F = spectral_density_var1(A, V_acc, ω_fine) + f11 = np.real(F[:, 0, 0]) + f11_norm = f11 / np.trapz(f11, freq_fine) + + # plot eigenvalues + axes[0].scatter(eig.real, eig.imag, s=40, label=f'$v={v:.1f}$') + + # plot spectrum + axes[1].plot(freq_fine, f11_norm, lw=1.5, label=f'$v={v:.1f}$') + +# unit circle +θ_circle = np.linspace(0, 2*np.pi, 100) +axes[0].plot(np.cos(θ_circle), np.sin(θ_circle), 'k--', lw=0.8) +axes[0].set_xlabel('real part') +axes[0].set_ylabel('imaginary part') +axes[0].set_aspect('equal') +axes[0].legend(frameon=False, fontsize=8) + +axes[1].set_xlabel(r'frequency $\omega/2\pi$') +axes[1].set_ylabel('normalized spectrum') +axes[1].set_xlim([0, 0.5]) +axes[1].legend(frameon=False, fontsize=8) + +plt.tight_layout() +plt.show() +``` + +Larger $v$ pushes the eigenvalues further off the real axis, shifting the spectral peak to higher frequencies. + +When $v$ is large enough that eigenvalues leave the unit circle, the system becomes explosive. + +## Spectral peaks are not just eigenvalues + +With shocks, the deterministic question ("does the system oscillate?") becomes: at which cycle lengths does the variance of $y_t$ concentrate? + +In this lecture, a "cycle" means a local peak in a univariate spectrum $f_{ii}(\omega)$. 
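To make that definition operational, here is a minimal sketch (a helper of our own, not something from Chow's papers) that flags an interior peak: it locates the maximum of a univariate spectrum and checks that it does not sit at either end of the frequency grid.

```{code-cell} ipython3
def interior_peak(f, ω_grid, edge=5):
    """Return (ω*, f(ω*)) if the maximum of f lies strictly inside
    the grid, and None if it sits at (or near) an endpoint."""
    i_max = np.argmax(f)
    if edge < i_max < len(ω_grid) - 1 - edge:
        return ω_grid[i_max], f[i_max]
    return None

# quick demonstration on the accelerator example above (c = 0.6, v = 0.8)
ω_demo = np.linspace(1e-3, np.pi - 1e-3, 1000)
F_demo = spectral_density_var1(samuelson_transition(0.6, 0.8),
                               np.array([[1.0, 0.0], [0.0, 0.0]]), ω_demo)
peak = interior_peak(np.real(F_demo[:, 0, 0]), ω_demo)
if peak is not None:
    print(f"peak at ω ≈ {peak[0]:.3f}, cycle length ≈ {2 * np.pi / peak[0]:.1f} periods")
else:
    print("no interior peak")
```

We apply the same logic informally when reading the plots below; exercise 2 at the end of the lecture uses essentially the same check.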
+ +Chow's point in {cite}`Chow1968` is that eigenvalues help interpret spectra, but they do not determine peaks by themselves. + +Two extra ingredients matter: + +- how shocks load on the eigenmodes (the covariance matrix $V$), +- how the variable of interest mixes those modes. + +The next simulations isolate these effects. + +### Complex roots: a peak and an oscillating autocorrelation + +Take a stable “rotation–contraction” matrix + +```{math} +:label: chow_rot + +A = r +\begin{bmatrix} +\cos \theta & -\sin \theta \\ +\sin \theta & \cos \theta +\end{bmatrix}, +\qquad 0 < r < 1, +``` + +whose eigenvalues are $r e^{\pm i\theta}$. + +When $r$ is close to 1, the spectrum shows a pronounced peak near $\omega \approx \theta$. + +```{code-cell} ipython3 +def rotation_contraction(r, θ): + c, s = np.cos(θ), np.sin(θ) + return r * np.array([[c, -s], [s, c]]) + +θ = np.pi / 3 +r_values = (0.95, 0.4) +ω_grid = np.linspace(1e-3, np.pi - 1e-3, 800) +V = np.eye(2) + +acfs = [] +spectra = [] +for r in r_values: + A = rotation_contraction(r, θ) + + y = simulate_var1(A, V, T=5000, burn=500, seed=1234) + acfs.append(sample_autocorrelation(y[:, 0], 40)) + + F = spectral_density_var1(A, V, ω_grid) + spectra.append(np.real(F[:, 0, 0])) + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + +for r, acf in zip(r_values, acfs): + axes[0].plot(range(len(acf)), acf, lw=1.8, label=fr"$r={r}$") +axes[0].axhline(0.0, lw=0.8) +axes[0].set_xlabel("lag") +axes[0].set_ylabel("autocorrelation") +axes[0].legend(frameon=False) + +for r, f11 in zip(r_values, spectra): + axes[1].plot(ω_grid / np.pi, f11, lw=1.8, label=fr"$r={r}$") +axes[1].axvline(θ / np.pi, ls="--", lw=1.0, label=r"$\theta/\pi$") +axes[1].set_xlabel(r"frequency $\omega/\pi$") +axes[1].set_ylabel(r"$f_{11}(\omega)$") +axes[1].legend(frameon=False) + +plt.tight_layout() +plt.show() +``` + +When $r$ is close to 1, the autocorrelation oscillates slowly and the spectrum has a sharp peak near $\theta$. + +When $r$ is smaller, oscillations die out quickly and the spectrum is flatter. + +### How shock structure shapes the spectrum + +Even with the same transition matrix, different shock covariance structures produce different spectral shapes. + +Here we fix $r = 0.9$ and vary the correlation between the two shocks. + +```{code-cell} ipython3 +r_fixed = 0.9 +A_fixed = rotation_contraction(r_fixed, θ) +corr_values = [-0.9, 0.0, 0.9] + +fig, ax = plt.subplots(figsize=(9, 4)) +for corr in corr_values: + V_corr = np.array([[1.0, corr], [corr, 1.0]]) + F = spectral_density_var1(A_fixed, V_corr, ω_grid) + f11 = np.real(F[:, 0, 0]) + f11_norm = f11 / np.trapz(f11, ω_grid / np.pi) + ax.plot(ω_grid / np.pi, f11_norm, lw=1.8, label=fr'$\rho = {corr}$') + +ax.axvline(θ / np.pi, ls='--', lw=1.0, color='gray') +ax.set_xlabel(r'frequency $\omega/\pi$') +ax.set_ylabel('normalized spectrum') +ax.legend(frameon=False) +plt.show() +``` + +The peak location is unchanged, but the peak height depends on the shock correlation. + +This illustrates that eigenvalues alone do not determine the full spectral shape. + +### Complex roots: an oscillatory mode can be hidden + +Complex roots are not sufficient for a visible peak in the spectrum of every observed series. + +Even if the state vector contains an oscillatory mode, a variable can be dominated by a non-oscillatory component. + +The next example combines a rotation–contraction block with a very persistent real root, and then looks at a mixture that is dominated by the persistent component. 
+ +```{code-cell} ipython3 +A_osc = rotation_contraction(0.95, θ) +A = np.block([ + [A_osc, np.zeros((2, 1))], + [np.zeros((1, 2)), np.array([[0.99]])] +]) + +# shocks hit the persistent component much more strongly +V = np.diag([1.0, 1.0, 50.0]) + +ω_grid_big = np.linspace(1e-3, np.pi - 1e-3, 1200) +F = spectral_density_var1(A, V, ω_grid_big) + +x_grid = ω_grid_big / np.pi +f_y1 = np.real(F[:, 0, 0]) + +b = np.array([0.05, 0.0, 1.0]) +f_mix = spectrum_of_linear_combination(F, b) + +f_y1_norm = f_y1 / np.trapz(f_y1, x_grid) +f_mix_norm = f_mix / np.trapz(f_mix, x_grid) + +fig, ax = plt.subplots(figsize=(9, 4)) +ax.plot(x_grid, f_y1_norm, lw=1.8, label=r"$y_1$") +ax.plot(x_grid, f_mix_norm, lw=1.8, label=r"$x = 0.05\,y_1 + y_3$") +ax.set_xlabel(r"frequency $\omega/\pi$") +ax.set_ylabel("normalized spectrum") +ax.legend(frameon=False) +plt.show() +``` + +Here the oscillatory mode is still present (the $y_1$ spectrum peaks away from zero), but the mixture $x$ is dominated by the near-unit root and hence by very low frequencies. + +### Real roots: a peak from mixing shocks + +Chow also constructs examples where all roots are real and positive yet a linear combination displays a local spectral peak. + +The mechanism is that cross-correlation in shocks can generate cyclical-looking behavior. + +Here is a close analog of Chow’s two-root illustration. + +```{code-cell} ipython3 +A = np.diag([0.1, 0.9]) +V = np.array([[1.0, 0.8], [0.8, 1.0]]) +b = np.array([1.0, -0.01]) + +F = spectral_density_var1(A, V, ω_grid) +f_x = spectrum_of_linear_combination(F, b) +imax = np.argmax(f_x) +ω_star = ω_grid[imax] +period_star = 2 * np.pi / ω_star + +fig, ax = plt.subplots(figsize=(9, 4)) +ax.plot(ω_grid / np.pi, f_x) +ax.scatter([ω_star / np.pi], [f_x[imax]], zorder=3) +ax.set_xlabel(r"frequency $\omega/\pi$") +ax.set_ylabel(r"$f_x(\omega)$") +plt.show() +print(f"peak period ≈ {period_star:.1f}") +``` + +The lesson is the same as Chow’s: in multivariate stochastic systems, “cycle-like” spectra are shaped not only by eigenvalues, but also by how shocks enter ($V$) and how variables combine (the analogue of Chow’s eigenvector matrix). + +## A calibrated model in the frequency domain + +Chow and Levitan {cite}`ChowLevitan1969` use the frequency-domain objects from {cite}`Chow1968` to study a calibrated annual macroeconometric model. + +They work with five annual aggregates + +- $y_1 = C$ (consumption), +- $y_2 = I_1$ (equipment plus inventories), +- $y_3 = I_2$ (construction), +- $y_4 = R_a$ (long rate), +- $y_5 = Y_1 = C + I_1 + I_2$ (private-domestic gnp), + +and add $y_6 = y_{1,t-1}$ to rewrite the original system in first-order form. + +Throughout this section, frequency is measured in cycles per year, $f = \omega/2\pi \in [0, 1/2]$. + +Following the paper, we normalize each spectrum to have area 1 over $[0, 1/2]$ so plots compare shape rather than scale. + +Our goal is to reconstruct the transition matrix $A$ and then compute and interpret the model-implied spectra, gains/coherences, and phase differences. + +### The cycle subsystem + +The paper starts from a reduced form with exogenous inputs, + +```{math} +:label: chow_reduced_full + +y_t = A y_{t-1} + C x_t + u_t. +``` + +To study cycles, they remove the deterministic component attributable to $x_t$ and focus on the zero-mean subsystem + +```{math} +:label: chow_cycle_system + +y_t = A y_{t-1} + u_t. +``` + +For second moments, the only additional ingredient is the covariance matrix $V = \mathbb E[u_t u_t^\top]$. 
+ +Chow and Levitan compute it from structural parameters via + +```{math} +:label: chow_v_from_structural + +V = M^{-1} \Sigma (M^{-1})^\top +``` + +where $\Sigma$ is the covariance of structural residuals and $M$ is the matrix of contemporaneous structural coefficients. + +Here we take $A$ and $V$ as given and ask what they imply for spectra and cross-spectra. + +### Reported shock covariance + +Chow and Levitan report the $6 \times 6$ reduced-form shock covariance matrix $V$ (scaled by $10^{-7}$): + +```{math} +:label: chow_V_matrix + +V = \begin{bmatrix} +8.250 & 7.290 & 2.137 & 2.277 & 17.68 & 0 \\ +7.290 & 7.135 & 1.992 & 2.165 & 16.42 & 0 \\ +2.137 & 1.992 & 0.618 & 0.451 & 4.746 & 0 \\ +2.277 & 2.165 & 0.451 & 1.511 & 4.895 & 0 \\ +17.68 & 16.42 & 4.746 & 4.895 & 38.84 & 0 \\ +0 & 0 & 0 & 0 & 0 & 0 +\end{bmatrix}. +``` + +The sixth row and column are zeros because $y_6$ is an identity (lagged $y_1$). + +### Reported eigenvalues + +The transition matrix $A$ has six characteristic roots: + +```{math} +:label: chow_eigenvalues + +\begin{aligned} +\lambda_1 &= 0.9999725, \quad \lambda_2 = 0.9999064, \quad \lambda_3 = 0.4838, \\ +\lambda_4 &= 0.0761 + 0.1125i, \quad \lambda_5 = 0.0761 - 0.1125i, \quad \lambda_6 = -0.00004142. +\end{aligned} +``` + +Two roots are near unity because two structural equations are in first differences. + +One root ($\lambda_6$) is theoretically zero because of the identity $y_5 = y_1 + y_2 + y_3$. + +The complex conjugate pair $\lambda_{4,5}$ has modulus $|\lambda_4| = \sqrt{0.0761^2 + 0.1125^2} \approx 0.136$. + +### Reported eigenvectors + +The right eigenvector matrix $B$ (columns are eigenvectors corresponding to $\lambda_1, \ldots, \lambda_6$): + +```{math} +:label: chow_B_matrix + +B = \begin{bmatrix} +-0.008 & 1.143 & 0.320 & 0.283+0.581i & 0.283-0.581i & 0.000 \\ +-0.000 & 0.013 & -0.586 & -2.151+0.742i & -2.151-0.742i & 2.241 \\ +-0.001 & 0.078 & 0.889 & -0.215+0.135i & -0.215-0.135i & 0.270 \\ +1.024 & 0.271 & 0.069 & -0.231+0.163i & -0.231-0.163i & 0.307 \\ +-0.009 & 1.235 & 0.623 & -2.082+1.468i & -2.082-1.468i & 2.766 \\ +-0.008 & 1.143 & 0.662 & 4.772+0.714i & 4.772-0.714i & -4.399 +\end{bmatrix}. +``` + +Together, $V$, $\{\lambda_i\}$, and $B$ are sufficient to compute all spectral and cross-spectral densities. + +### Reconstructing $A$ and computing $F(\omega)$ + +The paper reports $(\lambda, B, V)$, which is enough to reconstruct +$A = B \, \mathrm{diag}(\lambda_1,\dots,\lambda_6)\, B^{-1}$ and then compute the model-implied spectral objects. 
+ +```{code-cell} ipython3 +λ = np.array([ + 0.9999725, 0.9999064, 0.4838, + 0.0761 + 0.1125j, 0.0761 - 0.1125j, -0.00004142 +], dtype=complex) + +B = np.array([ + [-0.008, 1.143, 0.320, 0.283+0.581j, 0.283-0.581j, 0.000], + [-0.000, 0.013, -0.586, -2.151+0.742j, -2.151-0.742j, 2.241], + [-0.001, 0.078, 0.889, -0.215+0.135j, -0.215-0.135j, 0.270], + [1.024, 0.271, 0.069, -0.231+0.163j, -0.231-0.163j, 0.307], + [-0.009, 1.235, 0.623, -2.082+1.468j, -2.082-1.468j, 2.766], + [-0.008, 1.143, 0.662, 4.772+0.714j, 4.772-0.714j, -4.399] +], dtype=complex) + +V = np.array([ + [8.250, 7.290, 2.137, 2.277, 17.68, 0], + [7.290, 7.135, 1.992, 2.165, 16.42, 0], + [2.137, 1.992, 0.618, 0.451, 4.746, 0], + [2.277, 2.165, 0.451, 1.511, 4.895, 0], + [17.68, 16.42, 4.746, 4.895, 38.84, 0], + [0, 0, 0, 0, 0, 0] +]) * 1e-7 + +D_λ = np.diag(λ) +A_chow = B @ D_λ @ np.linalg.inv(B) +A_chow = np.real(A_chow) # drop tiny imaginary parts from reported rounding +print("eigenvalues of reconstructed A:") +print(np.linalg.eigvals(A_chow).round(6)) +``` + +### Canonical coordinates + +Chow's canonical transformation uses $z_t = B^{-1} y_t$, giving dynamics $z_t = D_\lambda z_{t-1} + e_t$. + +An algebraic detail: the closed form for $F(\omega)$ uses $A^\top$ (real transpose) rather than a conjugate transpose. + +Accordingly, the canonical shock covariance is + +```{math} +W = B^{-1} V (B^{-1})^\top. +``` + +```{code-cell} ipython3 +B_inv = np.linalg.inv(B) +W = B_inv @ V @ B_inv.T +print("diagonal of W:") +print(np.diag(W).round(10)) +``` + +### Spectral density via eigendecomposition + +Chow's closed-form formula for the spectral density matrix is + +```{math} +:label: chow_spectral_eigen + +F(\omega) += B \left[ \frac{w_{ij}}{(1 - \lambda_i e^{-i\omega})(1 - \lambda_j e^{i\omega})} \right] B^\top, +``` + +where $w_{ij}$ are elements of the canonical shock covariance $W$. + +```{code-cell} ipython3 +def spectral_density_chow(λ, B, W, ω_grid): + """Spectral density via Chow's eigendecomposition formula.""" + p = len(λ) + F = np.zeros((len(ω_grid), p, p), dtype=complex) + for k, ω in enumerate(ω_grid): + F_star = np.zeros((p, p), dtype=complex) + for i in range(p): + for j in range(p): + denom = (1 - λ[i] * np.exp(-1j * ω)) * (1 - λ[j] * np.exp(1j * ω)) + F_star[i, j] = W[i, j] / denom + F[k] = B @ F_star @ B.T + return F / (2 * np.pi) + +freq = np.linspace(1e-4, 0.5, 5000) # cycles/year in [0, 1/2] +ω_grid = 2 * np.pi * freq # radians in [0, π] +F_chow = spectral_density_chow(λ, B, W, ω_grid) +``` + +### Where is variance concentrated? + +Normalizing each spectrum to have unit area over $[0, 1/2]$ lets us compare shapes rather than scales. 
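Concretely, with frequency measured in cycles per year, $\nu = \omega/2\pi$, each curve plotted below is the normalized spectrum (our notation for the rescaling just described)

```{math}
\tilde f_{ii}(\nu) = \frac{f_{ii}(2\pi\nu)}{\int_0^{1/2} f_{ii}(2\pi s)\, ds},
\qquad \nu \in [0, \tfrac{1}{2}],
```

so that the area under each curve equals one.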
+ +```{code-cell} ipython3 +variable_names = ['$C$', '$I_1$', '$I_2$', '$R_a$', '$Y_1$'] +freq_ticks = [1/18, 1/9, 1/6, 1/4, 1/3, 1/2] +freq_labels = [r'$\frac{1}{18}$', r'$\frac{1}{9}$', r'$\frac{1}{6}$', + r'$\frac{1}{4}$', r'$\frac{1}{3}$', r'$\frac{1}{2}$'] + +def paper_frequency_axis(ax): + ax.set_xlim([0.0, 0.5]) + ax.set_xticks(freq_ticks) + ax.set_xticklabels(freq_labels) + ax.set_xlabel(r'frequency $\omega/2\pi$') + +# Normalized spectra (areas set to 1) +S = np.real(np.diagonal(F_chow, axis1=1, axis2=2))[:, :5] # y1..y5 +areas = np.trapz(S, freq, axis=0) +S_norm = S / areas +mask = freq >= 0.0 + +fig, axes = plt.subplots(1, 2, figsize=(10, 6)) + +# Figure I.1: consumption (log scale) +axes[0].plot(freq[mask], S_norm[mask, 0], lw=1.8) +axes[0].set_yscale('log') +paper_frequency_axis(axes[0]) +axes[0].set_ylabel(r'normalized $f_{11}(\omega)$') + +# Figure I.2: equipment + inventories (log scale) +axes[1].plot(freq[mask], S_norm[mask, 1], lw=1.8) +axes[1].set_yscale('log') +paper_frequency_axis(axes[1]) +axes[1].set_ylabel(r'normalized $f_{22}(\omega)$') + +plt.tight_layout() +plt.show() + +i_peak = np.argmax(S_norm[mask, 1]) +f_peak = freq[mask][i_peak] +print(f"Peak within [1/18, 1/2]: frequency ≈ {f_peak:.3f} cycles/year, period ≈ {1/f_peak:.2f} years.") +``` + +Both spectra are dominated by very low frequencies, reflecting the near-unit eigenvalues. + +This is the "typical spectral shape" of macroeconomic time series. + +(These patterns match Figures I.1–I.2 of {cite}`ChowLevitan1969`.) + +### How variables move together across frequencies + +Beyond univariate spectra, we can ask how pairs of variables covary at each frequency. + +The **cross-spectrum** $f_{ij}(\omega) = c_{ij}(\omega) - i \cdot q_{ij}(\omega)$ decomposes into the cospectrum $c_{ij}$ and the quadrature spectrum $q_{ij}$. + +The **cross-amplitude** is $g_{ij}(\omega) = |f_{ij}(\omega)| = \sqrt{c_{ij}^2 + q_{ij}^2}$. + +The **squared coherence** measures linear association at frequency $\omega$: + +```{math} +:label: chow_coherence + +R^2_{ij}(\omega) = \frac{|f_{ij}(\omega)|^2}{f_{ii}(\omega) f_{jj}(\omega)} \in [0, 1]. +``` + +The **gain** is the frequency-response coefficient when regressing $y_i$ on $y_j$: + +```{math} +:label: chow_gain + +G_{ij}(\omega) = \frac{|f_{ij}(\omega)|}{f_{jj}(\omega)}. +``` + +The **phase** captures lead-lag relationships (in radians): + +```{math} +:label: chow_phase + +\Delta_{ij}(\omega) = \tan^{-1}\left( \frac{q_{ij}(\omega)}{c_{ij}(\omega)} \right). +``` + +```{code-cell} ipython3 +def cross_spectral_measures(F, i, j): + """Compute coherence, gain (y_i on y_j), and phase between variables i and j.""" + f_ij = F[:, i, j] + f_ii, f_jj = np.real(F[:, i, i]), np.real(F[:, j, j]) + g_ij = np.abs(f_ij) + coherence = (g_ij**2) / (f_ii * f_jj) + gain = g_ij / f_jj + phase = np.arctan2(-np.imag(f_ij), np.real(f_ij)) + return coherence, gain, phase +``` + +We now plot gain and coherence as in Figures II.1-II.3 of {cite}`ChowLevitan1969`. 
+ +```{code-cell} ipython3 +gnp_idx = 4 + +fig, axes = plt.subplots(1, 3, figsize=(14, 6)) + +for idx, var_idx in enumerate([0, 1, 2]): + coherence, gain, phase = cross_spectral_measures(F_chow, var_idx, gnp_idx) + ax = axes[idx] + + ax.plot(freq[mask], coherence[mask], + lw=1.8, label=rf'$R^2_{{{var_idx+1}5}}(\omega)$') + ax.plot(freq[mask], gain[mask], + lw=1.8, label=rf'$G_{{{var_idx+1}5}}(\omega)$') + + paper_frequency_axis(ax) + ax.set_ylim([0, 1.0]) + ax.set_ylabel('gain, coherence') + ax.legend(frameon=False, loc='best') + +plt.tight_layout() +plt.show() +``` + +Coherence is high at low frequencies for all three components, meaning long-run movements track output closely. + +Gains differ: consumption smooths (gain below 1), while investment responds more strongly at higher frequencies. + +(These patterns match Figures II.1-II.3 of {cite}`ChowLevitan1969`.) + +### Lead-lag relationships + +The phase tells us which variable leads at each frequency. + +Positive phase means output leads the component; negative phase means the component leads output. + +```{code-cell} ipython3 +fig, ax = plt.subplots(figsize=(8, 6)) + +labels = [r'$\psi_{15}(\omega)/2\pi$', r'$\psi_{25}(\omega)/2\pi$', + r'$\psi_{35}(\omega)/2\pi$', r'$\psi_{45}(\omega)/2\pi$'] + +for var_idx in range(4): + coherence, gain, phase = cross_spectral_measures(F_chow, var_idx, gnp_idx) + phase_cycles = phase / (2 * np.pi) + ax.plot(freq[mask], phase_cycles[mask], lw=1.8, label=labels[var_idx]) + +ax.axhline(0, lw=0.8) +paper_frequency_axis(ax) +ax.set_ylabel('phase difference in cycles') +ax.set_ylim([-0.25, 0.25]) +ax.set_yticks([-0.25, -0.20, -0.15, -0.10, -0.05, 0, 0.05, 0.10, 0.15, 0.20, 0.25]) +ax.legend(frameon=False, fontsize=9) +plt.tight_layout() +plt.show() +``` + +At business-cycle frequencies, consumption tends to lag output while equipment and inventories tend to lead. + +The interest rate is roughly coincident. + +(This matches Figure III of {cite}`ChowLevitan1969`.) + +### Building blocks of spectral shape + +Each eigenvalue contributes a characteristic spectral shape through the **scalar kernel** + +```{math} +:label: chow_scalar_kernel + +g_i(\omega) = \frac{1 - |\lambda_i|^2}{|1 - \lambda_i e^{-i\omega}|^2} = \frac{1 - |\lambda_i|^2}{1 + |\lambda_i|^2 - 2 \text{Re}(\lambda_i) \cos\omega + 2 \text{Im}(\lambda_i) \sin\omega}. +``` + +For real $\lambda_i$, this simplifies to + +```{math} +g_i(\omega) = \frac{1 - \lambda_i^2}{1 + \lambda_i^2 - 2\lambda_i \cos\omega}. +``` + +Each observable spectral density is a linear combination of these kernels (plus cross-terms). + +```{code-cell} ipython3 +def scalar_kernel(λ_i, ω_grid): + """Chow's scalar spectral kernel g_i(ω).""" + λ_i = complex(λ_i) + mod_sq = np.abs(λ_i)**2 + return np.array([(1 - mod_sq) / np.abs(1 - λ_i * np.exp(-1j * ω))**2 for ω in ω_grid]) + +fig, ax = plt.subplots(figsize=(10, 5)) +for i, λ_i in enumerate(λ[:4]): + if np.abs(λ_i) > 0.01: + g_i = scalar_kernel(λ_i, ω_grid) + label = f'$\\lambda_{i+1}$ = {λ_i:.4f}' if np.isreal(λ_i) else f'$\\lambda_{i+1}$ = {λ_i:.3f}' + ax.semilogy(freq, g_i, label=label, lw=1.5) +ax.set_xlabel(r'frequency $\omega/2\pi$') +ax.set_ylabel('$g_i(\\omega)$') +ax.set_xlim([1/18, 0.5]) +ax.set_xticks(freq_ticks) +ax.set_xticklabels(freq_labels) +ax.legend(frameon=False) +plt.show() +``` + +Near-unit eigenvalues produce kernels sharply peaked at low frequencies. + +Smaller eigenvalues produce flatter kernels. + +The complex pair ($\lambda_{4,5}$) has such small modulus that its kernel is nearly flat. 
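To put rough numbers on "sharply peaked" versus "nearly flat", we can compare the largest and smallest values each kernel takes on the frequency grid (a quick check of ours, reusing `scalar_kernel`, `λ`, and `ω_grid` defined above).

```{code-cell} ipython3
for i, λ_i in enumerate(λ):
    if np.abs(λ_i) < 1e-3:
        continue  # skip the root that is numerically zero
    g_i = scalar_kernel(λ_i, ω_grid)
    ratio = g_i.max() / g_i.min()
    print(f"λ_{i+1} = {complex(λ_i):.4f}: max(g_i)/min(g_i) ≈ {ratio:,.1f}")
```

The near-unit roots give ratios many orders of magnitude above one, while the small complex pair stays close to flat.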
+ +### Why the spectra look the way they do + +The two near-unit eigenvalues generate strong low-frequency power. + +The moderate eigenvalue ($\lambda_3 \approx 0.48$) contributes a flatter component. + +The complex pair has small modulus ($|\lambda_{4,5}| \approx 0.136$), so it cannot generate a pronounced interior peak. + +The near-zero eigenvalue reflects the accounting identity $Y_1 = C + I_1 + I_2$. + +This illustrates Chow's message: eigenvalues guide intuition, but observed spectra also depend on how shocks excite the modes and how observables combine them. + +### Summary + +The calibrated model reveals three patterns: (1) most variance sits at very low frequencies due to near-unit eigenvalues; (2) consumption smooths while investment amplifies high-frequency movements; (3) consumption lags output at business-cycle frequencies while investment leads. + +## Wrap-up + +Chow {cite}`Chow1968` emphasizes two complementary diagnostics for linear macro models: how eigenvalues shape deterministic propagation, and how spectra summarize stochastic dynamics. + +Chow and Levitan {cite}`ChowLevitan1969` then show what these objects look like in a calibrated system: strong low-frequency power, frequency-dependent gains/coherences, and lead–lag relations that vary with the cycle length. + +To connect this to data, pair the model-implied objects here with the advanced lecture [Estimation of Spectra](https://python-advanced.quantecon.org/estspec.html#). + +## A structural view of acceleration + +Chow {cite}`Chow1968` provides a structural interpretation of how acceleration enters the model. + +The starting point is a stock-adjustment demand for capital: + +```{math} +:label: chow_stock_adj_struct + +s_{it} = a_i Y_t + b_i s_{i,t-1} +``` + +where $s_{it}$ is the desired stock of capital type $i$, $Y_t$ is aggregate output, and $(a_i, b_i)$ are parameters. + +Net investment is the stock change: + +```{math} +:label: chow_net_inv + +y^n_{it} = \Delta s_{it} = a_i \Delta Y_t + b_i y^n_{i,t-1}. +``` + +For gross investment with depreciation rate $\delta_i$: + +```{math} +:label: chow_gross_inv + +y_{it} = a_i [Y_t - (1-\delta_i) Y_{t-1}] + b_i y_{i,t-1}. +``` + +The parameters $(a_i, b_i, \delta_i)$ are the key "acceleration equation" parameters. + +The term $a_i \Delta Y_t$ is the acceleration effect: investment responds to *changes* in output, not just levels. + +This creates negative coefficients on lagged output levels, which in turn makes complex roots (and hence oscillatory components) possible in the characteristic equation. + +## Exercises + +```{exercise} +:label: chow_cycles_ex1 + +In the rotation-contraction example, fix $\theta$ and vary $r$ in a grid between $0.2$ and $0.99$. + +1. For each $r$, compute the frequency $\omega^*(r)$ that maximizes $f_{11}(\omega)$. +2. Plot $\omega^*(r)$ and the implied peak period $2\pi/\omega^*(r)$ as functions of $r$. + +How does the peak location behave as $r \uparrow 1$? 
+``` + +```{solution-start} chow_cycles_ex1 +:class: dropdown +``` + +```{code-cell} ipython3 +r_grid = np.linspace(0.2, 0.99, 50) +θ = np.pi / 3 +ω_grid_ex = np.linspace(1e-3, np.pi - 1e-3, 1000) +V_ex = np.eye(2) + +ω_star = np.zeros(len(r_grid)) +period_star = np.zeros(len(r_grid)) +for idx, r in enumerate(r_grid): + A_ex = rotation_contraction(r, θ) + F_ex = spectral_density_var1(A_ex, V_ex, ω_grid_ex) + f11 = np.real(F_ex[:, 0, 0]) + i_max = np.argmax(f11) + ω_star[idx] = ω_grid_ex[i_max] + period_star[idx] = 2 * np.pi / ω_star[idx] + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) +axes[0].plot(r_grid, ω_star / np.pi, lw=1.8) +axes[0].axhline(θ / np.pi, ls='--', lw=1.0, label=r'$\theta/\pi$') +axes[0].set_xlabel('$r$') +axes[0].set_ylabel(r'$\omega^*/\pi$') +axes[0].legend(frameon=False) + +axes[1].plot(r_grid, period_star, lw=1.8) +axes[1].axhline(2 * np.pi / θ, ls='--', lw=1.0, label=r'$2\pi/\theta$') +axes[1].set_xlabel('$r$') +axes[1].set_ylabel('peak period') +axes[1].legend(frameon=False) +plt.tight_layout() +plt.show() +``` + +As $r \uparrow 1$, the peak frequency converges to $\theta$ (the argument of the complex eigenvalue). + +This confirms Chow's insight: when the modulus is close to 1, the spectral peak aligns with the eigenvalue frequency. + +```{solution-end} +``` + +```{exercise} +:label: chow_cycles_ex2 + +In the "real roots but a peak" example, hold $A$ fixed and vary the shock correlation (the off-diagonal entry of $V$) between $0$ and $0.99$. + +When does the interior-frequency peak appear, and how does its location change? +``` + +```{solution-start} chow_cycles_ex2 +:class: dropdown +``` + +```{code-cell} ipython3 +A_ex2 = np.diag([0.1, 0.9]) +b_ex2 = np.array([1.0, -0.01]) +corr_grid = np.linspace(0, 0.99, 50) +peak_periods = [] +for corr in corr_grid: + V_ex2 = np.array([[1.0, corr], [corr, 1.0]]) + F_ex2 = spectral_density_var1(A_ex2, V_ex2, ω_grid_ex) + f_x = spectrum_of_linear_combination(F_ex2, b_ex2) + i_max = np.argmax(f_x) + if 5 < i_max < len(ω_grid_ex) - 5: + peak_periods.append(2 * np.pi / ω_grid_ex[i_max]) + else: + peak_periods.append(np.nan) + +fig, ax = plt.subplots(figsize=(8, 4)) +ax.plot(corr_grid, peak_periods, marker='o', lw=1.8, markersize=4) +ax.set_xlabel('shock correlation') +ax.set_ylabel('peak period') +plt.show() + +threshold_idx = np.where(~np.isnan(peak_periods))[0] +if len(threshold_idx) > 0: + print(f"interior peak appears when correlation ≥ {corr_grid[threshold_idx[0]]:.2f}") +``` + +The interior peak appears only when the shock correlation exceeds a threshold. + +This illustrates Chow's point that spectral peaks depend on the full system structure, not just eigenvalues. + +```{solution-end} +``` + +```{exercise} +:label: chow_cycles_ex3 + +Using the calibrated Chow-Levitan (1969) parameters, compute the autocovariance matrices $\Gamma_0, \Gamma_1, \ldots, \Gamma_{10}$ using: + +1. The recursion $\Gamma_k = A \Gamma_{k-1}$ with $\Gamma_0$ from the Lyapunov equation. +2. Chow's eigendecomposition formula $\Gamma_k = B D_\lambda^k \Gamma_0^* B^\top$ where $\Gamma_0^*$ is the canonical covariance. + +Verify that both methods give the same result. 
+``` + +```{solution-start} chow_cycles_ex3 +:class: dropdown +``` + +```{code-cell} ipython3 +from scipy.linalg import solve_discrete_lyapunov + +Γ_0_lyap = solve_discrete_lyapunov(A_chow, V) +Γ_recursion = [Γ_0_lyap] +for k in range(1, 11): + Γ_recursion.append(A_chow @ Γ_recursion[-1]) + +p = len(λ) +Γ_0_star = np.zeros((p, p), dtype=complex) +for i in range(p): + for j in range(p): + Γ_0_star[i, j] = W[i, j] / (1 - λ[i] * λ[j]) + +Γ_eigen = [] +for k in range(11): + D_k = np.diag(λ**k) + Γ_eigen.append(np.real(B @ D_k @ Γ_0_star @ B.T)) + +print("Comparison of Γ_5 (first 3x3 block):") +print("\nRecursion method:") +print(np.real(Γ_recursion[5][:3, :3]).round(10)) +print("\nEigendecomposition method:") +print(Γ_eigen[5][:3, :3].round(10)) +print("\nMax absolute difference:", np.max(np.abs(np.real(Γ_recursion[5]) - Γ_eigen[5]))) +``` + +Both methods produce essentially identical results, up to numerical precision. + +```{solution-end} +``` + +```{exercise} +:label: chow_cycles_ex4 + +Modify the Chow-Levitan model by changing $\lambda_3$ from $0.4838$ to $0.95$. + +1. Recompute the spectral densities. +2. How does this change affect the spectral shape for each variable? +3. What economic interpretation might correspond to this parameter change? +``` + +```{solution-start} chow_cycles_ex4 +:class: dropdown +``` + +```{code-cell} ipython3 +λ_modified = λ.copy() +λ_modified[2] = 0.95 +F_mod = spectral_density_chow(λ_modified, B, W, ω_grid) + +fig, axes = plt.subplots(2, 3, figsize=(14, 8)) +axes = axes.flatten() +var_labels = ["consumption", "equipment + inventories", "construction", "long rate", "output"] +for i in range(5): + f_orig = np.real(F_chow[:, i, i]) + f_mod = np.real(F_mod[:, i, i]) + f_orig_norm = f_orig / np.trapz(f_orig, freq) + f_mod_norm = f_mod / np.trapz(f_mod, freq) + axes[i].semilogy(freq, f_orig_norm, lw=1.5, label=r"original ($\lambda_3=0.48$)") + axes[i].semilogy(freq, f_mod_norm, lw=1.5, ls="--", label=r"modified ($\lambda_3=0.95$)") + paper_frequency_axis(axes[i]) + axes[i].set_ylabel(rf"normalized $f_{{{i+1}{i+1}}}(\omega)$") + axes[i].text(0.03, 0.08, var_labels[i], transform=axes[i].transAxes) + axes[i].legend(frameon=False, fontsize=8) +axes[5].axis('off') +plt.tight_layout() +plt.show() +``` + +Increasing $\lambda_3$ from 0.48 to 0.95 adds more persistence to the system. + +The spectral densities show increased power at low frequencies. + +Economically, this could correspond to stronger persistence in the propagation of shocks—perhaps due to slower adjustment speeds in investment or consumption behavior. 
+ +```{solution-end} +``` From 24063c178c9ae59868f7dc1c49eee27e755f10c9 Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Sat, 7 Feb 2026 13:06:22 +1100 Subject: [PATCH 02/19] updates --- lectures/chow_business_cycles.md | 907 ++++++++++++++++++++++--------- 1 file changed, 646 insertions(+), 261 deletions(-) diff --git a/lectures/chow_business_cycles.md b/lectures/chow_business_cycles.md index 9e99b0478..393119345 100644 --- a/lectures/chow_business_cycles.md +++ b/lectures/chow_business_cycles.md @@ -4,7 +4,7 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.17.2 + jupytext_version: 1.17.1 kernelspec: display_name: Python 3 (ipykernel) language: python @@ -31,15 +31,19 @@ kernelspec: This lecture studies two classic papers by Gregory Chow on business cycles in linear dynamic models: -- {cite}`Chow1968`: why acceleration-type investment behavior matters for oscillations, and how to read stochastic dynamics through autocovariances and spectral densities -- {cite}`ChowLevitan1969`: how those tools look when applied to a calibrated macroeconometric model of the U.S. economy +- {cite}`Chow1968`: empirical evidence for the acceleration principle, why acceleration enables oscillations, and when spectral peaks arise in stochastic systems +- {cite}`ChowLevitan1969`: spectral analysis of a calibrated U.S. macroeconometric model, showing gains, coherences, and lead-lag patterns -These papers sit right at the intersection of three themes in this lecture series: +These papers connect ideas in the following lectures: - The multiplier–accelerator mechanism in {doc}`samuelson` - Linear stochastic difference equations and autocovariances in {doc}`linear_models` - Eigenmodes of multivariate dynamics in {doc}`var_dmd` -- Fourier ideas in {doc}`eig_circulant` (and, for empirical estimation, the advanced lecture [Estimation of Spectra](https://python-advanced.quantecon.org/estspec.html#)) +- Fourier ideas in {doc}`eig_circulant` (and, for empirical estimation, the advanced lecture {doc}`advanced:estspec`) + +{cite:t}`Chow1968` builds on earlier empirical work testing the acceleration principle on U.S. investment data. + +We begin with that empirical foundation before developing the theoretical framework. We will keep coming back to three ideas: @@ -47,8 +51,90 @@ We will keep coming back to three ideas: - In stochastic models, a "cycle" shows up as a local peak in a (univariate) spectral density. - Spectral peaks depend on eigenvalues, but also on how shocks enter (the covariance matrix $V$) and on how observables load on eigenmodes. +In this lecture, we start with Chow's empirical evidence for the acceleration principle, then introduce the VAR(1) framework and spectral analysis tools. + +Next, we show why acceleration creates complex roots that enable oscillations, and derive Chow's conditions for spectral peaks in the Hansen-Samuelson model. + +We then present Chow's striking counterexample: real roots *can* produce spectral peaks in general multivariate systems. + +Finally, we apply these tools to the calibrated Chow-Levitan model to see what model-implied spectra look like in practice. + +Let's start with some standard imports + +```{code-cell} ipython3 +import numpy as np +import matplotlib.pyplot as plt +``` + +(empirical_section)= +## Empirical foundation for the acceleration principle + +{cite:t}`Chow1968` opens by reviewing empirical evidence for the acceleration principle from earlier macroeconometric work. 
+ +Using annual observations for 1931--40 and 1948--63, Chow tested the acceleration equation on three investment categories: + +- new construction +- gross private domestic investment in producers' durable equipment combined with change in business inventories +- the last two variables separately + +In each case, when the regression included both $Y_t$ and $Y_{t-1}$ (where $Y$ is gross national product minus taxes net of transfers), the coefficient on $Y_{t-1}$ was of *opposite sign* and slightly smaller in absolute value than the coefficient on $Y_t$. + +Equivalently, when expressed in terms of $\Delta Y_t$ and $Y_{t-1}$, the coefficient on $Y_{t-1}$ was a small fraction of the coefficient on $\Delta Y_t$. + +### An example: Automobile demand + +Chow presents a clean illustration using data on net investment in automobiles from his earlier work on automobile demand. + +Using annual data for 1922--41 and 1948--57, he estimates by least squares: + +```{math} +:label: chow_auto_eq5 + +y_t^n = \underset{(0.0022)}{0.0155} Y_t \underset{(0.0020)}{- 0.0144} Y_{t-1} \underset{(0.0056)}{- 0.0239} p_t \underset{(0.0040)}{+ 0.0199} p_{t-1} + \underset{(0.101)}{0.351} y_{t-1}^n + \text{const.} +``` + +where: +- $Y_t$ is real disposable personal income per capita +- $p_t$ is a relative price index for automobiles +- $y_t^n$ is per capita net investment in passenger automobiles +- standard errors appear in parentheses + +The key observation: the coefficients on $Y_{t-1}$ and $p_{t-1}$ are *the negatives* of the coefficients on $Y_t$ and $p_t$. + +This pattern is exactly what the acceleration principle predicts. + +### From stock adjustment to acceleration + +The empirical support for acceleration should not be surprising once we accept a stock-adjustment demand equation for capital: + +```{math} +:label: chow_stock_adj_emp + +s_{it} = a_i Y_t + b_i s_{i,t-1} +``` + +where $s_{it}$ is the stock of capital good $i$. + +The acceleration equation {eq}`chow_auto_eq5` is essentially the *first difference* of {eq}`chow_stock_adj_emp`. + +Net investment is the change in stock, $y_{it}^n = \Delta s_{it}$, and differencing {eq}`chow_stock_adj_emp` gives: + +```{math} +:label: chow_acc_from_stock + +y_{it}^n = a_i \Delta Y_t + b_i y_{i,t-1}^n +``` + +The coefficients on $Y_t$ and $Y_{t-1}$ in the level form are $a_i$ and $-a_i(1-b_i)$ respectively. + +They are opposite in sign and similar in magnitude when $b_i$ is not too far from unity. + +This connection between stock adjustment and acceleration is central to Chow's argument about why acceleration matters for business cycles. + ## A linear system with shocks +To study business cycles formally, we need a framework that combines the deterministic dynamics (captured by the transition matrix $A$) with random shocks. + Both papers analyze (or reduce to) a first-order linear stochastic system ```{math} @@ -63,7 +149,7 @@ y_t = A y_{t-1} + u_t, \mathbb E[u_t u_{t-k}^\top] = 0 \ (k \neq 0). ``` -When the eigenvalues of $A$ are strictly inside the unit circle, the process is (covariance) stationary and its autocovariances exist. +When the eigenvalues of $A$ are strictly inside the unit circle, the process is covariance stationary and its autocovariances exist. In the notation of {doc}`linear_models`, this is the same stability condition that guarantees a unique solution to a discrete Lyapunov equation. @@ -87,6 +173,108 @@ Standard calculations (also derived in {cite}`Chow1968`) give the recursion The second equation is the discrete Lyapunov equation for $\Gamma_0$. 
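As a quick numerical sanity check of {eq}`chow_autocov_rec`, the next cell solves the Lyapunov equation with SciPy for a small two-variable system (the matrices are ours, chosen only for illustration) and compares the implied autocovariances with sample moments from a long simulation.

```{code-cell} ipython3
from scipy.linalg import solve_discrete_lyapunov

A_ex = np.array([[0.6, -0.5],
                 [1.0,  0.0]])      # an illustrative stable matrix with complex eigenvalues
V_ex = np.diag([1.0, 0.0])          # shock enters the first equation only

# Γ_0 solves Γ_0 = A Γ_0 A' + V
Γ0_ex = solve_discrete_lyapunov(A_ex, V_ex)

# long simulation of y_t = A y_{t-1} + u_t
rng = np.random.default_rng(0)
T_sim = 100_000
y_sim = np.zeros((T_sim, 2))
for t in range(1, T_sim):
    y_sim[t] = A_ex @ y_sim[t-1] + np.array([rng.standard_normal(), 0.0])

print("Γ_0 from the Lyapunov equation:\n", Γ0_ex.round(3))
print("Γ_0 from the simulation:\n", (y_sim.T @ y_sim / T_sim).round(3))
print("Γ_1 = A Γ_0:\n", (A_ex @ Γ0_ex).round(3))
print("Γ_1 from the simulation:\n", (y_sim[1:].T @ y_sim[:-1] / (T_sim - 1)).round(3))
```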
+### Why stochastic dynamics matter + +{cite:t}`Chow1968` motivates the stochastic analysis with a quote from Ragnar Frisch: + +> The examples we have discussed ... show that when an [deterministic] economic system gives rise to oscillations, these will most frequently be damped. But in reality the cycles ... are generally not damped. How can the maintenance of the swings be explained? ... One way which I believe is particularly fruitful and promising is to study what would become of the solution of a determinate dynamic system if it were exposed to a stream of erratic shocks ... +> +> Thus, by connecting the two ideas: (1) the continuous solution of a determinate dynamic system and (2) the discontinuous shocks intervening and supplying the energy that may maintain the swings—we get a theoretical setup which seems to furnish a rational interpretation of those movements which we have been accustomed to see in our statistical time data. +> +> — Ragnar Frisch (1933) + +Chow's main insight is that oscillations in the deterministic system are *neither necessary nor sufficient* for producing "cycles" in the stochastic system. + +We have to bring the stochastic element into the picture. + +We will show that even when eigenvalues are real (no deterministic oscillations), the stochastic system can exhibit cyclical patterns in its autocovariances and spectral densities. + +### Autocovariances in terms of eigenvalues + +Let $\lambda_1, \ldots, \lambda_p$ be the (possibly complex) eigenvalues of $A$, assumed distinct, and let $B$ be the matrix whose columns are the corresponding right eigenvectors: + +```{math} +:label: chow_eigen_decomp + +A B = B D_\lambda, \quad \text{or equivalently} \quad A = B D_\lambda B^{-1} +``` + +where $D_\lambda = \text{diag}(\lambda_1, \ldots, \lambda_p)$. + +Define canonical variables $z_t = B^{-1} y_t$. +These satisfy the decoupled dynamics + +```{math} +:label: chow_canonical_dynamics + +z_t = D_\lambda z_{t-1} + \varepsilon_t +``` + +where $\varepsilon_t = B^{-1} u_t$ has covariance matrix $W = B^{-1} V (B^{-1})^\top$. + +The autocovariance matrix of the canonical variables, denoted $\Gamma_k^*$, satisfies + +```{math} +:label: chow_canonical_autocov + +\Gamma_k^* = D_\lambda^k \Gamma_0^*, \quad k = 1, 2, 3, \ldots +``` + +and + +```{math} +:label: chow_gamma0_star + +\Gamma_0^* = \left( \frac{w_{ij}}{1 - \lambda_i \lambda_j} \right) +``` + +where $w_{ij}$ are elements of $W$. + +The autocovariance matrices of the original variables are then + +```{math} +:label: chow_autocov_eigen + +\Gamma_k = B \Gamma_k^* B^\top = B D_\lambda^k \Gamma_0^* B^\top, \quad k = 0, 1, 2, \ldots +``` + +The scalar autocovariance $\gamma_{ij,k} = \mathbb{E}[y_{it} y_{j,t-k}]$ is a *linear combination* of powers of the eigenvalues: + +```{math} +:label: chow_scalar_autocov + +\gamma_{ij,k} = \sum_m \sum_n b_{im} b_{jn} \gamma^*_{mn,0} \lambda_m^k = \sum_m d_{ij,m} \lambda_m^k +``` + +Compare this to the deterministic time path from initial condition $y_0$: + +```{math} +:label: chow_det_path + +y_{it} = \sum_j b_{ij} z_{j0} \lambda_j^t +``` + +Both the autocovariance function {eq}`chow_scalar_autocov` and the deterministic path {eq}`chow_det_path` are linear combinations of $\lambda_m^k$ (or $\lambda_j^t$). + +This formal resemblance is important: the coefficients differ (depending on initial conditions vs. shock covariances), but the role of eigenvalues is analogous. 
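To see {eq}`chow_scalar_autocov` at work numerically, the next cell uses the same small illustrative system as in the check above (redefined here so the cell runs on its own): it builds the coefficients $d_{11,m}$ from the eigendecomposition and verifies that $\sum_m d_{11,m} \lambda_m^k$ reproduces the autocovariances computed directly from $\Gamma_k = A^k \Gamma_0$.

```{code-cell} ipython3
A_ex = np.array([[0.6, -0.5],
                 [1.0,  0.0]])      # illustrative matrices, not from the papers
V_ex = np.diag([1.0, 0.0])

# eigendecomposition and canonical shock covariance W = B⁻¹ V (B⁻¹)'
λ_ex, B_ex = np.linalg.eig(A_ex)
B_inv_ex = np.linalg.inv(B_ex)
W_ex = B_inv_ex @ V_ex @ B_inv_ex.T
Γ0_star = W_ex / (1 - np.outer(λ_ex, λ_ex))      # canonical Γ_0*

# coefficients d_{11,m} so that γ_{11,k} = Σ_m d_{11,m} λ_m^k
d_11 = B_ex[0, :] * (Γ0_star @ B_ex[0, :])

# baseline Γ_0 from the convergent series Σ_j A^j V (A')^j
Γ0_sum = sum(np.linalg.matrix_power(A_ex, j) @ V_ex @ np.linalg.matrix_power(A_ex, j).T
             for j in range(400))

ks = np.arange(8)
γ_eigen = np.array([np.real((d_11 * λ_ex**k).sum()) for k in ks])
γ_direct = np.array([(np.linalg.matrix_power(A_ex, k) @ Γ0_sum)[0, 0] for k in ks])

print(γ_eigen.round(4))
print(γ_direct.round(4))
```

The two rows agree, and because the eigenvalues here are complex the autocovariance sequence oscillates as it decays, which is the case discussed next.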
+ +### Complex roots and damped oscillations + +When eigenvalues come in complex conjugate pairs $\lambda = r e^{\pm i\theta}$ with $r < 1$, their contribution to the autocovariance function is a **damped cosine**: + +```{math} +:label: chow_damped_cosine + +2 s r^k \cos(\theta k + \phi) +``` + +for appropriate amplitude $s$ and phase $\phi$ determined by the eigenvector loadings. + +In the deterministic model, such complex roots generate damped oscillatory time paths. +In the stochastic model, they generate damped oscillatory autocovariance functions. + +It is in this sense that deterministic oscillations could be "maintained" in the stochastic model—but as we will see, the connection between eigenvalues and spectral peaks is more subtle than this suggests. + ## From autocovariances to spectra Chow’s key step is to translate the autocovariance sequence $\{\Gamma_k\}$ into a frequency-domain object. @@ -126,12 +314,9 @@ The advanced lecture {doc}`advanced:estspec` explains how to estimate $F(\omega) Here we focus on the model-implied spectrum. -We will use the following imports and helper functions throughout the lecture. +We will use the following helper functions throughout the lecture. ```{code-cell} ipython3 -import numpy as np -import matplotlib.pyplot as plt - def spectral_density_var1(A, V, ω_grid): """Spectral density matrix for VAR(1): y_t = A y_{t-1} + u_t.""" A, V = np.asarray(A), np.asarray(V) @@ -149,7 +334,7 @@ def spectrum_of_linear_combination(F, b): return np.array([np.real((b.T @ F[k] @ b).item()) for k in range(F.shape[0])]) def simulate_var1(A, V, T, burn=200, seed=1234): - """Simulate y_t = A y_{t-1} + u_t with u_t ~ N(0, V).""" + r"""Simulate y_t = A y_{t-1} + u_t with u_t \sim N(0, V).""" rng = np.random.default_rng(seed) A, V = np.asarray(A), np.asarray(V) n = A.shape[0] @@ -172,47 +357,25 @@ def sample_autocorrelation(x, max_lag): ## Deterministic propagation and acceleration -Chow {cite}`Chow1968` begins with a clean deterministic question: - -> If you build a macro model using only standard demand equations with simple distributed lags, can the system generate sustained oscillations without acceleration? - -He shows that, under natural sign restrictions, the answer is no. +Now we have the tools and the motivation to analyze spectral peaks in linear stochastic systems. -### A demand system without acceleration +We first go back to the deterministic system to understand why acceleration matters for generating oscillations in the first place. -Consider a system where each component $y_{it}$ responds to aggregate output $Y_t$ and its own lag: +Before analyzing spectral peaks, we need to understand why acceleration matters for generating oscillations in the first place. -```{math} -:label: chow_simple_demand +{cite:t}`Chow1968` asks a question in the deterministic setup: if we build a macro model using only standard demand equations with simple distributed lags, can the system generate sustained oscillations? -y_{it} = a_i Y_t + b_i y_{i,t-1}, -\qquad -Y_t = \sum_i y_{it}, -\qquad -a_i > 0,\; b_i > 0. -``` - -Chow shows that the implied transition matrix has real characteristic roots, and that if $\sum_i a_i < 1$ these roots are also positive. - -In that case, solutions are linear combinations of decaying exponentials without persistent sign-switching components, so there are no “business-cycle-like” oscillations driven purely by internal propagation. 
- -### What acceleration changes - -For investment (and some durables), Chow argues that a more relevant starting point is a *stock adjustment* equation (demand for a stock), e.g. - -```{math} -:label: chow_stock_adj +He shows that, under natural sign restrictions, the answer is no. -s_{it} = \alpha_i Y_t + \beta_i s_{i,t-1}. -``` +As we saw in the {ref}`empirical foundation `, stock-adjustment demand for durable goods leads to investment equations where the coefficient on $Y_{t-1}$ is negative, i.e., the **acceleration effect**. -If flow investment is proportional to the change in the desired stock, differencing introduces terms in $\Delta Y_t$. +This negative coefficient is what makes complex roots possible in the characteristic equation. -That "acceleration" structure creates negative coefficients (in lagged levels), which makes complex roots possible. +Without it, Chow proves that demand systems with only positive coefficients have real positive roots, and hence no oscillatory dynamics. -This connects directly to {doc}`samuelson`, where acceleration is the key ingredient that can generate damped or persistent oscillations in a deterministic second-order difference equation. +The {doc}`samuelson` lecture explores this mechanism in detail through the Hansen-Samuelson multiplier-accelerator model. -To see the mechanism with minimal algebra, take the multiplier–accelerator law of motion +Here we briefly illustrate the effect. Take the multiplier–accelerator law of motion ```{math} Y_t = c Y_{t-1} + v (Y_{t-1} - Y_{t-2}), @@ -224,13 +387,16 @@ and rewrite it as a first-order system in $(Y_t, Y_{t-1})$. def samuelson_transition(c, v): return np.array([[c + v, -v], [1.0, 0.0]]) -c = 0.6 -v_values = (0.0, 0.8) -A_list = [samuelson_transition(c, v) for v in v_values] +# Compare weak vs strong acceleration +# Weak: c=0.8, v=0.1 gives real roots (discriminant > 0) +# Strong: c=0.6, v=0.8 gives complex roots (discriminant < 0) +cases = [("weak acceleration", 0.8, 0.1), ("strong acceleration", 0.6, 0.8)] +A_list = [samuelson_transition(c, v) for _, c, v in cases] -for v, A in zip(v_values, A_list): +for (label, c, v), A in zip(cases, A_list): eig = np.linalg.eigvals(A) - print(f"v={v:.1f}, eigenvalues={eig}") + disc = (c + v)**2 - 4*v + print(f"{label}: c={c}, v={v}, discriminant={disc:.2f}, eigenvalues={eig}") # impulse responses from a one-time unit shock in Y T = 40 @@ -253,19 +419,19 @@ spectra = [] for A in A_list: F = spectral_density_var1(A, V, ω_grid) f11 = np.real(F[:, 0, 0]) - spectra.append(f11 / np.trapz(f11, freq)) + spectra.append(f11 / np.trapezoid(f11, freq)) fig, axes = plt.subplots(1, 2, figsize=(12, 4)) -axes[0].plot(range(T + 1), irfs[0], lw=1.8, label="no acceleration") -axes[0].plot(range(T + 1), irfs[1], lw=1.8, label="with acceleration") +axes[0].plot(range(T + 1), irfs[0], lw=2, label="weak acceleration (real roots)") +axes[0].plot(range(T + 1), irfs[1], lw=2, label="strong acceleration (complex roots)") axes[0].axhline(0.0, lw=0.8) axes[0].set_xlabel("time") axes[0].set_ylabel(r"$Y_t$") axes[0].legend(frameon=False) -axes[1].plot(freq, spectra[0], lw=1.8, label="no acceleration") -axes[1].plot(freq, spectra[1], lw=1.8, label="with acceleration") +axes[1].plot(freq, spectra[0], lw=2, label="weak acceleration (real roots)") +axes[1].plot(freq, spectra[1], lw=2, label="strong acceleration (complex roots)") axes[1].set_xlabel(r"frequency $\omega/2\pi$") axes[1].set_ylabel("normalized spectrum") axes[1].set_xlim([0.0, 0.5]) @@ -275,240 +441,448 @@ plt.tight_layout() plt.show() 
``` -The left panel shows that acceleration creates oscillatory impulse responses. +The left panel shows the contrast between weak and strong acceleration: with weak acceleration ($v=0.1$) the roots are real and the impulse response decays monotonically; with strong acceleration ($v=0.8$) the roots are complex and the impulse response oscillates. + +The right panel shows the corresponding spectral signatures. -The right panel shows the corresponding spectral signature: a peak at interior frequencies. +Complex roots produce a pronounced peak at interior frequencies—the spectral signature of business cycles. -### How the accelerator shifts the spectral peak +### How acceleration strength affects the spectrum -As we increase the accelerator $v$, the complex eigenvalues rotate further from the real axis, shifting the spectral peak to higher frequencies. +As we increase the accelerator $v$, the eigenvalues move further from the origin. + +For this model, the eigenvalue modulus is $|\lambda| = \sqrt{v}$, so the stability boundary is $v = 1$. ```{code-cell} ipython3 -v_grid = np.linspace(0.2, 1.2, 6) +v_grid = [0.2, 0.4, 0.6, 0.8, 0.95] # stable cases only c = 0.6 freq_fine = np.linspace(1e-4, 0.5, 2000) ω_fine = 2 * np.pi * freq_fine V_acc = np.array([[1.0, 0.0], [0.0, 0.0]]) +T_irf = 40 # periods for impulse response -fig, axes = plt.subplots(1, 2, figsize=(12, 4)) +fig = plt.figure(figsize=(12, 8)) +ax_eig = fig.add_subplot(2, 2, 1) +ax_spec = fig.add_subplot(2, 2, 2) +ax_irf = fig.add_subplot(2, 1, 2) # spans entire bottom row for v in v_grid: A = samuelson_transition(c, v) eig = np.linalg.eigvals(A) - F = spectral_density_var1(A, V_acc, ω_fine) - f11 = np.real(F[:, 0, 0]) - f11_norm = f11 / np.trapz(f11, freq_fine) - # plot eigenvalues - axes[0].scatter(eig.real, eig.imag, s=40, label=f'$v={v:.1f}$') + # eigenvalues (top left) + ax_eig.scatter(eig.real, eig.imag, s=40, label=f'$v={v}$') - # plot spectrum - axes[1].plot(freq_fine, f11_norm, lw=1.5, label=f'$v={v:.1f}$') + # spectrum (top right) + F = spectral_density_var1(A, V_acc, ω_fine) + f11 = np.real(F[:, 0, 0]) + f11_norm = f11 / np.trapezoid(f11, freq_fine) + ax_spec.plot(freq_fine, f11_norm, lw=2, label=f'$v={v}$') + + # impulse response (bottom row) + s = np.array([1.0, 0.0]) + irf = np.empty(T_irf + 1) + for t in range(T_irf + 1): + irf[t] = s[0] + s = A @ s + ax_irf.plot(range(T_irf + 1), irf, lw=2, label=f'$v={v}$') -# unit circle +# eigenvalue panel with unit circle θ_circle = np.linspace(0, 2*np.pi, 100) -axes[0].plot(np.cos(θ_circle), np.sin(θ_circle), 'k--', lw=0.8) -axes[0].set_xlabel('real part') -axes[0].set_ylabel('imaginary part') -axes[0].set_aspect('equal') -axes[0].legend(frameon=False, fontsize=8) - -axes[1].set_xlabel(r'frequency $\omega/2\pi$') -axes[1].set_ylabel('normalized spectrum') -axes[1].set_xlim([0, 0.5]) -axes[1].legend(frameon=False, fontsize=8) +ax_eig.plot(np.cos(θ_circle), np.sin(θ_circle), 'k--', lw=0.8, label='unit circle') +ax_eig.set_xlabel('real part') +ax_eig.set_ylabel('imaginary part') +ax_eig.set_aspect('equal') +ax_eig.legend(frameon=False, fontsize=8) + +# spectrum panel +ax_spec.set_xlabel(r'frequency $\omega/2\pi$') +ax_spec.set_ylabel('normalized spectrum') +ax_spec.set_xlim([0, 0.5]) +ax_spec.set_yscale('log') +ax_spec.legend(frameon=False, fontsize=8) + +# impulse response panel +ax_irf.axhline(0, lw=0.8, color='gray') +ax_irf.set_xlabel('time') +ax_irf.set_ylabel(r'$Y_t$') +ax_irf.legend(frameon=False, fontsize=8) plt.tight_layout() plt.show() ``` -Larger $v$ pushes the eigenvalues 
further off the real axis, shifting the spectral peak to higher frequencies. +As $v$ increases, eigenvalues approach the unit circle and the spectral peak becomes sharper. + +This illustrates Chow's main point: acceleration creates complex eigenvalues, which are necessary for oscillatory dynamics. -When $v$ is large enough that eigenvalues leave the unit circle, the system becomes explosive. +Without acceleration, the eigenvalues would be real and the impulse response would decay monotonically without oscillation. -## Spectral peaks are not just eigenvalues +With stronger acceleration (larger $v$), eigenvalues move closer to the unit circle, producing more persistent oscillations and a sharper spectral peak. -With shocks, the deterministic question ("does the system oscillate?") becomes: at which cycle lengths does the variance of $y_t$ concentrate? +The above examples show that complex roots *can* produce spectral peaks. -In this lecture, a "cycle" means a local peak in a univariate spectrum $f_{ii}(\omega)$. +But when exactly does this happen, and are complex roots *necessary*? -Chow's point in {cite}`Chow1968` is that eigenvalues help interpret spectra, but they do not determine peaks by themselves. +Chow answers these questions for the Hansen-Samuelson model. -Two extra ingredients matter: +## Spectral peaks in the Hansen-Samuelson model -- how shocks load on the eigenmodes (the covariance matrix $V$), -- how the variable of interest mixes those modes. +{cite:t}`Chow1968` provides a detailed spectral analysis of the Hansen-Samuelson multiplier-accelerator model. -The next simulations isolate these effects. +This analysis reveals exactly when complex roots produce spectral peaks, and establishes that in this specific model, complex roots are *necessary* for a peak. -### Complex roots: a peak and an oscillating autocorrelation +### The model as a first-order system -Take a stable “rotation–contraction” matrix +The second-order Hansen-Samuelson equation can be written as a first-order system: ```{math} -:label: chow_rot +:label: chow_hs_system -A = r -\begin{bmatrix} -\cos \theta & -\sin \theta \\ -\sin \theta & \cos \theta -\end{bmatrix}, -\qquad 0 < r < 1, +\begin{bmatrix} y_{1t} \\ y_{2t} \end{bmatrix} = +\begin{bmatrix} a_{11} & a_{12} \\ 1 & 0 \end{bmatrix} +\begin{bmatrix} y_{1,t-1} \\ y_{2,t-1} \end{bmatrix} + +\begin{bmatrix} u_{1t} \\ 0 \end{bmatrix} ``` -whose eigenvalues are $r e^{\pm i\theta}$. +where $y_{2t} = y_{1,t-1}$ is simply the lagged value of $y_{1t}$. -When $r$ is close to 1, the spectrum shows a pronounced peak near $\omega \approx \theta$. 
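As a quick sanity check (ours, added for this lecture, with illustrative coefficient values rather than Chow's), the companion form in {eq}`chow_hs_system` has the same roots as the second-order characteristic equation $z^2 - a_{11} z - a_{12} = 0$:

```{code-cell} ipython3
# hypothetical coefficients, chosen only to illustrate the companion form
a11, a12 = 1.1, -0.5

# transition matrix of the first-order system above
A_hs_check = np.array([[a11, a12],
                       [1.0, 0.0]])

eig = np.linalg.eigvals(A_hs_check)
roots = np.roots([1.0, -a11, -a12])   # roots of z^2 - a11 z - a12 = 0

print("companion-matrix eigenvalues:", np.sort_complex(eig))
print("characteristic-equation roots:", np.sort_complex(roots))
```

The two sets of numbers coincide, so we can move freely between the second-order equation and the first-order system.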
+This structure implies a special relationship among the autocovariances: -```{code-cell} ipython3 -def rotation_contraction(r, θ): - c, s = np.cos(θ), np.sin(θ) - return r * np.array([[c, -s], [s, c]]) +```{math} +:label: chow_hs_autocov_relation -θ = np.pi / 3 -r_values = (0.95, 0.4) -ω_grid = np.linspace(1e-3, np.pi - 1e-3, 800) -V = np.eye(2) +\gamma_{11,k} = \gamma_{22,k} = \gamma_{12,k-1} = \gamma_{21,k+1} +``` -acfs = [] -spectra = [] -for r in r_values: - A = rotation_contraction(r, θ) +Using the autocovariance recursion, Chow shows that this leads to the condition - y = simulate_var1(A, V, T=5000, burn=500, seed=1234) - acfs.append(sample_autocorrelation(y[:, 0], 40)) +```{math} +:label: chow_hs_condition53 - F = spectral_density_var1(A, V, ω_grid) - spectra.append(np.real(F[:, 0, 0])) +\gamma_{11,-1} = d_{11,1} \lambda_1^{-1} + d_{11,2} \lambda_2^{-1} = \gamma_{11,1} = d_{11,1} \lambda_1 + d_{11,2} \lambda_2 +``` + +which constrains the spectral density in a useful way. + +### The spectral density formula + +From equations {eq}`chow_scalar_autocov` and the scalar kernel $g_i(\omega) = (1 - \lambda_i^2)/(1 + \lambda_i^2 - 2\lambda_i \cos\omega)$, the spectral density of $y_{1t}$ is: + +```{math} +:label: chow_hs_spectral + +f_{11}(\omega) = d_{11,1} g_1(\omega) + d_{11,2} g_2(\omega) +``` + +which can be written in the combined form: + +```{math} +:label: chow_hs_spectral_combined + +f_{11}(\omega) = \frac{d_{11,1}(1 - \lambda_1^2)(1 + \lambda_2^2) + d_{11,2}(1 - \lambda_2^2)(1 + \lambda_1^2) - 2[d_{11,1}(1-\lambda_1^2)\lambda_2 + d_{11,2}(1-\lambda_2^2)\lambda_1]\cos\omega}{(1 + \lambda_1^2 - 2\lambda_1 \cos\omega)(1 + \lambda_2^2 - 2\lambda_2 \cos\omega)} +``` + +A key observation: due to condition {eq}`chow_hs_condition53`, the *numerator is not a function of $\cos\omega$*. + +Therefore, to find a maximum of $f_{11}(\omega)$, we need only find a minimum of the denominator. + +### Conditions for a spectral peak + +The first derivative of the denominator with respect to $\omega$ is: + +```{math} +:label: chow_hs_derivative + +2[(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1] \sin\omega - 8\lambda_1 \lambda_2 \cos\omega \sin\omega +``` + +For $0 < \omega < \pi$, we have $\sin\omega > 0$, so the derivative equals zero if and only if: + +```{math} +:label: chow_hs_foc + +(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1 = 4\lambda_1 \lambda_2 \cos\omega +``` + +For *complex conjugate roots* $\lambda_1 = r e^{i\theta}$, $\lambda_2 = r e^{-i\theta}$, substitution into {eq}`chow_hs_foc` gives: + +```{math} +:label: chow_hs_peak_condition + +\cos\omega = \frac{1 + r^2}{2r} \cos\theta +``` + +The second derivative confirms this is a maximum when $\omega < \frac{3\pi}{4}$. + +The necessary condition for a valid solution is: + +```{math} +:label: chow_hs_necessary + +-1 < \frac{1 + r^2}{2r} \cos\theta < 1 +``` + +We can interpret it as: +- When $r \approx 1$, the factor $(1+r^2)/2r \approx 1$, so $\omega \approx \theta$ +- When $r$ is small (e.g., 0.3 or 0.4), condition {eq}`chow_hs_necessary` can only be satisfied if $\cos\theta \approx 0$, meaning $\theta \approx \pi/2$ (cycles of approximately 4 periods) + +If $\theta = 54 \degree$ (corresponding to cycles of 6.67 periods) and $r = 0.4$, then $(1+r^2)/2r = 1.45$, giving $\cos\omega = 1.45 \times 0.588 = 0.85$, or $\omega = 31.5 \degree$, corresponding to cycles of 11.4 periods, which is much longer than the deterministic cycle. 
+ +```{code-cell} ipython3 +def peak_condition_factor(r): + """Compute (1 + r^2) / (2r)""" + return (1 + r**2) / (2 * r) + +# Verify Chow's analysis: peak frequency as function of r for fixed θ +θ_deg = 54 +θ = np.deg2rad(θ_deg) +r_grid = np.linspace(0.3, 0.99, 100) + +# For each r, compute the implied peak frequency (if it exists) +ω_peak = [] +for r in r_grid: + factor = peak_condition_factor(r) + cos_omega = factor * np.cos(θ) + if -1 < cos_omega < 1: + ω_peak.append(np.arccos(cos_omega)) + else: + ω_peak.append(np.nan) + +ω_peak = np.array(ω_peak) +period_peak = 2 * np.pi / ω_peak fig, axes = plt.subplots(1, 2, figsize=(12, 4)) -for r, acf in zip(r_values, acfs): - axes[0].plot(range(len(acf)), acf, lw=1.8, label=fr"$r={r}$") -axes[0].axhline(0.0, lw=0.8) -axes[0].set_xlabel("lag") -axes[0].set_ylabel("autocorrelation") +axes[0].plot(r_grid, np.rad2deg(ω_peak), lw=2) +axes[0].axhline(θ_deg, ls='--', lw=1.0, color='gray', label=rf'$\theta = {θ_deg}°$') +axes[0].set_xlabel('eigenvalue modulus $r$') +axes[0].set_ylabel('peak frequency $\omega$ (degrees)') axes[0].legend(frameon=False) -for r, f11 in zip(r_values, spectra): - axes[1].plot(ω_grid / np.pi, f11, lw=1.8, label=fr"$r={r}$") -axes[1].axvline(θ / np.pi, ls="--", lw=1.0, label=r"$\theta/\pi$") -axes[1].set_xlabel(r"frequency $\omega/\pi$") -axes[1].set_ylabel(r"$f_{11}(\omega)$") +axes[1].plot(r_grid, period_peak, lw=2) +axes[1].axhline(360/θ_deg, ls='--', lw=1.0, color='gray', label=rf'deterministic period = {360/θ_deg:.1f}') +axes[1].set_xlabel('eigenvalue modulus $r$') +axes[1].set_ylabel('peak period') axes[1].legend(frameon=False) plt.tight_layout() plt.show() + +# Verify Chow's specific example +r_example = 0.4 +factor = peak_condition_factor(r_example) +cos_omega = factor * np.cos(θ) +omega_example = np.arccos(cos_omega) +print(f"Chow's example: r = {r_example}, θ = {θ_deg}°") +print(f" Factor (1+r²)/2r = {factor:.3f}") +print(f" cos(ω) = {cos_omega:.3f}") +print(f" ω = {np.rad2deg(omega_example):.1f}°") +print(f" Peak period = {360/np.rad2deg(omega_example):.1f} (vs deterministic period = {360/θ_deg:.1f})") ``` -When $r$ is close to 1, the autocorrelation oscillates slowly and the spectrum has a sharp peak near $\theta$. +As $r \to 1$, the peak frequency converges to $\theta$. +For smaller $r$, the peak frequency can differ substantially from the deterministic oscillation frequency. -When $r$ is smaller, oscillations die out quickly and the spectrum is flatter. +### Real positive roots cannot produce peaks -### How shock structure shapes the spectrum +For *real and positive roots* $\lambda_1, \lambda_2 > 0$, the first-order condition {eq}`chow_hs_foc` cannot be satisfied. -Even with the same transition matrix, different shock covariance structures produce different spectral shapes. +To see why, note that we would need: -Here we fix $r = 0.9$ and vary the correlation between the two shocks. 
+```{math} +:label: chow_hs_real_impossible -```{code-cell} ipython3 -r_fixed = 0.9 -A_fixed = rotation_contraction(r_fixed, θ) -corr_values = [-0.9, 0.0, 0.9] +\cos\omega = \frac{(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1}{4\lambda_1 \lambda_2} > 1 +``` -fig, ax = plt.subplots(figsize=(9, 4)) -for corr in corr_values: - V_corr = np.array([[1.0, corr], [corr, 1.0]]) - F = spectral_density_var1(A_fixed, V_corr, ω_grid) - f11 = np.real(F[:, 0, 0]) - f11_norm = f11 / np.trapz(f11, ω_grid / np.pi) - ax.plot(ω_grid / np.pi, f11_norm, lw=1.8, label=fr'$\rho = {corr}$') +The inequality follows because: + +```{math} +:label: chow_hs_real_proof + +(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1 - 4\lambda_1\lambda_2 = \lambda_1(1-\lambda_2)^2 + \lambda_2(1-\lambda_1)^2 > 0 +``` + +which is strictly positive for any $\lambda_1, \lambda_2 > 0$. -ax.axvline(θ / np.pi, ls='--', lw=1.0, color='gray') +This is a key result: In the Hansen-Samuelson model, *complex roots are necessary* for a spectral peak at interior frequencies. + +```{code-cell} ipython3 +# Demonstrate: compare spectra with complex vs real roots +# Both cases use valid Hansen-Samuelson parameterizations +ω_grid = np.linspace(1e-3, np.pi - 1e-3, 800) +V_hs = np.array([[1.0, 0.0], [0.0, 0.0]]) # shock only in first equation + +# Case 1: Complex roots (c=0.6, v=0.8) +# Discriminant = (c+v)² - 4v = 1.96 - 3.2 < 0 → complex roots +c_complex, v_complex = 0.6, 0.8 +A_complex = samuelson_transition(c_complex, v_complex) +eig_complex = np.linalg.eigvals(A_complex) + +# Case 2: Real roots (c=0.8, v=0.1) +# Discriminant = (c+v)² - 4v = 0.81 - 0.4 > 0 → real roots +# Both roots positive and < 1 (stable) +c_real, v_real = 0.8, 0.1 +A_real = samuelson_transition(c_real, v_real) +eig_real = np.linalg.eigvals(A_real) + +print(f"Complex case (c={c_complex}, v={v_complex}): eigenvalues = {eig_complex}") +print(f"Real case (c={c_real}, v={v_real}): eigenvalues = {eig_real}") + +F_complex = spectral_density_var1(A_complex, V_hs, ω_grid) +F_real = spectral_density_var1(A_real, V_hs, ω_grid) + +f11_complex = np.real(F_complex[:, 0, 0]) +f11_real = np.real(F_real[:, 0, 0]) + +fig, ax = plt.subplots() +ax.plot(ω_grid / np.pi, f11_complex / np.max(f11_complex), lw=2, + label=fr'complex roots ($c={c_complex}, v={v_complex}$)') +ax.plot(ω_grid / np.pi, f11_real / np.max(f11_real), lw=2, + label=fr'real roots ($c={c_real}, v={v_real}$)') ax.set_xlabel(r'frequency $\omega/\pi$') ax.set_ylabel('normalized spectrum') ax.legend(frameon=False) plt.show() ``` -The peak location is unchanged, but the peak height depends on the shock correlation. +With complex roots, the spectrum has a clear interior peak. -This illustrates that eigenvalues alone do not determine the full spectral shape. +With real roots, the spectrum is monotonically decreasing and no interior peak is possible. -### Complex roots: an oscillatory mode can be hidden +## Real roots can produce peaks in general models -Complex roots are not sufficient for a visible peak in the spectrum of every observed series. +While real positive roots cannot produce spectral peaks in the Hansen-Samuelson model, {cite:t}`Chow1968` emphasizes that this is *not true in general*. -Even if the state vector contains an oscillatory mode, a variable can be dominated by a non-oscillatory component. +In multivariate systems, the spectral density of a linear combination of variables can have interior peaks even when all eigenvalues are real and positive. 
-The next example combines a rotation–contraction block with a very persistent real root, and then looks at a mixture that is dominated by the persistent component. +### Chow's example -```{code-cell} ipython3 -A_osc = rotation_contraction(0.95, θ) -A = np.block([ - [A_osc, np.zeros((2, 1))], - [np.zeros((1, 2)), np.array([[0.99]])] -]) +Chow constructs the following explicit example with two real positive eigenvalues: -# shocks hit the persistent component much more strongly -V = np.diag([1.0, 1.0, 50.0]) +```{math} +:label: chow_real_roots_example -ω_grid_big = np.linspace(1e-3, np.pi - 1e-3, 1200) -F = spectral_density_var1(A, V, ω_grid_big) +\lambda_1 = 0.1, \quad \lambda_2 = 0.9 +``` -x_grid = ω_grid_big / np.pi -f_y1 = np.real(F[:, 0, 0]) +```{math} +:label: chow_real_roots_W -b = np.array([0.05, 0.0, 1.0]) -f_mix = spectrum_of_linear_combination(F, b) +w_{11} = w_{22} = 1, \quad w_{12} = 0.8 +``` -f_y1_norm = f_y1 / np.trapz(f_y1, x_grid) -f_mix_norm = f_mix / np.trapz(f_mix, x_grid) +```{math} +:label: chow_real_roots_b -fig, ax = plt.subplots(figsize=(9, 4)) -ax.plot(x_grid, f_y1_norm, lw=1.8, label=r"$y_1$") -ax.plot(x_grid, f_mix_norm, lw=1.8, label=r"$x = 0.05\,y_1 + y_3$") -ax.set_xlabel(r"frequency $\omega/\pi$") -ax.set_ylabel("normalized spectrum") -ax.legend(frameon=False) -plt.show() +b_{m1} = 1, \quad b_{m2} = -0.01 ``` -Here the oscillatory mode is still present (the $y_1$ spectrum peaks away from zero), but the mixture $x$ is dominated by the near-unit root and hence by very low frequencies. +The spectral density of the linear combination $x_t = b_m^\top y_t$ is: -### Real roots: a peak from mixing shocks +```{math} +:label: chow_real_roots_spectrum + +f_{mm}(\omega) = \frac{0.9913}{1.01 - 0.2\cos\omega} - \frac{0.001570}{1.81 - 1.8\cos\omega} +``` -Chow also constructs examples where all roots are real and positive yet a linear combination displays a local spectral peak. +Chow tabulates the values: -The mechanism is that cross-correlation in shocks can generate cyclical-looking behavior. +| $\omega$ | $0$ | $\pi/8$ | $2\pi/8$ | $3\pi/8$ | $4\pi/8$ | $5\pi/8$ | $6\pi/8$ | $7\pi/8$ | $\pi$ | +|----------|-----|---------|----------|----------|----------|----------|----------|----------|-------| +| $f_{mm}(\omega)$ | 1.067 | 1.183 | 1.191 | 1.138 | 1.061 | 0.981 | 0.912 | 0.860 | 0.829 | -Here is a close analog of Chow’s two-root illustration. +The peak at $\omega$ slightly below $\pi/8$ (corresponding to periods around 11) is "quite pronounced." -```{code-cell} ipython3 -A = np.diag([0.1, 0.9]) -V = np.array([[1.0, 0.8], [0.8, 1.0]]) -b = np.array([1.0, -0.01]) +In the following figure, we reproduce this table, but with Python, we can plot a finer grid to find the peak more accurately. 
-F = spectral_density_var1(A, V, ω_grid) -f_x = spectrum_of_linear_combination(F, b) -imax = np.argmax(f_x) -ω_star = ω_grid[imax] -period_star = 2 * np.pi / ω_star +```{code-cell} ipython3 +# Reproduce Chow's exact example +λ1, λ2 = 0.1, 0.9 +w11, w22, w12 = 1.0, 1.0, 0.8 +bm1, bm2 = 1.0, -0.01 + +# Construct the system +A_chow_ex = np.diag([λ1, λ2]) +# W is the canonical shock covariance; we need V = B W B^T +# For diagonal A with distinct eigenvalues, B = I, so V = W +V_chow_ex = np.array([[w11, w12], [w12, w22]]) +b_chow_ex = np.array([bm1, bm2]) + +# Chow's formula (equation 67) +def chow_spectrum_formula(ω): + term1 = 0.9913 / (1.01 - 0.2 * np.cos(ω)) + term2 = 0.001570 / (1.81 - 1.8 * np.cos(ω)) + return term1 - term2 + +# Compute via formula and via our general method +ω_table = np.array([0, np.pi/8, 2*np.pi/8, 3*np.pi/8, 4*np.pi/8, + 5*np.pi/8, 6*np.pi/8, 7*np.pi/8, np.pi]) +f_formula = np.array([chow_spectrum_formula(ω) for ω in ω_table]) + +# General method +ω_grid_fine = np.linspace(1e-4, np.pi, 1000) +F_chow_ex = spectral_density_var1(A_chow_ex, V_chow_ex, ω_grid_fine) +f_general = spectrum_of_linear_combination(F_chow_ex, b_chow_ex) + +# Normalize to match Chow's table scale +scale = f_formula[0] / spectrum_of_linear_combination( + spectral_density_var1(A_chow_ex, V_chow_ex, np.array([0.0])), b_chow_ex)[0] + +print("Chow's Table (equation 67):") +print("ω/π: ", " ".join([f"{ω/np.pi:.3f}" for ω in ω_table])) +print("f_mm(ω): ", " ".join([f"{f:.3f}" for f in f_formula])) fig, ax = plt.subplots(figsize=(9, 4)) -ax.plot(ω_grid / np.pi, f_x) -ax.scatter([ω_star / np.pi], [f_x[imax]], zorder=3) -ax.set_xlabel(r"frequency $\omega/\pi$") -ax.set_ylabel(r"$f_x(\omega)$") +ax.plot(ω_grid_fine / np.pi, f_general * scale, lw=2, label='spectrum') +ax.scatter(ω_table / np.pi, f_formula, s=50, zorder=3, label="Chow's table values") + +# Mark the peak +i_peak = np.argmax(f_general) +ω_peak = ω_grid_fine[i_peak] +ax.axvline(ω_peak / np.pi, ls='--', lw=1.0, color='gray', alpha=0.7) +ax.set_xlabel(r'frequency $\omega/\pi$') +ax.set_ylabel(r'$f_{mm}(\omega)$') +ax.legend(frameon=False) plt.show() -print(f"peak period ≈ {period_star:.1f}") + +print(f"\nPeak at ω/π ≈ {ω_peak/np.pi:.3f}, period ≈ {2*np.pi/ω_peak:.1f}") +``` + +### The Slutsky connection + +Chow connects this result to Slutsky's well-known finding that taking moving averages of a random series can generate cycles. + +The VAR(1) model can be written as an infinite moving average: + +```{math} +:label: chow_ma_rep + +y_t = u_t + A u_{t-1} + A^2 u_{t-2} + \cdots ``` -The lesson is the same as Chow’s: in multivariate stochastic systems, “cycle-like” spectra are shaped not only by eigenvalues, but also by how shocks enter ($V$) and how variables combine (the analogue of Chow’s eigenvector matrix). +This amounts to taking an infinite moving average of the random vectors $u_t$ with "geometrically declining" weights $A^0, A^1, A^2, \ldots$ + +For a scalar process with $0 < \lambda < 1$, no distinct cycles can emerge. +But for a matrix $A$ with real roots between 0 and 1, cycles **can** emerge in linear combinations of the variables. + +As Chow puts it: "When neither of two (canonical) variables has distinct cycles... a linear combination can have a peak in its spectral density." + +### The general lesson + +The examples above illustrate Chow's central point: + +1. In the *Hansen-Samuelson model specifically*, complex roots are necessary for a spectral peak +2. 
But in *general multivariate systems*, complex roots are neither necessary nor sufficient +3. The full spectral shape depends on: + - The eigenvalues of $A$ + - The shock covariance structure $V$ + - How the observable of interest loads on the eigenmodes (the vector $b$) ## A calibrated model in the frequency domain -Chow and Levitan {cite}`ChowLevitan1969` use the frequency-domain objects from {cite}`Chow1968` to study a calibrated annual macroeconometric model. +{cite:t}`ChowLevitan1969` use the frequency-domain objects from {cite:t}`Chow1968` to study a calibrated annual macroeconometric model. They work with five annual aggregates @@ -720,20 +1094,20 @@ def paper_frequency_axis(ax): # Normalized spectra (areas set to 1) S = np.real(np.diagonal(F_chow, axis1=1, axis2=2))[:, :5] # y1..y5 -areas = np.trapz(S, freq, axis=0) +areas = np.trapezoid(S, freq, axis=0) S_norm = S / areas mask = freq >= 0.0 fig, axes = plt.subplots(1, 2, figsize=(10, 6)) # Figure I.1: consumption (log scale) -axes[0].plot(freq[mask], S_norm[mask, 0], lw=1.8) +axes[0].plot(freq[mask], S_norm[mask, 0], lw=2) axes[0].set_yscale('log') paper_frequency_axis(axes[0]) axes[0].set_ylabel(r'normalized $f_{11}(\omega)$') # Figure I.2: equipment + inventories (log scale) -axes[1].plot(freq[mask], S_norm[mask, 1], lw=1.8) +axes[1].plot(freq[mask], S_norm[mask, 1], lw=2) axes[1].set_yscale('log') paper_frequency_axis(axes[1]) axes[1].set_ylabel(r'normalized $f_{22}(\omega)$') @@ -808,9 +1182,9 @@ for idx, var_idx in enumerate([0, 1, 2]): ax = axes[idx] ax.plot(freq[mask], coherence[mask], - lw=1.8, label=rf'$R^2_{{{var_idx+1}5}}(\omega)$') + lw=2, label=rf'$R^2_{{{var_idx+1}5}}(\omega)$') ax.plot(freq[mask], gain[mask], - lw=1.8, label=rf'$G_{{{var_idx+1}5}}(\omega)$') + lw=2, label=rf'$G_{{{var_idx+1}5}}(\omega)$') paper_frequency_axis(ax) ax.set_ylim([0, 1.0]) @@ -842,7 +1216,7 @@ labels = [r'$\psi_{15}(\omega)/2\pi$', r'$\psi_{25}(\omega)/2\pi$', for var_idx in range(4): coherence, gain, phase = cross_spectral_measures(F_chow, var_idx, gnp_idx) phase_cycles = phase / (2 * np.pi) - ax.plot(freq[mask], phase_cycles[mask], lw=1.8, label=labels[var_idx]) + ax.plot(freq[mask], phase_cycles[mask], lw=2, label=labels[var_idx]) ax.axhline(0, lw=0.8) paper_frequency_axis(ax) @@ -890,7 +1264,7 @@ for i, λ_i in enumerate(λ[:4]): if np.abs(λ_i) > 0.01: g_i = scalar_kernel(λ_i, ω_grid) label = f'$\\lambda_{i+1}$ = {λ_i:.4f}' if np.isreal(λ_i) else f'$\\lambda_{i+1}$ = {λ_i:.3f}' - ax.semilogy(freq, g_i, label=label, lw=1.5) + ax.semilogy(freq, g_i, label=label, lw=2) ax.set_xlabel(r'frequency $\omega/2\pi$') ax.set_ylabel('$g_i(\\omega)$') ax.set_xlim([1/18, 0.5]) @@ -924,59 +1298,38 @@ The calibrated model reveals three patterns: (1) most variance sits at very low ## Wrap-up -Chow {cite}`Chow1968` emphasizes two complementary diagnostics for linear macro models: how eigenvalues shape deterministic propagation, and how spectra summarize stochastic dynamics. - -Chow and Levitan {cite}`ChowLevitan1969` then show what these objects look like in a calibrated system: strong low-frequency power, frequency-dependent gains/coherences, and lead–lag relations that vary with the cycle length. - -To connect this to data, pair the model-implied objects here with the advanced lecture [Estimation of Spectra](https://python-advanced.quantecon.org/estspec.html#). 
- -## A structural view of acceleration +{cite:t}`Chow1968` draws several conclusions that remain relevant for understanding business cycles: -Chow {cite}`Chow1968` provides a structural interpretation of how acceleration enters the model. +1. **Empirical support for acceleration**: The acceleration principle, as formulated through stock-adjustment equations, receives strong empirical support from investment data. The negative coefficient on lagged output levels is a robust empirical finding. -The starting point is a stock-adjustment demand for capital: +2. **Acceleration is necessary for deterministic oscillations**: In a model consisting only of demand equations with simple distributed lags, the transition matrix has real positive roots (under natural sign restrictions), and hence no prolonged oscillations can occur. Acceleration introduces the possibility of complex roots. -```{math} -:label: chow_stock_adj_struct - -s_{it} = a_i Y_t + b_i s_{i,t-1} -``` +3. **Complex roots are neither necessary nor sufficient for stochastic cycles**: While complex roots in the deterministic model guarantee oscillatory autocovariances, they are neither necessary nor sufficient for a pronounced spectral peak. In the Hansen-Samuelson model specifically, complex roots *are* necessary for a spectral peak. But in general multivariate systems, real roots can produce peaks through the interaction of shocks and eigenvector loadings. -where $s_{it}$ is the desired stock of capital type $i$, $Y_t$ is aggregate output, and $(a_i, b_i)$ are parameters. +4. **An integrated view is essential**: As Chow concludes, "an obvious moral is that the nature of business cycles can be understood only by an integrated view of the deterministic as well as the random elements." -Net investment is the stock change: +{cite:t}`ChowLevitan1969` then show what these objects look like in a calibrated system: strong low-frequency power (reflecting near-unit eigenvalues), frequency-dependent gains/coherences, and lead–lag relations that vary with the cycle length. -```{math} -:label: chow_net_inv +On the empirical side, Granger has noted a "typical spectral shape" for economic time series—a monotonically decreasing function of frequency. -y^n_{it} = \Delta s_{it} = a_i \Delta Y_t + b_i y^n_{i,t-1}. -``` +The Chow-Levitan calibration is consistent with this shape, driven by the near-unit eigenvalues. -For gross investment with depreciation rate $\delta_i$: +But as Chow emphasizes, understanding whether this shape reflects the true data-generating process requires analyzing the spectral densities implied by structural econometric models. -```{math} -:label: chow_gross_inv - -y_{it} = a_i [Y_t - (1-\delta_i) Y_{t-1}] + b_i y_{i,t-1}. -``` - -The parameters $(a_i, b_i, \delta_i)$ are the key "acceleration equation" parameters. - -The term $a_i \Delta Y_t$ is the acceleration effect: investment responds to *changes* in output, not just levels. - -This creates negative coefficients on lagged output levels, which in turn makes complex roots (and hence oscillatory components) possible in the characteristic equation. +To connect this to data, pair the model-implied objects here with the advanced lecture {doc}`advanced:estspec`. ## Exercises ```{exercise} :label: chow_cycles_ex1 -In the rotation-contraction example, fix $\theta$ and vary $r$ in a grid between $0.2$ and $0.99$. +Verify Chow's spectral peak condition {eq}`chow_hs_peak_condition` numerically for the Hansen-Samuelson model. -1. 
For each $r$, compute the frequency $\omega^*(r)$ that maximizes $f_{11}(\omega)$. -2. Plot $\omega^*(r)$ and the implied peak period $2\pi/\omega^*(r)$ as functions of $r$. - -How does the peak location behave as $r \uparrow 1$? +1. For a range of eigenvalue moduli $r \in [0.3, 0.99]$ with fixed $\theta = 60°$, compute: + - The theoretical peak frequency from Chow's formula: $\cos\omega = \frac{1+r^2}{2r}\cos\theta$ + - The actual peak frequency by numerically maximizing the spectral density +2. Plot both on the same graph and verify they match. +3. Identify the range of $r$ for which no valid peak exists (when the condition {eq}`chow_hs_necessary` is violated). ``` ```{solution-start} chow_cycles_ex1 @@ -984,40 +1337,72 @@ How does the peak location behave as $r \uparrow 1$? ``` ```{code-cell} ipython3 -r_grid = np.linspace(0.2, 0.99, 50) -θ = np.pi / 3 +θ_ex = np.pi / 3 # 60 degrees +r_grid = np.linspace(0.3, 0.99, 50) ω_grid_ex = np.linspace(1e-3, np.pi - 1e-3, 1000) -V_ex = np.eye(2) +V_hs_ex = np.array([[1.0, 0.0], [0.0, 0.0]]) + +ω_theory = [] +ω_numerical = [] -ω_star = np.zeros(len(r_grid)) -period_star = np.zeros(len(r_grid)) -for idx, r in enumerate(r_grid): - A_ex = rotation_contraction(r, θ) - F_ex = spectral_density_var1(A_ex, V_ex, ω_grid_ex) +for r in r_grid: + # Theoretical peak from Chow's formula + factor = (1 + r**2) / (2 * r) + cos_omega = factor * np.cos(θ_ex) + if -1 < cos_omega < 1: + ω_theory.append(np.arccos(cos_omega)) + else: + ω_theory.append(np.nan) + + # Numerical peak from spectral density + # Construct Hansen-Samuelson with eigenvalues r*exp(±iθ) + # This corresponds to c + v = 2r*cos(θ), v = r² + v = r**2 + c = 2 * r * np.cos(θ_ex) - v + A_ex = samuelson_transition(c, v) + F_ex = spectral_density_var1(A_ex, V_hs_ex, ω_grid_ex) f11 = np.real(F_ex[:, 0, 0]) i_max = np.argmax(f11) - ω_star[idx] = ω_grid_ex[i_max] - period_star[idx] = 2 * np.pi / ω_star[idx] + # Only count as a peak if it's not at the boundary + if 5 < i_max < len(ω_grid_ex) - 5: + ω_numerical.append(ω_grid_ex[i_max]) + else: + ω_numerical.append(np.nan) + +ω_theory = np.array(ω_theory) +ω_numerical = np.array(ω_numerical) fig, axes = plt.subplots(1, 2, figsize=(12, 4)) -axes[0].plot(r_grid, ω_star / np.pi, lw=1.8) -axes[0].axhline(θ / np.pi, ls='--', lw=1.0, label=r'$\theta/\pi$') -axes[0].set_xlabel('$r$') -axes[0].set_ylabel(r'$\omega^*/\pi$') + +# Plot peak frequencies +axes[0].plot(r_grid, ω_theory / np.pi, lw=2, label="Chow's formula") +axes[0].plot(r_grid, ω_numerical / np.pi, 'o', markersize=4, label='numerical') +axes[0].axhline(θ_ex / np.pi, ls='--', lw=1.0, color='gray', label=r'$\theta/\pi$') +axes[0].set_xlabel('eigenvalue modulus $r$') +axes[0].set_ylabel(r'peak frequency $\omega^*/\pi$') axes[0].legend(frameon=False) -axes[1].plot(r_grid, period_star, lw=1.8) -axes[1].axhline(2 * np.pi / θ, ls='--', lw=1.0, label=r'$2\pi/\theta$') -axes[1].set_xlabel('$r$') -axes[1].set_ylabel('peak period') +# Plot the factor (1+r²)/2r to show when peaks are valid +axes[1].plot(r_grid, (1 + r_grid**2) / (2 * r_grid), lw=2) +axes[1].axhline(1 / np.cos(θ_ex), ls='--', lw=1.0, color='red', + label=f'threshold = 1/cos({np.rad2deg(θ_ex):.0f}°) = {1/np.cos(θ_ex):.2f}') +axes[1].set_xlabel('eigenvalue modulus $r$') +axes[1].set_ylabel(r'$(1+r^2)/2r$') axes[1].legend(frameon=False) + plt.tight_layout() plt.show() -``` -As $r \uparrow 1$, the peak frequency converges to $\theta$ (the argument of the complex eigenvalue). 
+# Find threshold r below which no peak exists +valid_mask = ~np.isnan(ω_theory) +if valid_mask.any(): + r_threshold = r_grid[valid_mask][0] + print(f"Peak exists for r ≥ {r_threshold:.2f}") +``` -This confirms Chow's insight: when the modulus is close to 1, the spectral peak aligns with the eigenvalue frequency. +The theoretical and numerical peak frequencies match closely. +As $r \to 1$, the peak frequency converges to $\theta$. +For smaller $r$, the factor $(1+r^2)/2r$ exceeds the threshold, and no valid peak exists. ```{solution-end} ``` @@ -1050,7 +1435,7 @@ for corr in corr_grid: peak_periods.append(np.nan) fig, ax = plt.subplots(figsize=(8, 4)) -ax.plot(corr_grid, peak_periods, marker='o', lw=1.8, markersize=4) +ax.plot(corr_grid, peak_periods, marker='o', lw=2, markersize=4) ax.set_xlabel('shock correlation') ax.set_ylabel('peak period') plt.show() @@ -1139,10 +1524,10 @@ var_labels = ["consumption", "equipment + inventories", "construction", "long ra for i in range(5): f_orig = np.real(F_chow[:, i, i]) f_mod = np.real(F_mod[:, i, i]) - f_orig_norm = f_orig / np.trapz(f_orig, freq) - f_mod_norm = f_mod / np.trapz(f_mod, freq) - axes[i].semilogy(freq, f_orig_norm, lw=1.5, label=r"original ($\lambda_3=0.48$)") - axes[i].semilogy(freq, f_mod_norm, lw=1.5, ls="--", label=r"modified ($\lambda_3=0.95$)") + f_orig_norm = f_orig / np.trapezoid(f_orig, freq) + f_mod_norm = f_mod / np.trapezoid(f_mod, freq) + axes[i].semilogy(freq, f_orig_norm, lw=2, label=r"original ($\lambda_3=0.48$)") + axes[i].semilogy(freq, f_mod_norm, lw=2, ls="--", label=r"modified ($\lambda_3=0.95$)") paper_frequency_axis(axes[i]) axes[i].set_ylabel(rf"normalized $f_{{{i+1}{i+1}}}(\omega)$") axes[i].text(0.03, 0.08, var_labels[i], transform=axes[i].transAxes) From 4726b27a7661c6f026cd0c778915297186e18e64 Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Sat, 7 Feb 2026 15:55:00 +1100 Subject: [PATCH 03/19] updates --- lectures/chow_business_cycles.md | 772 +++++++++++++++++-------------- 1 file changed, 422 insertions(+), 350 deletions(-) diff --git a/lectures/chow_business_cycles.md b/lectures/chow_business_cycles.md index 393119345..396a4de55 100644 --- a/lectures/chow_business_cycles.md +++ b/lectures/chow_business_cycles.md @@ -32,7 +32,7 @@ kernelspec: This lecture studies two classic papers by Gregory Chow on business cycles in linear dynamic models: - {cite}`Chow1968`: empirical evidence for the acceleration principle, why acceleration enables oscillations, and when spectral peaks arise in stochastic systems -- {cite}`ChowLevitan1969`: spectral analysis of a calibrated U.S. macroeconometric model, showing gains, coherences, and lead-lag patterns +- {cite}`ChowLevitan1969`: spectral analysis of a calibrated US macroeconometric model, showing gains, coherences, and lead–lag patterns These papers connect ideas in the following lectures: @@ -41,7 +41,7 @@ These papers connect ideas in the following lectures: - Eigenmodes of multivariate dynamics in {doc}`var_dmd` - Fourier ideas in {doc}`eig_circulant` (and, for empirical estimation, the advanced lecture {doc}`advanced:estspec`) -{cite:t}`Chow1968` builds on earlier empirical work testing the acceleration principle on U.S. investment data. +{cite:t}`Chow1968` builds on earlier empirical work testing the acceleration principle on US investment data. We begin with that empirical foundation before developing the theoretical framework. 
@@ -51,19 +51,52 @@ We will keep coming back to three ideas: - In stochastic models, a "cycle" shows up as a local peak in a (univariate) spectral density. - Spectral peaks depend on eigenvalues, but also on how shocks enter (the covariance matrix $V$) and on how observables load on eigenmodes. -In this lecture, we start with Chow's empirical evidence for the acceleration principle, then introduce the VAR(1) framework and spectral analysis tools. +Let's start with some standard imports: -Next, we show why acceleration creates complex roots that enable oscillations, and derive Chow's conditions for spectral peaks in the Hansen-Samuelson model. +```{code-cell} ipython3 +import numpy as np +import matplotlib.pyplot as plt +``` -We then present Chow's striking counterexample: real roots *can* produce spectral peaks in general multivariate systems. +We will use the following helper functions throughout the lecture: -Finally, we apply these tools to the calibrated Chow-Levitan model to see what model-implied spectra look like in practice. +```{code-cell} ipython3 +def spectral_density_var1(A, V, ω_grid): + """Spectral density matrix for VAR(1): y_t = A y_{t-1} + u_t.""" + A, V = np.asarray(A), np.asarray(V) + n = A.shape[0] + I = np.eye(n) + F = np.empty((len(ω_grid), n, n), dtype=complex) + for k, ω in enumerate(ω_grid): + H = np.linalg.inv(I - np.exp(-1j * ω) * A) + F[k] = (H @ V @ H.conj().T) / (2 * np.pi) + return F -Let's start with some standard imports +def spectrum_of_linear_combination(F, b): + """Spectrum of x_t = b'y_t given the spectral matrix F(ω).""" + b = np.asarray(b).reshape(-1, 1) + return np.array([np.real((b.T @ F[k] @ b).item()) for k in range(F.shape[0])]) -```{code-cell} ipython3 -import numpy as np -import matplotlib.pyplot as plt +def simulate_var1(A, V, T, burn=200, seed=1234): + r"""Simulate y_t = A y_{t-1} + u_t with u_t \sim N(0, V).""" + rng = np.random.default_rng(seed) + A, V = np.asarray(A), np.asarray(V) + n = A.shape[0] + chol = np.linalg.cholesky(V) + y = np.zeros((T + burn, n)) + for t in range(1, T + burn): + y[t] = A @ y[t - 1] + chol @ rng.standard_normal(n) + return y[burn:] + +def sample_autocorrelation(x, max_lag): + """Sample autocorrelation of a 1d array from lag 0 to max_lag.""" + x = np.asarray(x) + x = x - x.mean() + denom = np.dot(x, x) + acf = np.empty(max_lag + 1) + for k in range(max_lag + 1): + acf[k] = np.dot(x[:-k] if k else x, x[k:]) / denom + return acf ``` (empirical_section)= @@ -81,7 +114,7 @@ In each case, when the regression included both $Y_t$ and $Y_{t-1}$ (where $Y$ i Equivalently, when expressed in terms of $\Delta Y_t$ and $Y_{t-1}$, the coefficient on $Y_{t-1}$ was a small fraction of the coefficient on $\Delta Y_t$. -### An example: Automobile demand +### An example: automobile demand Chow presents a clean illustration using data on net investment in automobiles from his earlier work on automobile demand. @@ -125,17 +158,149 @@ Net investment is the change in stock, $y_{it}^n = \Delta s_{it}$, and differenc y_{it}^n = a_i \Delta Y_t + b_i y_{i,t-1}^n ``` -The coefficients on $Y_t$ and $Y_{t-1}$ in the level form are $a_i$ and $-a_i(1-b_i)$ respectively. +The coefficients on $Y_t$ and $Y_{t-1}$ in the level form are $a_i$ and $-a_i(1-b_i)$ respectively. They are opposite in sign and similar in magnitude when $b_i$ is not too far from unity. This connection between stock adjustment and acceleration is central to Chow's argument about why acceleration matters for business cycles. 
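To make the differencing step concrete, here is a minimal numerical check (our own sketch, with made-up values of $a_i$, $b_i$ and an arbitrary output path rather than Chow's data): simulate the stock-adjustment rule, form net investment as the change in the stock, and confirm that it satisfies $y^n_{it} = a_i \Delta Y_t + b_i y^n_{i,t-1}$ exactly.

```{code-cell} ipython3
rng = np.random.default_rng(0)

# hypothetical parameters and an arbitrary, slowly growing output path
a, b = 0.3, 0.7
T = 50
Y = 100 + np.cumsum(rng.normal(1.0, 2.0, T))

# stock-adjustment rule: s_t = a Y_t + b s_{t-1}
s = np.empty(T)
s[0] = a * Y[0] / (1 - b)          # start at the implied stationary level
for t in range(1, T):
    s[t] = a * Y[t] + b * s[t - 1]

# net investment is the change in the stock
y_n = np.diff(s)
dY = np.diff(Y)

# check the differenced (acceleration) form: y^n_t = a ΔY_t + b y^n_{t-1}
max_dev = np.max(np.abs(y_n[1:] - (a * dY[1:] + b * y_n[:-1])))
print(f"maximum deviation from the acceleration form: {max_dev:.2e}")
```

The deviation is zero up to floating-point error; it is just the algebra of differencing the stock-adjustment equation.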
-## A linear system with shocks +## Acceleration enables oscillations + +Having established the empirical evidence for acceleration, we now examine why it matters theoretically for generating oscillations. + +{cite:t}`Chow1968` asks a fundamental question: if we build a macro model using only standard demand equations with simple distributed lags, can the system generate sustained oscillations? + +He shows that, under natural sign restrictions, the answer is no. -To study business cycles formally, we need a framework that combines the deterministic dynamics (captured by the transition matrix $A$) with random shocks. +Stock-adjustment demand for durable goods leads to investment equations where the coefficient on $Y_{t-1}$ is negative—the **acceleration effect**. -Both papers analyze (or reduce to) a first-order linear stochastic system +This negative coefficient is what makes complex roots possible in the characteristic equation. + +Without it, Chow proves that demand systems with only positive coefficients have real positive roots, and hence no oscillatory dynamics. + +The {doc}`samuelson` lecture explores this mechanism in detail through the Hansen-Samuelson multiplier-accelerator model. + +Here we briefly illustrate the effect. + +Take the multiplier–accelerator law of motion: + +```{math} +Y_t = c Y_{t-1} + v (Y_{t-1} - Y_{t-2}), +``` + +and rewrite it as a first-order system in $(Y_t, Y_{t-1})$. + +```{code-cell} ipython3 +def samuelson_transition(c, v): + return np.array([[c + v, -v], [1.0, 0.0]]) + +# Compare weak vs strong acceleration +# Weak: c=0.8, v=0.1 gives real roots (discriminant > 0) +# Strong: c=0.6, v=0.8 gives complex roots (discriminant < 0) +cases = [("weak acceleration", 0.8, 0.1), + ("strong acceleration", 0.6, 0.8)] +A_list = [samuelson_transition(c, v) for _, c, v in cases] + +for (label, c, v), A in zip(cases, A_list): + eig = np.linalg.eigvals(A) + disc = (c + v)**2 - 4*v + print(f"{label}: c={c}, v={v}, discriminant={disc:.2f}, eigenvalues={eig}") +``` + +With weak acceleration ($v=0.1$), the discriminant is positive and the roots are real. + +With strong acceleration ($v=0.8$), the discriminant is negative and the roots are complex conjugates, enabling oscillatory dynamics. + +```{code-cell} ipython3 +# impulse responses from a one-time unit shock in Y +T = 40 +s0 = np.array([1.0, 0.0]) +irfs = [] +for A in A_list: + s = s0.copy() + path = np.empty(T + 1) + for t in range(T + 1): + path[t] = s[0] + s = A @ s + irfs.append(path) + +fig, ax = plt.subplots(figsize=(10, 4)) +ax.plot(range(T + 1), irfs[0], lw=2, + label="weak acceleration (real roots)") +ax.plot(range(T + 1), irfs[1], lw=2, + label="strong acceleration (complex roots)") +ax.axhline(0.0, lw=0.8, color='gray') +ax.set_xlabel("time") +ax.set_ylabel(r"$Y_t$") +ax.legend(frameon=False) +plt.tight_layout() +plt.show() +``` + +With weak acceleration, the impulse response decays monotonically. + +With strong acceleration, it oscillates. + +We can ask how the eigenvalues change as we increase the accelerator $v$. + +As we increase the accelerator $v$, the eigenvalues move further from the origin. + +For this model, the eigenvalue modulus is $|\lambda| = \sqrt{v}$, so the stability boundary is $v = 1$. 
+ +```{code-cell} ipython3 +v_grid = [0.2, 0.4, 0.6, 0.8, 0.95] +c = 0.6 +T_irf = 40 # periods for impulse response + +fig, axes = plt.subplots(1, 2, figsize=(12, 5)) + +for v in v_grid: + A = samuelson_transition(c, v) + eig = np.linalg.eigvals(A) + + # Eigenvalues (left panel) + axes[0].scatter(eig.real, eig.imag, s=40, label=f'$v={v}$') + + # Impulse response (right panel) + s = np.array([1.0, 0.0]) + irf = np.empty(T_irf + 1) + for t in range(T_irf + 1): + irf[t] = s[0] + s = A @ s + axes[1].plot(range(T_irf + 1), irf, lw=2, label=f'$v={v}$') + +# Eigenvalue panel with unit circle +θ_circle = np.linspace(0, 2*np.pi, 100) +axes[0].plot(np.cos(θ_circle), np.sin(θ_circle), + 'k--', lw=0.8, label='unit circle') +axes[0].set_xlabel('real part') +axes[0].set_ylabel('imaginary part') +axes[0].set_aspect('equal') +axes[0].legend(frameon=False) + +# impulse response panel +axes[1].axhline(0, lw=0.8, color='gray') +axes[1].set_xlabel('time') +axes[1].set_ylabel(r'$Y_t$') +axes[1].legend(frameon=False) + +plt.tight_layout() +plt.show() +``` + +As $v$ increases, eigenvalues approach the unit circle and oscillations become more persistent. + +This illustrates that acceleration creates complex eigenvalues, which are necessary for oscillatory dynamics in deterministic systems. + +But what happens when we add random shocks? + +Frisch's insight was that even damped oscillations can be "maintained" when the system is continuously perturbed by random disturbances. + +To study this formally, we need to introduce the stochastic framework. + +## A linear system with shocks + +We analyze (or reduce to) a first-order linear stochastic system ```{math} :label: chow_var1 @@ -173,11 +338,15 @@ Standard calculations (also derived in {cite}`Chow1968`) give the recursion The second equation is the discrete Lyapunov equation for $\Gamma_0$. -### Why stochastic dynamics matter - {cite:t}`Chow1968` motivates the stochastic analysis with a quote from Ragnar Frisch: -> The examples we have discussed ... show that when an [deterministic] economic system gives rise to oscillations, these will most frequently be damped. But in reality the cycles ... are generally not damped. How can the maintenance of the swings be explained? ... One way which I believe is particularly fruitful and promising is to study what would become of the solution of a determinate dynamic system if it were exposed to a stream of erratic shocks ... +> The examples we have discussed ... show that when a [deterministic] economic system gives rise to oscillations, these will most frequently be damped. +> +> But in reality the cycles ... are generally not damped. +> +> How can the maintenance of the swings be explained? +> +> ... One way which I believe is particularly fruitful and promising is to study what would become of the solution of a determinate dynamic system if it were exposed to a stream of erratic shocks ... > > Thus, by connecting the two ideas: (1) the continuous solution of a determinate dynamic system and (2) the discontinuous shocks intervening and supplying the energy that may maintain the swings—we get a theoretical setup which seems to furnish a rational interpretation of those movements which we have been accustomed to see in our statistical time data. > @@ -202,7 +371,8 @@ A B = B D_\lambda, \quad \text{or equivalently} \quad A = B D_\lambda B^{-1} where $D_\lambda = \text{diag}(\lambda_1, \ldots, \lambda_p)$. Define canonical variables $z_t = B^{-1} y_t$. 
-These satisfy the decoupled dynamics + +These satisfy the decoupled dynamics: ```{math} :label: chow_canonical_dynamics @@ -256,8 +426,6 @@ y_{it} = \sum_j b_{ij} z_{j0} \lambda_j^t Both the autocovariance function {eq}`chow_scalar_autocov` and the deterministic path {eq}`chow_det_path` are linear combinations of $\lambda_m^k$ (or $\lambda_j^t$). -This formal resemblance is important: the coefficients differ (depending on initial conditions vs. shock covariances), but the role of eigenvalues is analogous. - ### Complex roots and damped oscillations When eigenvalues come in complex conjugate pairs $\lambda = r e^{\pm i\theta}$ with $r < 1$, their contribution to the autocovariance function is a **damped cosine**: @@ -271,9 +439,10 @@ When eigenvalues come in complex conjugate pairs $\lambda = r e^{\pm i\theta}$ w for appropriate amplitude $s$ and phase $\phi$ determined by the eigenvector loadings. In the deterministic model, such complex roots generate damped oscillatory time paths. + In the stochastic model, they generate damped oscillatory autocovariance functions. -It is in this sense that deterministic oscillations could be "maintained" in the stochastic model—but as we will see, the connection between eigenvalues and spectral peaks is more subtle than this suggests. +It is in this sense that deterministic oscillations could be "maintained" in the stochastic model, but as we will see, the connection between eigenvalues and spectral peaks is more subtle than this suggests. ## From autocovariances to spectra @@ -314,223 +483,19 @@ The advanced lecture {doc}`advanced:estspec` explains how to estimate $F(\omega) Here we focus on the model-implied spectrum. -We will use the following helper functions throughout the lecture. - -```{code-cell} ipython3 -def spectral_density_var1(A, V, ω_grid): - """Spectral density matrix for VAR(1): y_t = A y_{t-1} + u_t.""" - A, V = np.asarray(A), np.asarray(V) - n = A.shape[0] - I = np.eye(n) - F = np.empty((len(ω_grid), n, n), dtype=complex) - for k, ω in enumerate(ω_grid): - H = np.linalg.inv(I - np.exp(-1j * ω) * A) - F[k] = (H @ V @ H.conj().T) / (2 * np.pi) - return F - -def spectrum_of_linear_combination(F, b): - """Spectrum of x_t = b'y_t given the spectral matrix F(ω).""" - b = np.asarray(b).reshape(-1, 1) - return np.array([np.real((b.T @ F[k] @ b).item()) for k in range(F.shape[0])]) - -def simulate_var1(A, V, T, burn=200, seed=1234): - r"""Simulate y_t = A y_{t-1} + u_t with u_t \sim N(0, V).""" - rng = np.random.default_rng(seed) - A, V = np.asarray(A), np.asarray(V) - n = A.shape[0] - chol = np.linalg.cholesky(V) - y = np.zeros((T + burn, n)) - for t in range(1, T + burn): - y[t] = A @ y[t - 1] + chol @ rng.standard_normal(n) - return y[burn:] - -def sample_autocorrelation(x, max_lag): - """Sample autocorrelation of a 1d array from lag 0 to max_lag.""" - x = np.asarray(x) - x = x - x.mean() - denom = np.dot(x, x) - acf = np.empty(max_lag + 1) - for k in range(max_lag + 1): - acf[k] = np.dot(x[:-k] if k else x, x[k:]) / denom - return acf -``` - -## Deterministic propagation and acceleration - -Now we have the tools and the motivation to analyze spectral peaks in linear stochastic systems. - -We first go back to the deterministic system to understand why acceleration matters for generating oscillations in the first place. - -Before analyzing spectral peaks, we need to understand why acceleration matters for generating oscillations in the first place. 
- -{cite:t}`Chow1968` asks a question in the deterministic setup: if we build a macro model using only standard demand equations with simple distributed lags, can the system generate sustained oscillations? +We saw earlier that acceleration creates complex eigenvalues, which enable oscillatory impulse responses. -He shows that, under natural sign restrictions, the answer is no. - -As we saw in the {ref}`empirical foundation `, stock-adjustment demand for durable goods leads to investment equations where the coefficient on $Y_{t-1}$ is negative, i.e., the **acceleration effect**. - -This negative coefficient is what makes complex roots possible in the characteristic equation. - -Without it, Chow proves that demand systems with only positive coefficients have real positive roots, and hence no oscillatory dynamics. - -The {doc}`samuelson` lecture explores this mechanism in detail through the Hansen-Samuelson multiplier-accelerator model. - -Here we briefly illustrate the effect. Take the multiplier–accelerator law of motion - -```{math} -Y_t = c Y_{t-1} + v (Y_{t-1} - Y_{t-2}), -``` - -and rewrite it as a first-order system in $(Y_t, Y_{t-1})$. - -```{code-cell} ipython3 -def samuelson_transition(c, v): - return np.array([[c + v, -v], [1.0, 0.0]]) - -# Compare weak vs strong acceleration -# Weak: c=0.8, v=0.1 gives real roots (discriminant > 0) -# Strong: c=0.6, v=0.8 gives complex roots (discriminant < 0) -cases = [("weak acceleration", 0.8, 0.1), ("strong acceleration", 0.6, 0.8)] -A_list = [samuelson_transition(c, v) for _, c, v in cases] - -for (label, c, v), A in zip(cases, A_list): - eig = np.linalg.eigvals(A) - disc = (c + v)**2 - 4*v - print(f"{label}: c={c}, v={v}, discriminant={disc:.2f}, eigenvalues={eig}") - -# impulse responses from a one-time unit shock in Y -T = 40 -s0 = np.array([1.0, 0.0]) -irfs = [] -for A in A_list: - s = s0.copy() - path = np.empty(T + 1) - for t in range(T + 1): - path[t] = s[0] - s = A @ s - irfs.append(path) - -# model-implied spectra for the stochastic version with shocks in the Y equation -freq = np.linspace(1e-4, 0.5, 2500) # cycles/period -ω_grid = 2 * np.pi * freq -V = np.array([[1.0, 0.0], [0.0, 0.0]]) - -spectra = [] -for A in A_list: - F = spectral_density_var1(A, V, ω_grid) - f11 = np.real(F[:, 0, 0]) - spectra.append(f11 / np.trapezoid(f11, freq)) - -fig, axes = plt.subplots(1, 2, figsize=(12, 4)) - -axes[0].plot(range(T + 1), irfs[0], lw=2, label="weak acceleration (real roots)") -axes[0].plot(range(T + 1), irfs[1], lw=2, label="strong acceleration (complex roots)") -axes[0].axhline(0.0, lw=0.8) -axes[0].set_xlabel("time") -axes[0].set_ylabel(r"$Y_t$") -axes[0].legend(frameon=False) - -axes[1].plot(freq, spectra[0], lw=2, label="weak acceleration (real roots)") -axes[1].plot(freq, spectra[1], lw=2, label="strong acceleration (complex roots)") -axes[1].set_xlabel(r"frequency $\omega/2\pi$") -axes[1].set_ylabel("normalized spectrum") -axes[1].set_xlim([0.0, 0.5]) -axes[1].legend(frameon=False) - -plt.tight_layout() -plt.show() -``` - -The left panel shows the contrast between weak and strong acceleration: with weak acceleration ($v=0.1$) the roots are real and the impulse response decays monotonically; with strong acceleration ($v=0.8$) the roots are complex and the impulse response oscillates. - -The right panel shows the corresponding spectral signatures. - -Complex roots produce a pronounced peak at interior frequencies—the spectral signature of business cycles. 
- -### How acceleration strength affects the spectrum - -As we increase the accelerator $v$, the eigenvalues move further from the origin. - -For this model, the eigenvalue modulus is $|\lambda| = \sqrt{v}$, so the stability boundary is $v = 1$. - -```{code-cell} ipython3 -v_grid = [0.2, 0.4, 0.6, 0.8, 0.95] # stable cases only -c = 0.6 -freq_fine = np.linspace(1e-4, 0.5, 2000) -ω_fine = 2 * np.pi * freq_fine -V_acc = np.array([[1.0, 0.0], [0.0, 0.0]]) -T_irf = 40 # periods for impulse response - -fig = plt.figure(figsize=(12, 8)) -ax_eig = fig.add_subplot(2, 2, 1) -ax_spec = fig.add_subplot(2, 2, 2) -ax_irf = fig.add_subplot(2, 1, 2) # spans entire bottom row - -for v in v_grid: - A = samuelson_transition(c, v) - eig = np.linalg.eigvals(A) - - # eigenvalues (top left) - ax_eig.scatter(eig.real, eig.imag, s=40, label=f'$v={v}$') - - # spectrum (top right) - F = spectral_density_var1(A, V_acc, ω_fine) - f11 = np.real(F[:, 0, 0]) - f11_norm = f11 / np.trapezoid(f11, freq_fine) - ax_spec.plot(freq_fine, f11_norm, lw=2, label=f'$v={v}$') - - # impulse response (bottom row) - s = np.array([1.0, 0.0]) - irf = np.empty(T_irf + 1) - for t in range(T_irf + 1): - irf[t] = s[0] - s = A @ s - ax_irf.plot(range(T_irf + 1), irf, lw=2, label=f'$v={v}$') - -# eigenvalue panel with unit circle -θ_circle = np.linspace(0, 2*np.pi, 100) -ax_eig.plot(np.cos(θ_circle), np.sin(θ_circle), 'k--', lw=0.8, label='unit circle') -ax_eig.set_xlabel('real part') -ax_eig.set_ylabel('imaginary part') -ax_eig.set_aspect('equal') -ax_eig.legend(frameon=False, fontsize=8) - -# spectrum panel -ax_spec.set_xlabel(r'frequency $\omega/2\pi$') -ax_spec.set_ylabel('normalized spectrum') -ax_spec.set_xlim([0, 0.5]) -ax_spec.set_yscale('log') -ax_spec.legend(frameon=False, fontsize=8) - -# impulse response panel -ax_irf.axhline(0, lw=0.8, color='gray') -ax_irf.set_xlabel('time') -ax_irf.set_ylabel(r'$Y_t$') -ax_irf.legend(frameon=False, fontsize=8) - -plt.tight_layout() -plt.show() -``` - -As $v$ increases, eigenvalues approach the unit circle and the spectral peak becomes sharper. - -This illustrates Chow's main point: acceleration creates complex eigenvalues, which are necessary for oscillatory dynamics. - -Without acceleration, the eigenvalues would be real and the impulse response would decay monotonically without oscillation. - -With stronger acceleration (larger $v$), eigenvalues move closer to the unit circle, producing more persistent oscillations and a sharper spectral peak. +But do complex roots guarantee a spectral peak? -The above examples show that complex roots *can* produce spectral peaks. +Are they necessary for one? -But when exactly does this happen, and are complex roots *necessary*? - -Chow answers these questions for the Hansen-Samuelson model. +Chow provides precise answers for the Hansen-Samuelson model. ## Spectral peaks in the Hansen-Samuelson model -{cite:t}`Chow1968` provides a detailed spectral analysis of the Hansen-Samuelson multiplier-accelerator model. +{cite:t}`Chow1968` provides a detailed spectral analysis of the Hansen-Samuelson multiplier-accelerator model, deriving exact conditions for when spectral peaks occur. -This analysis reveals exactly when complex roots produce spectral peaks, and establishes that in this specific model, complex roots are *necessary* for a peak. +The analysis reveals that in this specific model, complex roots are *necessary* for a peak, but as we will see later, this is not true in general. 
### The model as a first-order system @@ -624,17 +589,16 @@ The necessary condition for a valid solution is: ``` We can interpret it as: -- When $r \approx 1$, the factor $(1+r^2)/2r \approx 1$, so $\omega \approx \theta$ +- When $r \approx 1$, the factor $(1+r^2)/2r \approx 1$, so $\omega \approx \theta$ - When $r$ is small (e.g., 0.3 or 0.4), condition {eq}`chow_hs_necessary` can only be satisfied if $\cos\theta \approx 0$, meaning $\theta \approx \pi/2$ (cycles of approximately 4 periods) -If $\theta = 54 \degree$ (corresponding to cycles of 6.67 periods) and $r = 0.4$, then $(1+r^2)/2r = 1.45$, giving $\cos\omega = 1.45 \times 0.588 = 0.85$, or $\omega = 31.5 \degree$, corresponding to cycles of 11.4 periods, which is much longer than the deterministic cycle. +If $\theta = 54^\circ$ (corresponding to cycles of 6.67 periods) and $r = 0.4$, then $(1+r^2)/2r = 1.45$, giving $\cos\omega = 1.45 \times 0.588 = 0.85$, or $\omega = 31.5^\circ$, corresponding to cycles of 11.4 periods, which is much longer than the deterministic cycle. ```{code-cell} ipython3 def peak_condition_factor(r): """Compute (1 + r^2) / (2r)""" return (1 + r**2) / (2 * r) -# Verify Chow's analysis: peak frequency as function of r for fixed θ θ_deg = 54 θ = np.deg2rad(θ_deg) r_grid = np.linspace(0.3, 0.99, 100) @@ -643,9 +607,9 @@ r_grid = np.linspace(0.3, 0.99, 100) ω_peak = [] for r in r_grid: factor = peak_condition_factor(r) - cos_omega = factor * np.cos(θ) - if -1 < cos_omega < 1: - ω_peak.append(np.arccos(cos_omega)) + cos_ω = factor * np.cos(θ) + if -1 < cos_ω < 1: + ω_peak.append(np.arccos(cos_ω)) else: ω_peak.append(np.nan) @@ -657,7 +621,7 @@ fig, axes = plt.subplots(1, 2, figsize=(12, 4)) axes[0].plot(r_grid, np.rad2deg(ω_peak), lw=2) axes[0].axhline(θ_deg, ls='--', lw=1.0, color='gray', label=rf'$\theta = {θ_deg}°$') axes[0].set_xlabel('eigenvalue modulus $r$') -axes[0].set_ylabel('peak frequency $\omega$ (degrees)') +axes[0].set_ylabel(r'peak frequency $\omega$ (degrees)') axes[0].legend(frameon=False) axes[1].plot(r_grid, period_peak, lw=2) @@ -669,19 +633,19 @@ axes[1].legend(frameon=False) plt.tight_layout() plt.show() -# Verify Chow's specific example r_example = 0.4 factor = peak_condition_factor(r_example) -cos_omega = factor * np.cos(θ) -omega_example = np.arccos(cos_omega) +cos_ω = factor * np.cos(θ) +ω_example = np.arccos(cos_ω) print(f"Chow's example: r = {r_example}, θ = {θ_deg}°") print(f" Factor (1+r²)/2r = {factor:.3f}") -print(f" cos(ω) = {cos_omega:.3f}") -print(f" ω = {np.rad2deg(omega_example):.1f}°") -print(f" Peak period = {360/np.rad2deg(omega_example):.1f} (vs deterministic period = {360/θ_deg:.1f})") +print(f" cos(ω) = {cos_ω:.3f}") +print(f" ω = {np.rad2deg(ω_example):.1f}°") +print(f" Peak period = {360/np.rad2deg(ω_example):.1f} (vs deterministic period = {360/θ_deg:.1f})") ``` As $r \to 1$, the peak frequency converges to $\theta$. + For smaller $r$, the peak frequency can differ substantially from the deterministic oscillation frequency. 
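As an extra check (ours, not in Chow's paper), we can confirm condition {eq}`chow_hs_peak_condition` by brute force: for a complex conjugate pair $\lambda = r e^{\pm i\theta}$, the denominator of {eq}`chow_hs_spectral_combined` should reach its minimum at the $\omega$ given by the formula.

```{code-cell} ipython3
# brute-force minimization of the denominator for a complex conjugate pair
r, θ_check = 0.8, np.deg2rad(54)
λ1 = r * np.exp(1j * θ_check)
λ2 = r * np.exp(-1j * θ_check)

ω_check = np.linspace(1e-4, np.pi - 1e-4, 100_000)
denom = np.real((1 + λ1**2 - 2 * λ1 * np.cos(ω_check)) *
                (1 + λ2**2 - 2 * λ2 * np.cos(ω_check)))

ω_brute = ω_check[np.argmin(denom)]
ω_formula = np.arccos((1 + r**2) / (2 * r) * np.cos(θ_check))
print(f"denominator minimized at ω ≈ {ω_brute:.4f}")
print(f"Chow's formula gives     ω = {ω_formula:.4f}")
```

The two numbers agree to the resolution of the grid.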
### Real positive roots cannot produce peaks @@ -715,14 +679,11 @@ This is a key result: In the Hansen-Samuelson model, *complex roots are necessar V_hs = np.array([[1.0, 0.0], [0.0, 0.0]]) # shock only in first equation # Case 1: Complex roots (c=0.6, v=0.8) -# Discriminant = (c+v)² - 4v = 1.96 - 3.2 < 0 → complex roots c_complex, v_complex = 0.6, 0.8 A_complex = samuelson_transition(c_complex, v_complex) eig_complex = np.linalg.eigvals(A_complex) # Case 2: Real roots (c=0.8, v=0.1) -# Discriminant = (c+v)² - 4v = 0.81 - 0.4 > 0 → real roots -# Both roots positive and < 1 (stable) c_real, v_real = 0.8, 0.1 A_real = samuelson_transition(c_real, v_real) eig_real = np.linalg.eigvals(A_real) @@ -757,7 +718,7 @@ While real positive roots cannot produce spectral peaks in the Hansen-Samuelson In multivariate systems, the spectral density of a linear combination of variables can have interior peaks even when all eigenvalues are real and positive. -### Chow's example +### Example Chow constructs the following explicit example with two real positive eigenvalues: @@ -828,15 +789,18 @@ f_general = spectrum_of_linear_combination(F_chow_ex, b_chow_ex) # Normalize to match Chow's table scale scale = f_formula[0] / spectrum_of_linear_combination( - spectral_density_var1(A_chow_ex, V_chow_ex, np.array([0.0])), b_chow_ex)[0] + spectral_density_var1( + A_chow_ex, V_chow_ex, np.array([0.0])), b_chow_ex)[0] print("Chow's Table (equation 67):") print("ω/π: ", " ".join([f"{ω/np.pi:.3f}" for ω in ω_table])) print("f_mm(ω): ", " ".join([f"{f:.3f}" for f in f_formula])) fig, ax = plt.subplots(figsize=(9, 4)) -ax.plot(ω_grid_fine / np.pi, f_general * scale, lw=2, label='spectrum') -ax.scatter(ω_table / np.pi, f_formula, s=50, zorder=3, label="Chow's table values") +ax.plot(ω_grid_fine / np.pi, f_general * scale, lw=2, + label='spectrum') +ax.scatter(ω_table / np.pi, f_formula, s=50, zorder=3, + label="Chow's table values") # Mark the peak i_peak = np.argmax(f_general) @@ -850,6 +814,8 @@ plt.show() print(f"\nPeak at ω/π ≈ {ω_peak/np.pi:.3f}, period ≈ {2*np.pi/ω_peak:.1f}") ``` +The peak appears at $\omega/\pi \approx 0.10$, which corresponds to a cycle length of approximately 20 periods, again much longer than the deterministic cycles implied by the eigenvalues. + ### The Slutsky connection Chow connects this result to Slutsky's well-known finding that taking moving averages of a random series can generate cycles. @@ -865,13 +831,14 @@ y_t = u_t + A u_{t-1} + A^2 u_{t-2} + \cdots This amounts to taking an infinite moving average of the random vectors $u_t$ with "geometrically declining" weights $A^0, A^1, A^2, \ldots$ For a scalar process with $0 < \lambda < 1$, no distinct cycles can emerge. -But for a matrix $A$ with real roots between 0 and 1, cycles **can** emerge in linear combinations of the variables. + +But for a matrix $A$ with real roots between 0 and 1, cycles *can* emerge in linear combinations of the variables. As Chow puts it: "When neither of two (canonical) variables has distinct cycles... a linear combination can have a peak in its spectral density." ### The general lesson -The examples above illustrate Chow's central point: +The examples above illustrate the following central points: 1. In the *Hansen-Samuelson model specifically*, complex roots are necessary for a spectral peak 2. 
But in *general multivariate systems*, complex roots are neither necessary nor sufficient @@ -884,13 +851,13 @@ The examples above illustrate Chow's central point: {cite:t}`ChowLevitan1969` use the frequency-domain objects from {cite:t}`Chow1968` to study a calibrated annual macroeconometric model. -They work with five annual aggregates +They work with five annual aggregates: - $y_1 = C$ (consumption), - $y_2 = I_1$ (equipment plus inventories), - $y_3 = I_2$ (construction), - $y_4 = R_a$ (long rate), -- $y_5 = Y_1 = C + I_1 + I_2$ (private-domestic gnp), +- $y_5 = Y_1 = C + I_1 + I_2$ (private-domestic GNP), and add $y_6 = y_{1,t-1}$ to rewrite the original system in first-order form. @@ -934,7 +901,7 @@ Here we take $A$ and $V$ as given and ask what they imply for spectra and cross- ### Reported shock covariance -Chow and Levitan report the $6 \times 6$ reduced-form shock covariance matrix $V$ (scaled by $10^{-7}$): +The $6 \times 6$ reduced-form shock covariance matrix $V$ (scaled by $10^{-7}$) is: ```{math} :label: chow_V_matrix @@ -1029,8 +996,6 @@ print(np.linalg.eigvals(A_chow).round(6)) Chow's canonical transformation uses $z_t = B^{-1} y_t$, giving dynamics $z_t = D_\lambda z_{t-1} + e_t$. -An algebraic detail: the closed form for $F(\omega)$ uses $A^\top$ (real transpose) rather than a conjugate transpose. - Accordingly, the canonical shock covariance is ```{math} @@ -1044,9 +1009,7 @@ print("diagonal of W:") print(np.diag(W).round(10)) ``` -### Spectral density via eigendecomposition - -Chow's closed-form formula for the spectral density matrix is +Chow derives the following closed-form formula for the spectral density matrix: ```{math} :label: chow_spectral_eigen @@ -1076,9 +1039,7 @@ freq = np.linspace(1e-4, 0.5, 5000) # cycles/year in [0, 1/2] F_chow = spectral_density_chow(λ, B, W, ω_grid) ``` -### Where is variance concentrated? - -Normalizing each spectrum to have unit area over $[0, 1/2]$ lets us compare shapes rather than scales. +Let's plot the univariate spectra of consumption ($y_1$) and equipment plus inventories ($y_2$): ```{code-cell} ipython3 variable_names = ['$C$', '$I_1$', '$I_2$', '$R_a$', '$Y_1$'] @@ -1117,14 +1078,27 @@ plt.show() i_peak = np.argmax(S_norm[mask, 1]) f_peak = freq[mask][i_peak] -print(f"Peak within [1/18, 1/2]: frequency ≈ {f_peak:.3f} cycles/year, period ≈ {1/f_peak:.2f} years.") ``` -Both spectra are dominated by very low frequencies, reflecting the near-unit eigenvalues. +We reproduce only Figures I.1 and I.2 here. + +Figure I.1 corresponds to consumption and declines monotonically with frequency. + +Figure I.1 illustrates Granger's "typical spectral shape" for macroeconomic time series. + +Figure I.2 corresponds to equipment plus inventories and shows the clearest (but still very flat) interior-frequency bump. + +Chow and Levitan associate the dominance of very low frequencies in both plots with strong persistence and long-run movements. + +They note that very large low-frequency power can arise from eigenvalues extremely close to one, which can occur mechanically when some equations are written in first differences. -This is the "typical spectral shape" of macroeconomic time series. +They stress that local peaks are not automatic, because complex roots may have small modulus and multivariate interactions can generate peaks even when all roots are real. -(These patterns match Figures I.1–I.2 of {cite}`ChowLevitan1969`.) 
+They note that the interior bump in Figure I.2 corresponds to cycles of roughly three years and that the spectrum is nearly flat over cycles between about two and four years. + +Their other spectra in Figures I.3–I.5 (construction, the long rate, and private-domestic GNP) decline monotonically with frequency in the same calibration. + +(This discussion follows Section II of {cite}`ChowLevitan1969`.) ### How variables move together across frequencies @@ -1142,6 +1116,10 @@ The **squared coherence** measures linear association at frequency $\omega$: R^2_{ij}(\omega) = \frac{|f_{ij}(\omega)|^2}{f_{ii}(\omega) f_{jj}(\omega)} \in [0, 1]. ``` +Think of coherence as the frequency-domain analogue of $R^2$: it measures how much of the variance of $y_i$ at frequency $\omega$ can be "explained" by $y_j$ at the same frequency. + +High coherence means the two series move together tightly at that frequency. + The **gain** is the frequency-response coefficient when regressing $y_i$ on $y_j$: ```{math} @@ -1150,6 +1128,10 @@ The **gain** is the frequency-response coefficient when regressing $y_i$ on $y_j G_{ij}(\omega) = \frac{|f_{ij}(\omega)|}{f_{jj}(\omega)}. ``` +Think of gain as the frequency-domain analogue of a regression coefficient: it measures how much $y_i$ responds to a unit change in $y_j$ at frequency $\omega$. + +A gain of 0.9 at low frequencies means long-cycle movements in $y_j$ translate almost one-for-one to $y_i$; a gain of 0.3 at high frequencies means short-cycle movements are dampened. + The **phase** captures lead-lag relationships (in radians): ```{math} @@ -1170,14 +1152,14 @@ def cross_spectral_measures(F, i, j): return coherence, gain, phase ``` -We now plot gain and coherence as in Figures II.1-II.3 of {cite}`ChowLevitan1969`. +We now plot gain and coherence as in Figures II.1–II.4 of {cite}`ChowLevitan1969`. ```{code-cell} ipython3 gnp_idx = 4 -fig, axes = plt.subplots(1, 3, figsize=(14, 6)) +fig, axes = plt.subplots(1, 2, figsize=(8, 6)) -for idx, var_idx in enumerate([0, 1, 2]): +for idx, var_idx in enumerate([0, 1]): coherence, gain, phase = cross_spectral_measures(F_chow, var_idx, gnp_idx) ax = axes[idx] @@ -1185,7 +1167,6 @@ for idx, var_idx in enumerate([0, 1, 2]): lw=2, label=rf'$R^2_{{{var_idx+1}5}}(\omega)$') ax.plot(freq[mask], gain[mask], lw=2, label=rf'$G_{{{var_idx+1}5}}(\omega)$') - paper_frequency_axis(ax) ax.set_ylim([0, 1.0]) ax.set_ylabel('gain, coherence') @@ -1195,11 +1176,43 @@ plt.tight_layout() plt.show() ``` -Coherence is high at low frequencies for all three components, meaning long-run movements track output closely. +The gain and coherence patterns differ across components (Figures II.1–II.2 of {cite}`ChowLevitan1969`): + +- Consumption vs private-domestic GNP (left panel): + - Gain is about 0.9 at very low frequencies but falls below 0.4 for cycles shorter than four years. + - This is evidence that short-cycle income movements translate less into consumption than long-cycle movements, consistent with permanent-income interpretations. + - Coherence remains high throughout. +- For Equipment plus inventories vs private-domestic GNP (right panel): + - Gain *rises* with frequency, exceeding 0.5 for short cycles. + - This is the frequency-domain signature of acceleration and volatile short-run inventory movements. -Gains differ: consumption smooths (gain below 1), while investment responds more strongly at higher frequencies. 
+```{code-cell} ipython3
+fig, axes = plt.subplots(1, 2, figsize=(8, 6))
+
+for idx, var_idx in enumerate([2, 3]):
+    coherence, gain, phase = cross_spectral_measures(F_chow, var_idx, gnp_idx)
+    ax = axes[idx]
+
+    ax.plot(freq[mask], coherence[mask],
+            lw=2, label=rf'$R^2_{{{var_idx+1}5}}(\omega)$')
+    ax.plot(freq[mask], gain[mask],
+            lw=2, label=rf'$G_{{{var_idx+1}5}}(\omega)$')
+    paper_frequency_axis(ax)
+    ax.set_ylim([0, 1.0])
+    ax.set_ylabel('gain, coherence')
+    ax.legend(frameon=False, loc='best')
+
+plt.tight_layout()
+plt.show()
+```
+
+- New construction vs private-domestic GNP (left panel):
+  - Gain peaks at medium cycle lengths (around 0.1 for short cycles).
+  - Coherence for both investment series stays fairly high across frequencies.
+- Long-bond yield vs private-domestic GNP (right panel):
+  - Gain varies less across frequencies than real activity series.
+  - Coherence with output is comparatively low at business-cycle frequencies, making it hard to explain interest-rate movements by inverting a money-demand equation.
 
 ### Lead-lag relationships
 
@@ -1208,7 +1221,7 @@ The phase tells us which variable leads at each frequency.
 
 Positive phase means output leads the component; negative phase means the component leads output.
 
 ```{code-cell} ipython3
-fig, ax = plt.subplots(figsize=(8, 6))
+fig, ax = plt.subplots()
 
 labels = [r'$\psi_{15}(\omega)/2\pi$', r'$\psi_{25}(\omega)/2\pi$',
           r'$\psi_{35}(\omega)/2\pi$', r'$\psi_{45}(\omega)/2\pi$']
@@ -1223,16 +1236,19 @@ paper_frequency_axis(ax)
 ax.set_ylabel('phase difference in cycles')
 ax.set_ylim([-0.25, 0.25])
 ax.set_yticks([-0.25, -0.20, -0.15, -0.10, -0.05, 0, 0.05, 0.10, 0.15, 0.20, 0.25])
-ax.legend(frameon=False, fontsize=9)
+ax.legend(frameon=False)
 plt.tight_layout()
 plt.show()
 ```
 
-At business-cycle frequencies, consumption tends to lag output while equipment and inventories tend to lead.
+The phase relationships reveal that:
 
-The interest rate is roughly coincident.
+- Output leads consumption by a small fraction of a cycle (about 0.06 cycles at a 6-year period, 0.04 cycles at a 3-year period).
+- Equipment plus inventories tends to lead output (by about 0.07 cycles at a 6-year period, 0.03 cycles at a 3-year period).
+- New construction leads at low frequencies and is close to coincident at higher frequencies.
+- The bond yield lags output slightly, remaining close to coincident in timing.
 
-(This matches Figure III of {cite}`ChowLevitan1969`.)
+These implied leads and lags are broadly consistent with turning-point timing summaries reported elsewhere, and simulations of the same model deliver similar lead–lag ordering at turning points (Figure III of {cite}`ChowLevitan1969`).
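
As a quick sanity check on these cross-spectral measures, consider a two-variable system in which the first variable is, by construction, an exact one-period lag of the second. Coherence and gain should then equal one at every frequency, and the magnitude of the phase should equal $\omega$ (one period of displacement). The sketch below assumes the `spectral_density_var1` and `cross_spectral_measures` helpers defined earlier in this lecture.

```{code-cell} ipython3
import numpy as np

# y1_t = y2_{t-1} and y2_t = ρ y2_{t-1} + u_t, with the shock entering
# only the second equation, so y1 is an exact one-period lag of y2
ρ = 0.7
A_check = np.array([[0.0, 1.0],
                    [0.0, ρ]])
V_check = np.array([[0.0, 0.0],
                    [0.0, 1.0]])

ω_check = np.linspace(0.1, 3.0, 6)
F_check = spectral_density_var1(A_check, V_check, ω_check)
coh, gain, phase = cross_spectral_measures(F_check, 0, 1)

print("coherence:", np.round(np.real(coh), 6))
print("gain:     ", np.round(np.real(gain), 6))
print("|phase|/ω:", np.round(np.abs(phase) / ω_check, 6))
```

All three printed rows should equal one up to numerical precision, confirming that the measures behave as expected in a case where the answer is known in advance.
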
### Building blocks of spectral shape @@ -1254,13 +1270,13 @@ Each observable spectral density is a linear combination of these kernels (plus ```{code-cell} ipython3 def scalar_kernel(λ_i, ω_grid): - """Chow's scalar spectral kernel g_i(ω).""" + """scalar spectral kernel g_i(ω).""" λ_i = complex(λ_i) mod_sq = np.abs(λ_i)**2 return np.array([(1 - mod_sq) / np.abs(1 - λ_i * np.exp(-1j * ω))**2 for ω in ω_grid]) fig, ax = plt.subplots(figsize=(10, 5)) -for i, λ_i in enumerate(λ[:4]): +for i, λ_i in enumerate(λ): if np.abs(λ_i) > 0.01: g_i = scalar_kernel(λ_i, ω_grid) label = f'$\\lambda_{i+1}$ = {λ_i:.4f}' if np.isreal(λ_i) else f'$\\lambda_{i+1}$ = {λ_i:.3f}' @@ -1274,65 +1290,120 @@ ax.legend(frameon=False) plt.show() ``` -Near-unit eigenvalues produce kernels sharply peaked at low frequencies. +The figure reveals how eigenvalue magnitude shapes spectral contributions: -Smaller eigenvalues produce flatter kernels. +- *Near-unit eigenvalues* ($\lambda_1, \lambda_2 \approx 1$) produce kernels sharply peaked at low frequencies—these drive the strong low-frequency power seen in the spectra above. +- *The moderate eigenvalue* ($\lambda_3 \approx 0.48$) contributes a flatter component that spreads power more evenly across frequencies. +- *The complex pair* ($\lambda_{4,5}$) has such small modulus ($|\lambda_{4,5}| \approx 0.136$) that its kernel is nearly flat, which is too weak to generate a pronounced interior peak. -The complex pair ($\lambda_{4,5}$) has such small modulus that its kernel is nearly flat. +This decomposition explains why the spectra look the way they do: the near-unit eigenvalues dominate, concentrating variance at very low frequencies. -### Why the spectra look the way they do +The complex pair, despite enabling oscillatory dynamics in principle, has insufficient modulus to produce a visible spectral peak. -The two near-unit eigenvalues generate strong low-frequency power. +## Summary -The moderate eigenvalue ($\lambda_3 \approx 0.48$) contributes a flatter component. +{cite:t}`Chow1968` draws several conclusions that remain relevant for understanding business cycles. -The complex pair has small modulus ($|\lambda_{4,5}| \approx 0.136$), so it cannot generate a pronounced interior peak. +The acceleration principle receives strong empirical support: the negative coefficient on lagged output in investment equations is a robust finding across datasets. -The near-zero eigenvalue reflects the accounting identity $Y_1 = C + I_1 + I_2$. +- This matters because, in a model consisting only of demand equations with simple distributed lags, the transition matrix has real positive roots under natural sign restrictions—ruling out prolonged oscillations. -This illustrates Chow's message: eigenvalues guide intuition, but observed spectra also depend on how shocks excite the modes and how observables combine them. +- Acceleration introduces the possibility of complex roots, which are necessary for oscillatory dynamics in deterministic systems. -### Summary +The relationship between eigenvalues and spectral peaks is more subtle than it first appears: -The calibrated model reveals three patterns: (1) most variance sits at very low frequencies due to near-unit eigenvalues; (2) consumption smooths while investment amplifies high-frequency movements; (3) consumption lags output at business-cycle frequencies while investment leads. +- Complex roots guarantee oscillatory autocovariances, but they are neither necessary nor sufficient for a pronounced spectral peak. 
-## Wrap-up +- In the Hansen–Samuelson model specifically, complex roots *are* necessary for a peak. -{cite:t}`Chow1968` draws several conclusions that remain relevant for understanding business cycles: +- But in general multivariate systems, even real roots can produce peaks through the interaction of shocks and eigenvector loadings. -1. **Empirical support for acceleration**: The acceleration principle, as formulated through stock-adjustment equations, receives strong empirical support from investment data. The negative coefficient on lagged output levels is a robust empirical finding. +Chow argues that understanding business cycles requires an integrated view of deterministic dynamics and random shocks. -2. **Acceleration is necessary for deterministic oscillations**: In a model consisting only of demand equations with simple distributed lags, the transition matrix has real positive roots (under natural sign restrictions), and hence no prolonged oscillations can occur. Acceleration introduces the possibility of complex roots. +{cite:t}`ChowLevitan1969` demonstrate what these objects look like in a calibrated system: strong low-frequency power from near-unit eigenvalues, frequency-dependent gains and coherences, and lead–lag relations that vary with cycle length. -3. **Complex roots are neither necessary nor sufficient for stochastic cycles**: While complex roots in the deterministic model guarantee oscillatory autocovariances, they are neither necessary nor sufficient for a pronounced spectral peak. In the Hansen-Samuelson model specifically, complex roots *are* necessary for a spectral peak. But in general multivariate systems, real roots can produce peaks through the interaction of shocks and eigenvector loadings. +Their results are consistent with Granger's "typical spectral shape" for economic time series. -4. **An integrated view is essential**: As Chow concludes, "an obvious moral is that the nature of business cycles can be understood only by an integrated view of the deterministic as well as the random elements." +That is a monotonically decreasing function of frequency, driven by the near-unit eigenvalues that arise when some equations are specified in first differences. -{cite:t}`ChowLevitan1969` then show what these objects look like in a calibrated system: strong low-frequency power (reflecting near-unit eigenvalues), frequency-dependent gains/coherences, and lead–lag relations that vary with the cycle length. +Understanding whether this shape reflects the true data-generating process requires analyzing the spectral densities implied by structural econometric models. -On the empirical side, Granger has noted a "typical spectral shape" for economic time series—a monotonically decreasing function of frequency. +## Exercises -The Chow-Levitan calibration is consistent with this shape, driven by the near-unit eigenvalues. +```{exercise} +:label: chow_cycles_ex1 -But as Chow emphasizes, understanding whether this shape reflects the true data-generating process requires analyzing the spectral densities implied by structural econometric models. +Plot impulse responses and spectra side-by-side for several values of the accelerator $v$ in the Hansen-Samuelson model, showing how acceleration strength affects both the time-domain and frequency-domain signatures. -To connect this to data, pair the model-implied objects here with the advanced lecture {doc}`advanced:estspec`. +Use the same $v$ values as in the main text: $v \in \{0.2, 0.4, 0.6, 0.8, 0.95\}$ with $c = 0.6$. 
+``` -## Exercises +```{solution-start} chow_cycles_ex1 +:class: dropdown +``` + +```{code-cell} ipython3 +v_grid_ex1 = [0.2, 0.4, 0.6, 0.8, 0.95] +c_ex1 = 0.6 +freq_ex1 = np.linspace(1e-4, 0.5, 2000) +ω_grid_ex1 = 2 * np.pi * freq_ex1 +V_ex1 = np.array([[1.0, 0.0], [0.0, 0.0]]) +T_irf_ex1 = 40 + +fig, axes = plt.subplots(1, 2, figsize=(12, 5)) + +for v in v_grid_ex1: + A = samuelson_transition(c_ex1, v) + + # impulse response (left panel) + s = np.array([1.0, 0.0]) + irf = np.empty(T_irf_ex1 + 1) + for t in range(T_irf_ex1 + 1): + irf[t] = s[0] + s = A @ s + axes[0].plot(range(T_irf_ex1 + 1), irf, lw=2, label=f'$v={v}$') + + # spectrum (right panel) + F = spectral_density_var1(A, V_ex1, ω_grid_ex1) + f11 = np.real(F[:, 0, 0]) + f11_norm = f11 / np.trapezoid(f11, freq_ex1) + axes[1].plot(freq_ex1, f11_norm, lw=2, label=f'$v={v}$') + +axes[0].axhline(0, lw=0.8, color='gray') +axes[0].set_xlabel('time') +axes[0].set_ylabel(r'$Y_t$') +axes[0].legend(frameon=False) + +axes[1].set_xlabel(r'frequency $\omega/2\pi$') +axes[1].set_ylabel('normalized spectrum') +axes[1].set_xlim([0, 0.5]) +axes[1].set_yscale('log') +axes[1].legend(frameon=False) + +plt.tight_layout() +plt.show() +``` + +As $v$ increases, eigenvalues approach the unit circle: oscillations become more persistent in the time domain (left), and the spectral peak becomes sharper in the frequency domain (right). + +Complex roots produce a pronounced peak at interior frequencies—the spectral signature of business cycles. + +```{solution-end} +``` ```{exercise} -:label: chow_cycles_ex1 +:label: chow_cycles_ex2 -Verify Chow's spectral peak condition {eq}`chow_hs_peak_condition` numerically for the Hansen-Samuelson model. +Verify spectral peak condition {eq}`chow_hs_peak_condition` numerically for the Hansen-Samuelson model. 1. For a range of eigenvalue moduli $r \in [0.3, 0.99]$ with fixed $\theta = 60°$, compute: - - The theoretical peak frequency from Chow's formula: $\cos\omega = \frac{1+r^2}{2r}\cos\theta$ + - The theoretical peak frequency from formula: $\cos\omega = \frac{1+r^2}{2r}\cos\theta$ - The actual peak frequency by numerically maximizing the spectral density 2. Plot both on the same graph and verify they match. 3. Identify the range of $r$ for which no valid peak exists (when the condition {eq}`chow_hs_necessary` is violated). ``` -```{solution-start} chow_cycles_ex1 +```{solution-start} chow_cycles_ex2 :class: dropdown ``` @@ -1346,17 +1417,17 @@ V_hs_ex = np.array([[1.0, 0.0], [0.0, 0.0]]) ω_numerical = [] for r in r_grid: - # Theoretical peak from Chow's formula + # Theoretical peak factor = (1 + r**2) / (2 * r) - cos_omega = factor * np.cos(θ_ex) - if -1 < cos_omega < 1: - ω_theory.append(np.arccos(cos_omega)) + cos_ω = factor * np.cos(θ_ex) + if -1 < cos_ω < 1: + ω_theory.append(np.arccos(cos_ω)) else: ω_theory.append(np.nan) # Numerical peak from spectral density - # Construct Hansen-Samuelson with eigenvalues r*exp(±iθ) - # This corresponds to c + v = 2r*cos(θ), v = r² + # Construct Hansen-Samuelson with eigenvalues r*exp(+-iθ) + # This corresponds to c + v = 2r*cos(θ), v = r^2 v = r**2 c = 2 * r * np.cos(θ_ex) - v A_ex = samuelson_transition(c, v) @@ -1401,33 +1472,35 @@ if valid_mask.any(): ``` The theoretical and numerical peak frequencies match closely. + As $r \to 1$, the peak frequency converges to $\theta$. + For smaller $r$, the factor $(1+r^2)/2r$ exceeds the threshold, and no valid peak exists. 
```{solution-end} ``` ```{exercise} -:label: chow_cycles_ex2 +:label: chow_cycles_ex3 In the "real roots but a peak" example, hold $A$ fixed and vary the shock correlation (the off-diagonal entry of $V$) between $0$ and $0.99$. When does the interior-frequency peak appear, and how does its location change? ``` -```{solution-start} chow_cycles_ex2 +```{solution-start} chow_cycles_ex3 :class: dropdown ``` ```{code-cell} ipython3 -A_ex2 = np.diag([0.1, 0.9]) -b_ex2 = np.array([1.0, -0.01]) +A_ex3 = np.diag([0.1, 0.9]) +b_ex3 = np.array([1.0, -0.01]) corr_grid = np.linspace(0, 0.99, 50) peak_periods = [] for corr in corr_grid: - V_ex2 = np.array([[1.0, corr], [corr, 1.0]]) - F_ex2 = spectral_density_var1(A_ex2, V_ex2, ω_grid_ex) - f_x = spectrum_of_linear_combination(F_ex2, b_ex2) + V_ex3 = np.array([[1.0, corr], [corr, 1.0]]) + F_ex3 = spectral_density_var1(A_ex3, V_ex3, ω_grid_ex) + f_x = spectrum_of_linear_combination(F_ex3, b_ex3) i_max = np.argmax(f_x) if 5 < i_max < len(ω_grid_ex) - 5: peak_periods.append(2 * np.pi / ω_grid_ex[i_max]) @@ -1447,15 +1520,15 @@ if len(threshold_idx) > 0: The interior peak appears only when the shock correlation exceeds a threshold. -This illustrates Chow's point that spectral peaks depend on the full system structure, not just eigenvalues. +This illustrates that spectral peaks depend on the full system structure, not just eigenvalues. ```{solution-end} ``` ```{exercise} -:label: chow_cycles_ex3 +:label: chow_cycles_ex4 -Using the calibrated Chow-Levitan (1969) parameters, compute the autocovariance matrices $\Gamma_0, \Gamma_1, \ldots, \Gamma_{10}$ using: +Using the calibrated Chow-Levitan parameters, compute the autocovariance matrices $\Gamma_0, \Gamma_1, \ldots, \Gamma_{10}$ using: 1. The recursion $\Gamma_k = A \Gamma_{k-1}$ with $\Gamma_0$ from the Lyapunov equation. 2. Chow's eigendecomposition formula $\Gamma_k = B D_\lambda^k \Gamma_0^* B^\top$ where $\Gamma_0^*$ is the canonical covariance. @@ -1463,7 +1536,7 @@ Using the calibrated Chow-Levitan (1969) parameters, compute the autocovariance Verify that both methods give the same result. ``` -```{solution-start} chow_cycles_ex3 +```{solution-start} chow_cycles_ex4 :class: dropdown ``` @@ -1500,7 +1573,7 @@ Both methods produce essentially identical results, up to numerical precision. ``` ```{exercise} -:label: chow_cycles_ex4 +:label: chow_cycles_ex5 Modify the Chow-Levitan model by changing $\lambda_3$ from $0.4838$ to $0.95$. @@ -1509,39 +1582,38 @@ Modify the Chow-Levitan model by changing $\lambda_3$ from $0.4838$ to $0.95$. 3. What economic interpretation might correspond to this parameter change? 
``` -```{solution-start} chow_cycles_ex4 +```{solution-start} chow_cycles_ex5 :class: dropdown ``` ```{code-cell} ipython3 +# Modify λ_3 and reconstruct the transition matrix λ_modified = λ.copy() λ_modified[2] = 0.95 -F_mod = spectral_density_chow(λ_modified, B, W, ω_grid) - -fig, axes = plt.subplots(2, 3, figsize=(14, 8)) -axes = axes.flatten() -var_labels = ["consumption", "equipment + inventories", "construction", "long rate", "output"] -for i in range(5): - f_orig = np.real(F_chow[:, i, i]) - f_mod = np.real(F_mod[:, i, i]) - f_orig_norm = f_orig / np.trapezoid(f_orig, freq) - f_mod_norm = f_mod / np.trapezoid(f_mod, freq) - axes[i].semilogy(freq, f_orig_norm, lw=2, label=r"original ($\lambda_3=0.48$)") - axes[i].semilogy(freq, f_mod_norm, lw=2, ls="--", label=r"modified ($\lambda_3=0.95$)") - paper_frequency_axis(axes[i]) - axes[i].set_ylabel(rf"normalized $f_{{{i+1}{i+1}}}(\omega)$") - axes[i].text(0.03, 0.08, var_labels[i], transform=axes[i].transAxes) - axes[i].legend(frameon=False, fontsize=8) -axes[5].axis('off') -plt.tight_layout() +D_λ_mod = np.diag(λ_modified) +A_mod = np.real(B @ D_λ_mod @ np.linalg.inv(B)) + +# Compute spectra using the VAR(1) formula with original V +F_mod = spectral_density_var1(A_mod, V, ω_grid) +F_orig = spectral_density_var1(A_chow, V, ω_grid) + +# Plot ratio of spectra for output (Y_1) +f_orig = np.real(F_orig[:, 4, 4]) +f_mod = np.real(F_mod[:, 4, 4]) + +fig, ax = plt.subplots() +ax.plot(freq, f_mod / f_orig, lw=2) +ax.axhline(1.0, ls='--', lw=1, color='gray') +paper_frequency_axis(ax) +ax.set_ylabel(r"ratio: modified / original spectrum for $Y_1$") plt.show() ``` -Increasing $\lambda_3$ from 0.48 to 0.95 adds more persistence to the system. +The near-unit eigenvalues ($\lambda_1, \lambda_2 \approx 0.9999$) dominate the output spectrum so heavily that changing $\lambda_3$ from 0.48 to 0.95 produces only a small relative effect. -The spectral densities show increased power at low frequencies. +The ratio plot reveals the change: the modified spectrum has slightly more power at low-to-medium frequencies and slightly less at high frequencies. -Economically, this could correspond to stronger persistence in the propagation of shocks—perhaps due to slower adjustment speeds in investment or consumption behavior. +Economically, increasing $\lambda_3$ adds persistence to the mode it governs. 
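
As a complementary time-domain check (a sketch that reuses the `A_chow`, `A_mod`, and `V` objects constructed above), we can solve the discrete Lyapunov equation under both parameterizations and compare the implied unconditional variance of private-domestic GNP.

```{code-cell} ipython3
import numpy as np
from scipy.linalg import solve_discrete_lyapunov

# Γ_0 solves Γ_0 = A Γ_0 A' + V for each transition matrix
Γ0_orig = solve_discrete_lyapunov(A_chow, V)
Γ0_mod = solve_discrete_lyapunov(A_mod, V)

var_orig = np.real(Γ0_orig[4, 4])
var_mod = np.real(Γ0_mod[4, 4])

print(f"var(Y_1) with λ_3 = 0.48: {var_orig:.4e}")
print(f"var(Y_1) with λ_3 = 0.95: {var_mod:.4e}")
print(f"ratio (modified / original): {var_mod / var_orig:.3f}")
```

If the near-unit roots dominate, this ratio should stay fairly close to one, mirroring the small relative effect seen in the spectrum.
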
```{solution-end} ``` From 8c917acdf6d6377d450d92a6ba801b0909c147ae Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Sat, 7 Feb 2026 16:06:31 +1100 Subject: [PATCH 04/19] updates --- lectures/chow_business_cycles.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lectures/chow_business_cycles.md b/lectures/chow_business_cycles.md index 396a4de55..196059ec0 100644 --- a/lectures/chow_business_cycles.md +++ b/lectures/chow_business_cycles.md @@ -31,8 +31,8 @@ kernelspec: This lecture studies two classic papers by Gregory Chow on business cycles in linear dynamic models: -- {cite}`Chow1968`: empirical evidence for the acceleration principle, why acceleration enables oscillations, and when spectral peaks arise in stochastic systems -- {cite}`ChowLevitan1969`: spectral analysis of a calibrated US macroeconometric model, showing gains, coherences, and lead–lag patterns +- {cite:t}`Chow1968`: empirical evidence for the acceleration principle, why acceleration enables oscillations, and when spectral peaks arise in stochastic systems +- {cite:t}`ChowLevitan1969`: spectral analysis of a calibrated US macroeconometric model, showing gains, coherences, and lead–lag patterns These papers connect ideas in the following lectures: From 9606ef6c370ed737014265f0b0eeeacf1a963d14 Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Sat, 7 Feb 2026 16:20:17 +1100 Subject: [PATCH 05/19] updates --- lectures/chow_business_cycles.md | 35 +++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/lectures/chow_business_cycles.md b/lectures/chow_business_cycles.md index 196059ec0..f03f2f3cc 100644 --- a/lectures/chow_business_cycles.md +++ b/lectures/chow_business_cycles.md @@ -75,7 +75,8 @@ def spectral_density_var1(A, V, ω_grid): def spectrum_of_linear_combination(F, b): """Spectrum of x_t = b'y_t given the spectral matrix F(ω).""" b = np.asarray(b).reshape(-1, 1) - return np.array([np.real((b.T @ F[k] @ b).item()) for k in range(F.shape[0])]) + return np.array([np.real((b.T @ F[k] @ b).item()) + for k in range(F.shape[0])]) def simulate_var1(A, V, T, burn=200, seed=1234): r"""Simulate y_t = A y_{t-1} + u_t with u_t \sim N(0, V).""" @@ -84,8 +85,10 @@ def simulate_var1(A, V, T, burn=200, seed=1234): n = A.shape[0] chol = np.linalg.cholesky(V) y = np.zeros((T + burn, n)) + for t in range(1, T + burn): y[t] = A @ y[t - 1] + chol @ rng.standard_normal(n) + return y[burn:] def sample_autocorrelation(x, max_lag): @@ -172,9 +175,11 @@ Having established the empirical evidence for acceleration, we now examine why i He shows that, under natural sign restrictions, the answer is no. -Stock-adjustment demand for durable goods leads to investment equations where the coefficient on $Y_{t-1}$ is negative—the **acceleration effect**. +Stock-adjustment demand for durable goods leads to investment equations where the coefficient on $Y_{t-1}$ is negative. -This negative coefficient is what makes complex roots possible in the characteristic equation. +This negative coefficient captures the **acceleration effect**: investment responds not just to the level of income, but to its rate of change. + +This negative coefficient is also what makes complex roots possible in the characteristic equation. Without it, Chow proves that demand systems with only positive coefficients have real positive roots, and hence no oscillatory dynamics. @@ -469,9 +474,13 @@ V \left(I - A^\top e^{i\omega}\right)^{-1}. 
``` -Intuitively, $F(\omega)$ tells us how much variation in $y_t$ is associated with cycles of (angular) frequency $\omega$. +$F(\omega)$ tells us how much variation in $y_t$ is associated with cycles of (angular) frequency $\omega$. + +Higher frequencies correspond to rapid oscillations, meaning short cycles where the series completes many up-and-down movements per unit of time. -The corresponding cycle length is +Lower frequencies correspond to slower oscillations, meaning long cycles that unfold over extended periods. + +The corresponding cycle length (or period) is ```{math} :label: chow_period @@ -479,6 +488,10 @@ The corresponding cycle length is T(\omega) = \frac{2\pi}{\omega}. ``` +Thus, a frequency of $\omega = \pi$ corresponds to the shortest possible cycle of $T = 2$ periods, while frequencies near zero correspond to very long cycles. + +When the spectral density $F(\omega)$ is concentrated at particular frequencies, it indicates that the time series exhibits pronounced cyclical behavior at those frequencies. + The advanced lecture {doc}`advanced:estspec` explains how to estimate $F(\omega)$ from data. Here we focus on the model-implied spectrum. @@ -638,7 +651,6 @@ factor = peak_condition_factor(r_example) cos_ω = factor * np.cos(θ) ω_example = np.arccos(cos_ω) print(f"Chow's example: r = {r_example}, θ = {θ_deg}°") -print(f" Factor (1+r²)/2r = {factor:.3f}") print(f" cos(ω) = {cos_ω:.3f}") print(f" ω = {np.rad2deg(ω_example):.1f}°") print(f" Peak period = {360/np.rad2deg(ω_example):.1f} (vs deterministic period = {360/θ_deg:.1f})") @@ -1128,9 +1140,9 @@ The **gain** is the frequency-response coefficient when regressing $y_i$ on $y_j G_{ij}(\omega) = \frac{|f_{ij}(\omega)|}{f_{jj}(\omega)}. ``` -Think of gain as the frequency-domain analogue of a regression coefficient: it measures how much $y_i$ responds to a unit change in $y_j$ at frequency $\omega$. +It measures how much $y_i$ responds to a unit change in $y_j$ at frequency $\omega$. -A gain of 0.9 at low frequencies means long-cycle movements in $y_j$ translate almost one-for-one to $y_i$; a gain of 0.3 at high frequencies means short-cycle movements are dampened. +For instance, a gain of 0.9 at low frequencies means long-cycle movements in $y_j$ translate almost one-for-one to $y_i$, and a gain of 0.3 at high frequencies means short-cycle movements are dampened. 
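
As a purely illustrative aside (our own example, not one from Chow and Levitan): if one series is a geometrically weighted average of another, say $c_t = (1-\beta)\sum_{k \ge 0}\beta^k y_{t-k}$, then the gain of $c$ on $y$ equals $(1-\beta) / |1 - \beta e^{-i\omega}|$, which is one at frequency zero and falls toward $(1-\beta)/(1+\beta)$ at $\omega = \pi$. Smoothing therefore shows up as a gain that declines with frequency.

```{code-cell} ipython3
import numpy as np

# Gain of a geometric distributed lag c_t = (1 - β) Σ_k β^k y_{t-k}
β = 0.8
ω_vals = np.array([0.0, np.pi / 4, np.pi / 2, np.pi])
gain_vals = (1 - β) / np.abs(1 - β * np.exp(-1j * ω_vals))

for ω_val, g in zip(ω_vals, gain_vals):
    print(f"ω = {ω_val:.2f}: gain = {g:.3f}")
```

This is one simple way to rationalize a gain near one at long cycles together with a much smaller gain at short cycles.
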
The **phase** captures lead-lag relationships (in radians): @@ -1273,13 +1285,16 @@ def scalar_kernel(λ_i, ω_grid): """scalar spectral kernel g_i(ω).""" λ_i = complex(λ_i) mod_sq = np.abs(λ_i)**2 - return np.array([(1 - mod_sq) / np.abs(1 - λ_i * np.exp(-1j * ω))**2 for ω in ω_grid]) + return np.array( + [(1 - mod_sq) / np.abs(1 - λ_i * np.exp(-1j * ω))**2 + for ω in ω_grid]) fig, ax = plt.subplots(figsize=(10, 5)) for i, λ_i in enumerate(λ): if np.abs(λ_i) > 0.01: g_i = scalar_kernel(λ_i, ω_grid) - label = f'$\\lambda_{i+1}$ = {λ_i:.4f}' if np.isreal(λ_i) else f'$\\lambda_{i+1}$ = {λ_i:.3f}' + label = f'$\\lambda_{i+1}$ = {λ_i:.4f}' \ + if np.isreal(λ_i) else f'$\\lambda_{i+1}$ = {λ_i:.3f}' ax.semilogy(freq, g_i, label=label, lw=2) ax.set_xlabel(r'frequency $\omega/2\pi$') ax.set_ylabel('$g_i(\\omega)$') From 8fa879a903179b86f92602068e2e2d1b0c4aa351 Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Sat, 7 Feb 2026 17:55:09 +1100 Subject: [PATCH 06/19] updates --- lectures/chow_business_cycles.md | 128 +++++++++++++++++-------------- 1 file changed, 69 insertions(+), 59 deletions(-) diff --git a/lectures/chow_business_cycles.md b/lectures/chow_business_cycles.md index f03f2f3cc..664f6d206 100644 --- a/lectures/chow_business_cycles.md +++ b/lectures/chow_business_cycles.md @@ -49,7 +49,7 @@ We will keep coming back to three ideas: - In deterministic models, oscillations correspond to complex eigenvalues of a transition matrix. - In stochastic models, a "cycle" shows up as a local peak in a (univariate) spectral density. -- Spectral peaks depend on eigenvalues, but also on how shocks enter (the covariance matrix $V$) and on how observables load on eigenmodes. +- Spectral peaks depend on eigenvalues, but also on how shocks enter and on how observables load on eigenmodes. Let's start with some standard imports: @@ -214,7 +214,7 @@ for (label, c, v), A in zip(cases, A_list): With weak acceleration ($v=0.1$), the discriminant is positive and the roots are real. -With strong acceleration ($v=0.8$), the discriminant is negative and the roots are complex conjugates, enabling oscillatory dynamics. +With strong acceleration ($v=0.8$), the discriminant is negative and the roots are complex conjugates that enable oscillatory dynamics. ```{code-cell} ipython3 # impulse responses from a one-time unit shock in Y @@ -305,7 +305,7 @@ To study this formally, we need to introduce the stochastic framework. ## A linear system with shocks -We analyze (or reduce to) a first-order linear stochastic system +We analyze a first-order linear stochastic system ```{math} :label: chow_var1 @@ -316,7 +316,7 @@ y_t = A y_{t-1} + u_t, \qquad \mathbb E[u_t u_t^\top] = V, \qquad -\mathbb E[u_t u_{t-k}^\top] = 0 \ (k \neq 0). +\mathbb E[u_t u_{t-k}^\top] = 0, \quad k \neq 0. ``` When the eigenvalues of $A$ are strictly inside the unit circle, the process is covariance stationary and its autocovariances exist. @@ -346,13 +346,9 @@ The second equation is the discrete Lyapunov equation for $\Gamma_0$. {cite:t}`Chow1968` motivates the stochastic analysis with a quote from Ragnar Frisch: > The examples we have discussed ... show that when a [deterministic] economic system gives rise to oscillations, these will most frequently be damped. -> > But in reality the cycles ... are generally not damped. -> > How can the maintenance of the swings be explained? -> > ... 
One way which I believe is particularly fruitful and promising is to study what would become of the solution of a determinate dynamic system if it were exposed to a stream of erratic shocks ... -> > Thus, by connecting the two ideas: (1) the continuous solution of a determinate dynamic system and (2) the discontinuous shocks intervening and supplying the energy that may maintain the swings—we get a theoretical setup which seems to furnish a rational interpretation of those movements which we have been accustomed to see in our statistical time data. > > — Ragnar Frisch (1933) @@ -365,7 +361,7 @@ We will show that even when eigenvalues are real (no deterministic oscillations) ### Autocovariances in terms of eigenvalues -Let $\lambda_1, \ldots, \lambda_p$ be the (possibly complex) eigenvalues of $A$, assumed distinct, and let $B$ be the matrix whose columns are the corresponding right eigenvectors: +Let $\lambda_1, \ldots, \lambda_p$ be the distinct, possibly complex, eigenvalues of $A$, and let $B$ be the matrix whose columns are the corresponding right eigenvectors: ```{math} :label: chow_eigen_decomp @@ -616,7 +612,7 @@ def peak_condition_factor(r): θ = np.deg2rad(θ_deg) r_grid = np.linspace(0.3, 0.99, 100) -# For each r, compute the implied peak frequency (if it exists) +# For each r, compute the implied peak frequency ω_peak = [] for r in r_grid: factor = peak_condition_factor(r) @@ -632,13 +628,15 @@ period_peak = 2 * np.pi / ω_peak fig, axes = plt.subplots(1, 2, figsize=(12, 4)) axes[0].plot(r_grid, np.rad2deg(ω_peak), lw=2) -axes[0].axhline(θ_deg, ls='--', lw=1.0, color='gray', label=rf'$\theta = {θ_deg}°$') +axes[0].axhline(θ_deg, ls='--', lw=1.0, color='gray', + label=rf'$\theta = {θ_deg}°$') axes[0].set_xlabel('eigenvalue modulus $r$') axes[0].set_ylabel(r'peak frequency $\omega$ (degrees)') axes[0].legend(frameon=False) axes[1].plot(r_grid, period_peak, lw=2) -axes[1].axhline(360/θ_deg, ls='--', lw=1.0, color='gray', label=rf'deterministic period = {360/θ_deg:.1f}') +axes[1].axhline(360/θ_deg, ls='--', lw=1.0, color='gray', + label=rf'deterministic period = {360/θ_deg:.1f}') axes[1].set_xlabel('eigenvalue modulus $r$') axes[1].set_ylabel('peak period') axes[1].legend(frameon=False) @@ -664,29 +662,41 @@ For smaller $r$, the peak frequency can differ substantially from the determinis For *real and positive roots* $\lambda_1, \lambda_2 > 0$, the first-order condition {eq}`chow_hs_foc` cannot be satisfied. -To see why, note that we would need: +To see why, recall that a spectral peak at an interior frequency $\omega \in (0, \pi)$ requires ```{math} -:label: chow_hs_real_impossible - -\cos\omega = \frac{(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1}{4\lambda_1 \lambda_2} > 1 +\cos\omega = \frac{(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1}{4\lambda_1 \lambda_2}. ``` -The inequality follows because: +For this to have a solution, we need the right-hand side to lie in $[-1, 1]$. + +But for positive $\lambda_1, \lambda_2$, the numerator exceeds $4\lambda_1\lambda_2$: ```{math} :label: chow_hs_real_proof -(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1 - 4\lambda_1\lambda_2 = \lambda_1(1-\lambda_2)^2 + \lambda_2(1-\lambda_1)^2 > 0 +(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1 - 4\lambda_1\lambda_2 = \lambda_1(1-\lambda_2)^2 + \lambda_2(1-\lambda_1)^2. +``` + +The right-hand side is a sum of two non-negative terms (each is a positive number times a square). 
+ +It equals zero only if both $\lambda_1 = 1$ and $\lambda_2 = 1$, which violates the stability condition $|\lambda_i| < 1$. + +For any stable system with real positive roots, this expression is strictly positive, so + +```{math} +:label: chow_hs_real_impossible + +\cos\omega = \frac{(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1}{4\lambda_1 \lambda_2} > 1, ``` -which is strictly positive for any $\lambda_1, \lambda_2 > 0$. +which is impossible. + +This is a key result: in the Hansen-Samuelson model, *complex roots are necessary* for a spectral peak at interior frequencies. -This is a key result: In the Hansen-Samuelson model, *complex roots are necessary* for a spectral peak at interior frequencies. +The following figure illustrates the difference in spectra between a case with complex roots and a case with real roots ```{code-cell} ipython3 -# Demonstrate: compare spectra with complex vs real roots -# Both cases use valid Hansen-Samuelson parameterizations ω_grid = np.linspace(1e-3, np.pi - 1e-3, 800) V_hs = np.array([[1.0, 0.0], [0.0, 0.0]]) # shock only in first equation @@ -768,22 +778,22 @@ Chow tabulates the values: The peak at $\omega$ slightly below $\pi/8$ (corresponding to periods around 11) is "quite pronounced." -In the following figure, we reproduce this table, but with Python, we can plot a finer grid to find the peak more accurately. +In the following figure, we reproduce this table, but with Python, we can plot a finer grid to find the peak more accurately ```{code-cell} ipython3 -# Reproduce Chow's exact example λ1, λ2 = 0.1, 0.9 w11, w22, w12 = 1.0, 1.0, 0.8 bm1, bm2 = 1.0, -0.01 # Construct the system A_chow_ex = np.diag([λ1, λ2]) + # W is the canonical shock covariance; we need V = B W B^T # For diagonal A with distinct eigenvalues, B = I, so V = W V_chow_ex = np.array([[w11, w12], [w12, w22]]) b_chow_ex = np.array([bm1, bm2]) -# Chow's formula (equation 67) +# Chow's formula def chow_spectrum_formula(ω): term1 = 0.9913 / (1.01 - 0.2 * np.cos(ω)) term2 = 0.001570 / (1.81 - 1.8 * np.cos(ω)) @@ -911,9 +921,7 @@ where $\Sigma$ is the covariance of structural residuals and $M$ is the matrix o Here we take $A$ and $V$ as given and ask what they imply for spectra and cross-spectra. -### Reported shock covariance - -The $6 \times 6$ reduced-form shock covariance matrix $V$ (scaled by $10^{-7}$) is: +The $6 \times 6$ reduced-form shock covariance matrix $V$ (scaled by $10^{-7}$) reported by Chow and Levitan is: ```{math} :label: chow_V_matrix @@ -930,8 +938,6 @@ V = \begin{bmatrix} The sixth row and column are zeros because $y_6$ is an identity (lagged $y_1$). -### Reported eigenvalues - The transition matrix $A$ has six characteristic roots: ```{math} @@ -949,8 +955,6 @@ One root ($\lambda_6$) is theoretically zero because of the identity $y_5 = y_1 The complex conjugate pair $\lambda_{4,5}$ has modulus $|\lambda_4| = \sqrt{0.0761^2 + 0.1125^2} \approx 0.136$. -### Reported eigenvectors - The right eigenvector matrix $B$ (columns are eigenvectors corresponding to $\lambda_1, \ldots, \lambda_6$): ```{math} @@ -999,14 +1003,14 @@ V = np.array([ D_λ = np.diag(λ) A_chow = B @ D_λ @ np.linalg.inv(B) -A_chow = np.real(A_chow) # drop tiny imaginary parts from reported rounding +A_chow = np.real(A_chow) print("eigenvalues of reconstructed A:") print(np.linalg.eigvals(A_chow).round(6)) ``` ### Canonical coordinates -Chow's canonical transformation uses $z_t = B^{-1} y_t$, giving dynamics $z_t = D_\lambda z_{t-1} + e_t$. 
+Chow and Levitan's canonical transformation uses $z_t = B^{-1} y_t$, giving dynamics $z_t = D_\lambda z_{t-1} + e_t$. Accordingly, the canonical shock covariance is @@ -1021,7 +1025,7 @@ print("diagonal of W:") print(np.diag(W).round(10)) ``` -Chow derives the following closed-form formula for the spectral density matrix: +Chow and Levitan derive the following closed-form formula for the spectral density matrix: ```{math} :label: chow_spectral_eigen @@ -1051,7 +1055,7 @@ freq = np.linspace(1e-4, 0.5, 5000) # cycles/year in [0, 1/2] F_chow = spectral_density_chow(λ, B, W, ω_grid) ``` -Let's plot the univariate spectra of consumption ($y_1$) and equipment plus inventories ($y_2$): +Let's plot the univariate spectra of consumption ($y_1$) and equipment plus inventories ($y_2$) ```{code-cell} ipython3 variable_names = ['$C$', '$I_1$', '$I_2$', '$R_a$', '$Y_1$'] @@ -1092,23 +1096,19 @@ i_peak = np.argmax(S_norm[mask, 1]) f_peak = freq[mask][i_peak] ``` -We reproduce only Figures I.1 and I.2 here. - -Figure I.1 corresponds to consumption and declines monotonically with frequency. +The left panel corresponds to consumption and declines monotonically with frequency. -Figure I.1 illustrates Granger's "typical spectral shape" for macroeconomic time series. +It illustrates Granger's "typical spectral shape" for macroeconomic time series. -Figure I.2 corresponds to equipment plus inventories and shows the clearest (but still very flat) interior-frequency bump. +The right panel corresponds to equipment plus inventories and shows the clearest (but still very flat) interior-frequency bump. Chow and Levitan associate the dominance of very low frequencies in both plots with strong persistence and long-run movements. -They note that very large low-frequency power can arise from eigenvalues extremely close to one, which can occur mechanically when some equations are written in first differences. +Very large low-frequency power can arise from eigenvalues extremely close to one, which occurs mechanically when some equations are written in first differences. -They stress that local peaks are not automatic, because complex roots may have small modulus and multivariate interactions can generate peaks even when all roots are real. +Local peaks are not automatic: complex roots may have small modulus, and multivariate interactions can generate peaks even when all roots are real. -They note that the interior bump in Figure I.2 corresponds to cycles of roughly three years and that the spectrum is nearly flat over cycles between about two and four years. - -Their other spectra in Figures I.3–I.5 (construction, the long rate, and private-domestic GNP) decline monotonically with frequency in the same calibration. +The interior bump in the right panel corresponds to cycles of roughly three years, with the spectrum nearly flat over cycles between about two and four years. (This discussion follows Section II of {cite}`ChowLevitan1969`.) @@ -1128,7 +1128,7 @@ The **squared coherence** measures linear association at frequency $\omega$: R^2_{ij}(\omega) = \frac{|f_{ij}(\omega)|^2}{f_{ii}(\omega) f_{jj}(\omega)} \in [0, 1]. ``` -Think of coherence as the frequency-domain analogue of $R^2$: it measures how much of the variance of $y_i$ at frequency $\omega$ can be "explained" by $y_j$ at the same frequency. +Coherence measures how much of the variance of $y_i$ at frequency $\omega$ can be "explained" by $y_j$ at the same frequency. High coherence means the two series move together tightly at that frequency. 
@@ -1194,7 +1194,7 @@ The gain and coherence patterns differ across components (Figures II.1–II.2 of - Gain is about 0.9 at very low frequencies but falls below 0.4 for cycles shorter than four years. - This is evidence that short-cycle income movements translate less into consumption than long-cycle movements, consistent with permanent-income interpretations. - Coherence remains high throughout. -- For Equipment plus inventories vs private-domestic GNP (right panel): +- Equipment plus inventories vs private-domestic GNP (right panel): - Gain *rises* with frequency, exceeding 0.5 for short cycles. - This is the frequency-domain signature of acceleration and volatile short-run inventory movements. @@ -1247,7 +1247,7 @@ ax.axhline(0, lw=0.8) paper_frequency_axis(ax) ax.set_ylabel('phase difference in cycles') ax.set_ylim([-0.25, 0.25]) -ax.set_yticks([-0.25, -0.20, -0.15, -0.10, -0.05, 0, 0.05, 0.10, 0.15, 0.20, 0.25]) +ax.set_yticks(np.arange(-0.25, 0.3, 0.05), minor=True) ax.legend(frameon=False) plt.tight_layout() plt.show() @@ -1280,6 +1280,8 @@ g_i(\omega) = \frac{1 - \lambda_i^2}{1 + \lambda_i^2 - 2\lambda_i \cos\omega}. Each observable spectral density is a linear combination of these kernels (plus cross-terms). +Below, we plot the scalar kernels for each eigenvalue to see how they shape the overall spectra + ```{code-cell} ipython3 def scalar_kernel(λ_i, ω_grid): """scalar spectral kernel g_i(ω).""" @@ -1307,7 +1309,7 @@ plt.show() The figure reveals how eigenvalue magnitude shapes spectral contributions: -- *Near-unit eigenvalues* ($\lambda_1, \lambda_2 \approx 1$) produce kernels sharply peaked at low frequencies—these drive the strong low-frequency power seen in the spectra above. +- *Near-unit eigenvalues* ($\lambda_1, \lambda_2 \approx 1$) produce kernels sharply peaked at low frequencies as these drive the strong low-frequency power seen in the spectra above. - *The moderate eigenvalue* ($\lambda_3 \approx 0.48$) contributes a flatter component that spreads power more evenly across frequencies. - *The complex pair* ($\lambda_{4,5}$) has such small modulus ($|\lambda_{4,5}| \approx 0.136$) that its kernel is nearly flat, which is too weak to generate a pronounced interior peak. @@ -1321,10 +1323,6 @@ The complex pair, despite enabling oscillatory dynamics in principle, has insuff The acceleration principle receives strong empirical support: the negative coefficient on lagged output in investment equations is a robust finding across datasets. -- This matters because, in a model consisting only of demand equations with simple distributed lags, the transition matrix has real positive roots under natural sign restrictions—ruling out prolonged oscillations. - -- Acceleration introduces the possibility of complex roots, which are necessary for oscillatory dynamics in deterministic systems. - The relationship between eigenvalues and spectral peaks is more subtle than it first appears: - Complex roots guarantee oscillatory autocovariances, but they are neither necessary nor sufficient for a pronounced spectral peak. @@ -1333,8 +1331,6 @@ The relationship between eigenvalues and spectral peaks is more subtle than it f - But in general multivariate systems, even real roots can produce peaks through the interaction of shocks and eigenvector loadings. -Chow argues that understanding business cycles requires an integrated view of deterministic dynamics and random shocks. 
- {cite:t}`ChowLevitan1969` demonstrate what these objects look like in a calibrated system: strong low-frequency power from near-unit eigenvalues, frequency-dependent gains and coherences, and lead–lag relations that vary with cycle length. Their results are consistent with Granger's "typical spectral shape" for economic time series. @@ -1357,6 +1353,9 @@ Use the same $v$ values as in the main text: $v \in \{0.2, 0.4, 0.6, 0.8, 0.95\} :class: dropdown ``` +Here is one solution: + + ```{code-cell} ipython3 v_grid_ex1 = [0.2, 0.4, 0.6, 0.8, 0.95] c_ex1 = 0.6 @@ -1422,6 +1421,8 @@ Verify spectral peak condition {eq}`chow_hs_peak_condition` numerically for the :class: dropdown ``` +Here is one solution: + ```{code-cell} ipython3 θ_ex = np.pi / 3 # 60 degrees r_grid = np.linspace(0.3, 0.99, 50) @@ -1449,6 +1450,7 @@ for r in r_grid: F_ex = spectral_density_var1(A_ex, V_hs_ex, ω_grid_ex) f11 = np.real(F_ex[:, 0, 0]) i_max = np.argmax(f11) + # Only count as a peak if it's not at the boundary if 5 < i_max < len(ω_grid_ex) - 5: ω_numerical.append(ω_grid_ex[i_max]) @@ -1468,7 +1470,7 @@ axes[0].set_xlabel('eigenvalue modulus $r$') axes[0].set_ylabel(r'peak frequency $\omega^*/\pi$') axes[0].legend(frameon=False) -# Plot the factor (1+r²)/2r to show when peaks are valid +# Plot the factor (1+r^2)/2r to show when peaks are valid axes[1].plot(r_grid, (1 + r_grid**2) / (2 * r_grid), lw=2) axes[1].axhline(1 / np.cos(θ_ex), ls='--', lw=1.0, color='red', label=f'threshold = 1/cos({np.rad2deg(θ_ex):.0f}°) = {1/np.cos(θ_ex):.2f}') @@ -1483,7 +1485,7 @@ plt.show() valid_mask = ~np.isnan(ω_theory) if valid_mask.any(): r_threshold = r_grid[valid_mask][0] - print(f"Peak exists for r ≥ {r_threshold:.2f}") + print(f"Peak exists for r >= {r_threshold:.2f}") ``` The theoretical and numerical peak frequencies match closely. @@ -1507,6 +1509,8 @@ When does the interior-frequency peak appear, and how does its location change? :class: dropdown ``` +Here is one solution: + ```{code-cell} ipython3 A_ex3 = np.diag([0.1, 0.9]) b_ex3 = np.array([1.0, -0.01]) @@ -1530,7 +1534,7 @@ plt.show() threshold_idx = np.where(~np.isnan(peak_periods))[0] if len(threshold_idx) > 0: - print(f"interior peak appears when correlation ≥ {corr_grid[threshold_idx[0]]:.2f}") + print(f"interior peak appears when correlation >= {corr_grid[threshold_idx[0]]:.2f}") ``` The interior peak appears only when the shock correlation exceeds a threshold. @@ -1555,6 +1559,9 @@ Verify that both methods give the same result. :class: dropdown ``` +Here is one solution: + + ```{code-cell} ipython3 from scipy.linalg import solve_discrete_lyapunov @@ -1601,6 +1608,9 @@ Modify the Chow-Levitan model by changing $\lambda_3$ from $0.4838$ to $0.95$. 
:class: dropdown ``` +Here is one solution: + + ```{code-cell} ipython3 # Modify λ_3 and reconstruct the transition matrix λ_modified = λ.copy() From 11fa74a02f69214cc6afdb551e43235146cd2a2f Mon Sep 17 00:00:00 2001 From: thomassargent30 Date: Sun, 8 Feb 2026 11:52:43 +0800 Subject: [PATCH 07/19] Tom's Feb 8 edits of the Chow lecture --- lectures/_static/quant-econ.bib | 23 +++++++++++++++++++++++ lectures/chow_business_cycles.md | 20 ++++++++++---------- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/lectures/_static/quant-econ.bib b/lectures/_static/quant-econ.bib index 55b678f94..21e6feacf 100644 --- a/lectures/_static/quant-econ.bib +++ b/lectures/_static/quant-econ.bib @@ -4,6 +4,29 @@ ### +@incollection{slutsky:1927, + address = {Moscow}, + author = {Slutsky, Eugen}, + booktitle = {Problems of Economic Conditions}, + date-added = {2021-02-16 14:44:03 -0600}, + date-modified = {2021-02-16 14:44:03 -0600}, + publisher = {The Conjuncture Institute}, + title = {The Summation of Random Causes as the Source of Cyclic Processes}, + volume = {3}, + year = {1927} +} + +@incollection{frisch33, + author = {Ragar Frisch}, + booktitle = {Economic Essays in Honour of Gustav Cassel}, + date-added = {2015-01-09 21:08:15 +0000}, + date-modified = {2015-01-09 21:08:15 +0000}, + pages = {171-205}, + publisher = {Allen and Unwin}, + title = {Propagation Problems and Impulse Problems in Dynamic Economics}, + year = {1933} +} + @article{harsanyi1968games, title={Games with Incomplete Information Played by ``{B}ayesian'' Players, {I}--{III} Part {II}. {B}ayesian Equilibrium Points}, author={Harsanyi, John C.}, diff --git a/lectures/chow_business_cycles.md b/lectures/chow_business_cycles.md index 664f6d206..5f16fca14 100644 --- a/lectures/chow_business_cycles.md +++ b/lectures/chow_business_cycles.md @@ -29,12 +29,12 @@ kernelspec: ## Overview -This lecture studies two classic papers by Gregory Chow on business cycles in linear dynamic models: +This lecture studies two classic papers by Gregory Chow: -- {cite:t}`Chow1968`: empirical evidence for the acceleration principle, why acceleration enables oscillations, and when spectral peaks arise in stochastic systems -- {cite:t}`ChowLevitan1969`: spectral analysis of a calibrated US macroeconometric model, showing gains, coherences, and lead–lag patterns +- {cite:t}`Chow1968` presents empirical evidence for the acceleration principle, describes how acceleration promotes oscillations, and analyzes conditions for the emergence of spectral peaks in linear difference equation subjected to random shocks +- {cite:t}`ChowLevitan1969` presents a spectral analysis of a calibrated US macroeconometric model and teaches about spectral gains, coherences, and lead–lag patterns -These papers connect ideas in the following lectures: +These papers are related to ideas in the following lectures: - The multiplier–accelerator mechanism in {doc}`samuelson` - Linear stochastic difference equations and autocovariances in {doc}`linear_models` @@ -43,11 +43,11 @@ These papers connect ideas in the following lectures: {cite:t}`Chow1968` builds on earlier empirical work testing the acceleration principle on US investment data. -We begin with that empirical foundation before developing the theoretical framework. +We start with that empirical evidence before developing the theoretical framework. -We will keep coming back to three ideas: +We will keep returning to three ideas: -- In deterministic models, oscillations correspond to complex eigenvalues of a transition matrix. 
+- In deterministic models, oscillations indicate complex eigenvalues of a transition matrix. - In stochastic models, a "cycle" shows up as a local peak in a (univariate) spectral density. - Spectral peaks depend on eigenvalues, but also on how shocks enter and on how observables load on eigenmodes. @@ -299,7 +299,7 @@ This illustrates that acceleration creates complex eigenvalues, which are necess But what happens when we add random shocks? -Frisch's insight was that even damped oscillations can be "maintained" when the system is continuously perturbed by random disturbances. +An insight of Ragnar Frisch {cite}`frisch33` was that damped oscillations can be "maintained" when the system is continuously perturbed by random disturbances. To study this formally, we need to introduce the stochastic framework. @@ -351,7 +351,7 @@ The second equation is the discrete Lyapunov equation for $\Gamma_0$. > ... One way which I believe is particularly fruitful and promising is to study what would become of the solution of a determinate dynamic system if it were exposed to a stream of erratic shocks ... > Thus, by connecting the two ideas: (1) the continuous solution of a determinate dynamic system and (2) the discontinuous shocks intervening and supplying the energy that may maintain the swings—we get a theoretical setup which seems to furnish a rational interpretation of those movements which we have been accustomed to see in our statistical time data. > -> — Ragnar Frisch (1933) +> — Ragnar Frisch (1933) {cite}`frisch33` Chow's main insight is that oscillations in the deterministic system are *neither necessary nor sufficient* for producing "cycles" in the stochastic system. @@ -840,7 +840,7 @@ The peak appears at $\omega/\pi \approx 0.10$, which corresponds to a cycle leng ### The Slutsky connection -Chow connects this result to Slutsky's well-known finding that taking moving averages of a random series can generate cycles. +Chow connects this result to Slutsky's {cite}`slutsky:1927` finding that moving averages of a random series have recurrent cycles. 
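To see Slutsky's mechanism in isolation, here is a small simulated illustration (not a calculation from Chow's paper; the averaging window of 8 periods is an arbitrary choice): a moving average of serially uncorrelated shocks is strongly positively autocorrelated at short lags, so recurrent swings emerge from pure noise.

```{code-cell} ipython3
import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
e = rng.standard_normal(400)               # serially uncorrelated shocks
window = 8                                 # arbitrary averaging window
x = np.convolve(e, np.ones(window) / window, mode='valid')

fig, axes = plt.subplots(1, 2, figsize=(10, 3.5))
axes[0].plot(x, lw=1.5)
axes[0].set_title('moving average of white noise')
# sample autocorrelations of the smoothed series
lags_ma = range(1, 21)
acf = [np.corrcoef(x[:-k], x[k:])[0, 1] for k in lags_ma]
axes[1].bar(lags_ma, acf)
axes[1].set_xlabel('lag')
axes[1].set_title('sample autocorrelations')
plt.tight_layout()
plt.show()
```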
The VAR(1) model can be written as an infinite moving average: From c78fe1890641c27421d1426a086c321fb0826fbf Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Sun, 8 Feb 2026 16:19:17 +1100 Subject: [PATCH 08/19] update --- lectures/chow_business_cycles.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lectures/chow_business_cycles.md b/lectures/chow_business_cycles.md index 5f16fca14..d8a424875 100644 --- a/lectures/chow_business_cycles.md +++ b/lectures/chow_business_cycles.md @@ -1070,8 +1070,9 @@ def paper_frequency_axis(ax): ax.set_xlabel(r'frequency $\omega/2\pi$') # Normalized spectra (areas set to 1) -S = np.real(np.diagonal(F_chow, axis1=1, axis2=2))[:, :5] # y1..y5 -areas = np.trapezoid(S, freq, axis=0) +S = np.real(np.diagonal(F_chow, axis1=1, axis2=2))[:, :5] +df = np.diff(freq) +areas = np.sum(0.5 * (S[1:] + S[:-1]) * df[:, None], axis=0) S_norm = S / areas mask = freq >= 0.0 @@ -1355,7 +1356,6 @@ Use the same $v$ values as in the main text: $v \in \{0.2, 0.4, 0.6, 0.8, 0.95\} Here is one solution: - ```{code-cell} ipython3 v_grid_ex1 = [0.2, 0.4, 0.6, 0.8, 0.95] c_ex1 = 0.6 @@ -1380,7 +1380,9 @@ for v in v_grid_ex1: # spectrum (right panel) F = spectral_density_var1(A, V_ex1, ω_grid_ex1) f11 = np.real(F[:, 0, 0]) - f11_norm = f11 / np.trapezoid(f11, freq_ex1) + df = np.diff(freq_ex1) + area = np.sum(0.5 * (f11[1:] + f11[:-1]) * df) + f11_norm = f11 / area axes[1].plot(freq_ex1, f11_norm, lw=2, label=f'$v={v}$') axes[0].axhline(0, lw=0.8, color='gray') @@ -1561,7 +1563,6 @@ Verify that both methods give the same result. Here is one solution: - ```{code-cell} ipython3 from scipy.linalg import solve_discrete_lyapunov @@ -1610,7 +1611,6 @@ Modify the Chow-Levitan model by changing $\lambda_3$ from $0.4838$ to $0.95$. 
Here is one solution: - ```{code-cell} ipython3 # Modify λ_3 and reconstruct the transition matrix λ_modified = λ.copy() From ddd82516eaa447c3dfe89b66f839331406c6571c Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Mon, 9 Feb 2026 16:28:32 +1100 Subject: [PATCH 09/19] minor updates --- lectures/_static/quant-econ.bib | 10 + lectures/_toc.yml | 1 + lectures/measurement_models.md | 810 ++++++++++++++++++++++++++++++++ 3 files changed, 821 insertions(+) create mode 100644 lectures/measurement_models.md diff --git a/lectures/_static/quant-econ.bib b/lectures/_static/quant-econ.bib index 21e6feacf..bd35b4809 100644 --- a/lectures/_static/quant-econ.bib +++ b/lectures/_static/quant-econ.bib @@ -2207,6 +2207,16 @@ @book{Sargent1987 year = {1987} } +@article{Sargent1989, + author = {Sargent, Thomas J}, + title = {Two Models of Measurements and the Investment Accelerator}, + journal = {Journal of Political Economy}, + volume = {97}, + number = {2}, + pages = {251--287}, + year = {1989} +} + @article{SchechtmanEscudero1977, author = {Schechtman, Jack and Escudero, Vera L S}, journal = {Journal of Economic Theory}, diff --git a/lectures/_toc.yml b/lectures/_toc.yml index aeaab36b5..098deaaa7 100644 --- a/lectures/_toc.yml +++ b/lectures/_toc.yml @@ -61,6 +61,7 @@ parts: - file: wealth_dynamics - file: kalman - file: kalman_2 + - file: measurement_models - caption: Search numbered: true chapters: diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md new file mode 100644 index 000000000..6b640aece --- /dev/null +++ b/lectures/measurement_models.md @@ -0,0 +1,810 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.17.1 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(sargent_measurement_models)= +```{raw} jupyter + +``` + +# Two Models of Measurements and the Investment Accelerator + +```{contents} Contents +:depth: 2 +``` + +## Overview + +{cite:t}`Sargent1989` studies what happens to an econometrician's +inferences about economic dynamics when observed data are contaminated +by measurement error. + +The setting is a {doc}`permanent income ` economy in which the +investment accelerator, the mechanism studied in {doc}`samuelson` and +{doc}`chow_business_cycles`, drives business cycle fluctuations. + +Sargent specifies a {doc}`linear state space model ` for the +true economy and then considers two ways of extracting information from +noisy measurements: + +- Model 1 applies a {doc}`Kalman filter ` directly to + raw (noisy) observations. +- Model 2 first filters the data to remove measurement error, + then computes dynamics from the filtered series. + +The two models produce different Wold representations and +forecast-error-variance decompositions, even though they describe +the same underlying economy. + +In this lecture we reproduce all numbered tables and figures from +{cite}`Sargent1989` while studying the underlying mechanisms in the paper. 
+ +We use the following imports and precision settings for tables: + +```{code-cell} ipython3 +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from scipy import linalg + +np.set_printoptions(precision=4, suppress=True) +``` + +## Model Setup + +The true economy is a version of the permanent income model +(see {doc}`perm_income`) in which a representative consumer +chooses consumption $c_t$ and capital accumulation $\Delta k_t$ +to maximize expected discounted utility subject to a budget +constraint. + +Assume that the discount factor satisfies $\beta f = 1$ and that the +productivity shock $\theta_t$ is white noise. + +The optimal decision rules reduce the true system to + +```{math} +\begin{aligned} +k_{t+1} &= k_t + f^{-1}\theta_t, \\ +y_{n,t} &= (f-1)k_t + \theta_t, \\ +c_t &= (f-1)k_t + (1-f^{-1})\theta_t, \\ +\Delta k_t &= f^{-1}\theta_t. +\end{aligned} +``` + +with $f = 1.05$ and $\theta_t \sim \mathcal{N}(0, 1)$. + +Here $k_t$ is capital, $y_{n,t}$ is national income, $c_t$ is consumption, +and $\Delta k_t$ is net investment. + +Notice the investment accelerator at work: because $\Delta k_t = f^{-1}\theta_t$, +investment responds only to the innovation $\theta_t$, not to the level of +capital. + +This is the same mechanism that {cite:t}`Chow1968` documented +empirically (see {doc}`chow_business_cycles`). + +We can cast this as a {doc}`linear state space model ` by +defining state and observable vectors + +```{math} +x_t = \begin{bmatrix} k_t \\ \theta_t \end{bmatrix}, +\qquad +z_t = \begin{bmatrix} y_{n,t} \\ c_t \\ \Delta k_t \end{bmatrix}, +``` + +and matrices + +```{math} +A = \begin{bmatrix} +1 & f^{-1} \\ +0 & 0 +\end{bmatrix}, +\qquad +C = \begin{bmatrix} +f-1 & 1 \\ +f-1 & 1-f^{-1} \\ +0 & f^{-1} +\end{bmatrix}. +``` + +The econometrician does not observe $z_t$ directly but instead +sees $\bar z_t = z_t + v_t$, where $v_t$ is a vector of measurement +errors. + +Measurement errors are AR(1): + +```{math} +v_t = D v_{t-1} + \eta_t, +``` + +with diagonal + +```{math} +D = \operatorname{diag}(0.6, 0.7, 0.3), +``` + +and innovation standard deviations $(0.05, 0.035, 0.65)$. + +```{code-cell} ipython3 +f = 1.05 +β = 1 / f + +A = np.array([ + [1.0, 1.0 / f], + [0.0, 0.0] +]) + +C = np.array([ + [f - 1.0, 1.0], + [f - 1.0, 1.0 - 1.0 / f], + [0.0, 1.0 / f] +]) + +Q = np.array([ + [0.0, 0.0], + [0.0, 1.0] +]) + +ρ = np.array([0.6, 0.7, 0.3]) +D = np.diag(ρ) + +# Innovation std. devs shown in Table 1 +σ_η = np.array([0.05, 0.035, 0.65]) +Σ_η = np.diag(σ_η**2) + +# Unconditional covariance of measurement errors v_t +R = np.diag((σ_η / np.sqrt(1.0 - ρ**2))**2) + +print(f"f = {f}, β = 1/f = {β:.6f}") +print("\nA ="); display(pd.DataFrame(A)) +print("C ="); display(pd.DataFrame(C)) +print("D ="); display(pd.DataFrame(D)) +``` + +## Kalman Filter + +Both models require a steady-state {doc}`Kalman filter `. + +The function below iterates on the Riccati equation until convergence, +returning the Kalman gain $K$, the state covariance $S$, and the +innovation covariance $V$. + +```{code-cell} ipython3 +def steady_state_kalman(A, C_obs, Q, R, W=None, tol=1e-13, max_iter=200_000): + """ + Solve steady-state Kalman equations for + x_{t+1} = A x_t + w_{t+1} + y_t = C_obs x_t + v_t + with cov(w)=Q, cov(v)=R, cov(w,v)=W. 
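
    The fixed point is computed by iterating on the Riccati recursion
        V = C_obs S C_obs' + R
        K = (A S C_obs' + W) V^{-1}
        S <- Q + A S A' - K V K'
    until S converges; the function returns the steady-state gain K,
    state covariance S, and innovation covariance V.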
+ """ + n = A.shape[0] + m = C_obs.shape[0] + if W is None: + W = np.zeros((n, m)) + + S = Q.copy() + for _ in range(max_iter): + V = C_obs @ S @ C_obs.T + R + K = (A @ S @ C_obs.T + W) @ np.linalg.inv(V) + S_new = Q + A @ S @ A.T - K @ V @ K.T + + if np.max(np.abs(S_new - S)) < tol: + S = S_new + break + S = S_new + + V = C_obs @ S @ C_obs.T + R + K = (A @ S @ C_obs.T + W) @ np.linalg.inv(V) + return K, S, V +``` + +## Table 2: True Impulse Responses + +Before introducing measurement error, we verify the impulse response of +the true system to a unit shock $\theta_0 = 1$. + +The response shows the investment accelerator clearly: the full impact on +net income $y_n$ occurs at lag 0, while consumption adjusts by only +$1 - f^{-1} \approx 0.048$ and investment absorbs the remainder. + +From lag 1 onward the economy is in its new steady state. + +```{code-cell} ipython3 +def table2_irf(A, C, n_lags=6): + x = np.array([0.0, 1.0]) # k_0 = 0, theta_0 = 1 + rows = [] + for j in range(n_lags): + y_n, c, d_k = C @ x + rows.append([j, y_n, c, d_k]) + x = A @ x + return np.array(rows) + +rep_table2 = table2_irf(A, C, n_lags=6) + +pd.DataFrame( + np.round(rep_table2[:, 1:], 4), + columns=[r'$y_n$', r'$c$', r'$\Delta k$'], + index=pd.Index(range(6), name='Lag') +) +``` + +## Model 1 (Raw Measurements): Tables 3 and 4 + +Model 1 treats the raw measured series $\bar z_t$ as the observables and +applies a Kalman filter to extract the state. + +Because the measurement errors $v_t$ are serially correlated, Sargent +quasi-differences the observation equation to obtain an innovation form +with serially uncorrelated errors. + +The transformed observation equation is + +```{math} +\bar z_t - D \bar z_{t-1} = (CA - DC)x_{t-1} + C w_t + \eta_t. +``` + +Hence + +```{math} +\bar C = CA - DC, \quad R_1 = CQC^\top + R, \quad W_1 = QC^\top. +``` + +```{code-cell} ipython3 +C_bar = C @ A - D @ C +R1 = C @ Q @ C.T + R +W1 = Q @ C.T + +K1, S1, V1 = steady_state_kalman(A, C_bar, Q, R1, W1) +``` + +With the Kalman gain in hand, we can derive the Wold moving-average +representation for the measured data. + +This representation tells us how measured $y_n$, $c$, and $\Delta k$ +respond over time to the orthogonalized innovations in the +innovation covariance matrix $V_1$. + +To recover the Wold representation, define the augmented state + +```{math} +r_t = \begin{bmatrix} \hat x_{t-1} \\ z_{t-1} \end{bmatrix}, +``` + +with dynamics + +```{math} +r_{t+1} = F_1 r_t + G_1 u_t, +\qquad +z_t = H_1 r_t + u_t, +``` + +where + +```{math} +F_1 = +\begin{bmatrix} +A & 0 \\ +\bar C & D +\end{bmatrix}, +\quad +G_1 = +\begin{bmatrix} +K_1 \\ +I +\end{bmatrix}, +\quad +H_1 = [\bar C \;\; D]. +``` + +```{code-cell} ipython3 +F1 = np.block([ + [A, np.zeros((2, 3))], + [C_bar, D] +]) +G1 = np.vstack([K1, np.eye(3)]) +H1 = np.hstack([C_bar, D]) + + +def measured_wold_coeffs(F, G, H, n_terms=25): + psi = [np.eye(3)] + Fpow = np.eye(F.shape[0]) + for _ in range(1, n_terms): + psi.append(H @ Fpow @ G) + Fpow = Fpow @ F + return psi + + +def fev_contributions(psi, V, n_horizons=20): + """ + Returns contrib[var, shock, h-1] = contribution at horizon h. 
+ """ + P = linalg.cholesky(V, lower=True) + out = np.zeros((3, 3, n_horizons)) + for h in range(1, n_horizons + 1): + acc = np.zeros((3, 3)) + for j in range(h): + T = psi[j] @ P + acc += T**2 + out[:, :, h - 1] = acc + return out + + +psi1 = measured_wold_coeffs(F1, G1, H1, n_terms=40) +resp1 = np.array([psi1[j] @ linalg.cholesky(V1, lower=True) for j in range(14)]) +decomp1 = fev_contributions(psi1, V1, n_horizons=20) +``` + +Table 3 reports the forecast-error-variance decomposition for Model 1. + +Each panel shows the cumulative contribution of one orthogonalized +innovation to the forecast-error variance of $y_n$, $c$, and $\Delta k$ +at horizons 1 through 20. + +```{code-cell} ipython3 +horizons = np.arange(1, 21) +cols = [r'$y_n$', r'$c$', r'$\Delta k$'] + +def fev_table(decomp, shock_idx, horizons): + return pd.DataFrame( + np.round(decomp[:, shock_idx, :].T, 4), + columns=cols, + index=pd.Index(horizons, name='Horizon') + ) + +print("Table 3A: Contribution of innovation 1") +display(fev_table(decomp1, 0, horizons)) + +print("Table 3B: Contribution of innovation 2") +display(fev_table(decomp1, 1, horizons)) + +print("Table 3C: Contribution of innovation 3") +display(fev_table(decomp1, 2, horizons)) +``` + +The innovation covariance matrix $V_1$ is: + +```{code-cell} ipython3 +labels = [r'$y_n$', r'$c$', r'$\Delta k$'] +pd.DataFrame(np.round(V1, 4), index=labels, columns=labels) +``` + +Table 4 reports the orthogonalized Wold impulse responses for Model 1 +at lags 0 through 13. + +```{code-cell} ipython3 +lags = np.arange(14) + +def wold_response_table(resp, shock_idx, lags): + return pd.DataFrame( + np.round(resp[:, :, shock_idx], 4), + columns=cols, + index=pd.Index(lags, name='Lag') + ) + +print("Table 4A: Response to innovation in y_n") +display(wold_response_table(resp1, 0, lags)) + +print("Table 4B: Response to innovation in c") +display(wold_response_table(resp1, 1, lags)) + +print("Table 4C: Response to innovation in Δk") +display(wold_response_table(resp1, 2, lags)) +``` + +## Model 2 (Filtered Measurements): Tables 5 and 6 + +Model 2 takes a different approach: instead of working with the raw data, +the econometrician first applies the Kalman filter from Model 1 to +strip out measurement error and then treats the filtered estimates +$\hat z_t = C \hat x_t$ as if they were the true observations. + +A second Kalman filter is then applied to the filtered series. + +The state noise covariance for this second filter is + +```{math} +Q_2 = K_1 V_1 K_1^\top, +``` + +We solve a second Kalman system with tiny measurement noise to regularize the +near-singular covariance matrix. + +```{code-cell} ipython3 +Q2 = K1 @ V1 @ K1.T +ε = 1e-7 + +K2, S2, V2 = steady_state_kalman(A, C, Q2, ε * np.eye(3)) + + +def filtered_wold_coeffs(A, C, K, n_terms=25): + psi = [np.eye(3)] + Apow = np.eye(2) + for _ in range(1, n_terms): + psi.append(C @ Apow @ K) + Apow = Apow @ A + return psi + + +psi2 = filtered_wold_coeffs(A, C, K2, n_terms=40) +resp2 = np.array([psi2[j] @ linalg.cholesky(V2, lower=True) for j in range(14)]) +decomp2 = fev_contributions(psi2, V2, n_horizons=20) +``` + +Table 5 is the analogue of Table 3 for Model 2. + +Because the filtered data are nearly noiseless, the second and third +innovations contribute very little to forecast-error variance. 
+ +```{code-cell} ipython3 +print("Table 5A: Contribution of innovation 1") +display(fev_table(decomp2, 0, horizons)) + +print("Table 5B: Contribution of innovation 2 (×10³)") +display(pd.DataFrame( + np.round(decomp2[:, 1, :].T * 1e3, 4), + columns=cols, + index=pd.Index(horizons, name='Horizon') +)) + +print("Table 5C: Contribution of innovation 3 (×10⁶)") +display(pd.DataFrame( + np.round(decomp2[:, 2, :].T * 1e6, 4), + columns=cols, + index=pd.Index(horizons, name='Horizon') +)) +``` + +The innovation covariance matrix $V_2$ for Model 2 is: + +```{code-cell} ipython3 +pd.DataFrame(np.round(V2, 4), index=labels, columns=labels) +``` + +Table 6 reports the orthogonalized Wold impulse responses for Model 2. + +```{code-cell} ipython3 +print("Table 6A: Response to innovation in y_n") +display(wold_response_table(resp2, 0, lags)) + +print("Table 6B: Response to innovation in c") +display(wold_response_table(resp2, 1, lags)) + +print("Table 6C: Response to innovation in Δk (×10³)") +display(pd.DataFrame( + np.round(resp2[:, :, 2] * 1e3, 4), + columns=cols, + index=pd.Index(lags, name='Lag') +)) +``` + +## Simulation: Figures 1 through 9 and Table 7 + +The tables above characterize population moments of the two models. + +To see how the models perform on a finite sample, Sargent simulates +80 periods of true, measured, and filtered data and reports +covariance and correlation matrices (Table 7) together with +time-series plots (Figures 1 through 9). + +We replicate these objects below. + +```{code-cell} ipython3 +def simulate_series(seed=7909, T=80, k0=10.0): + """ + Simulate true, measured, and filtered series for Figures 1--9. + """ + rng = np.random.default_rng(seed) + + # True state/observables + θ = rng.normal(0.0, 1.0, size=T) + k = np.empty(T + 1) + k[0] = k0 + + y = np.empty(T) + c = np.empty(T) + dk = np.empty(T) + + for t in range(T): + x_t = np.array([k[t], θ[t]]) + y[t], c[t], dk[t] = C @ x_t + k[t + 1] = k[t] + (1.0 / f) * θ[t] + + # Measured data with AR(1) errors + v_prev = np.zeros(3) + v = np.empty((T, 3)) + for t in range(T): + η_t = rng.multivariate_normal(np.zeros(3), Σ_η) + v_prev = D @ v_prev + η_t + v[t] = v_prev + + z_meas = np.column_stack([y, c, dk]) + v + + # Filtered data via Model 1 transformed filter + xhat_prev = np.array([k0, 0.0]) + z_prev = np.zeros(3) + z_filt = np.empty((T, 3)) + k_filt = np.empty(T) + + for t in range(T): + z_bar_t = z_meas[t] - D @ z_prev + u_t = z_bar_t - C_bar @ xhat_prev + xhat_t = A @ xhat_prev + K1 @ u_t + + z_filt[t] = C @ xhat_t + k_filt[t] = xhat_t[0] + + xhat_prev = xhat_t + z_prev = z_meas[t] + + out = { + "y_true": y, "c_true": c, "dk_true": dk, "k_true": k[:-1], + "y_meas": z_meas[:, 0], "c_meas": z_meas[:, 1], "dk_meas": z_meas[:, 2], + "y_filt": z_filt[:, 0], "c_filt": z_filt[:, 1], "dk_filt": z_filt[:, 2], "k_filt": k_filt + } + return out + + +sim = simulate_series(seed=7909, T=80, k0=10.0) +``` + +```{code-cell} ipython3 +def plot_true_vs_other(t, true_series, other_series, other_label, ylabel=""): + fig, ax = plt.subplots(figsize=(8, 3.6)) + ax.plot(t, true_series, lw=2, color="black", label="true") + ax.plot(t, other_series, lw=2, ls="--", color="#1f77b4", label=other_label) + ax.set_xlabel("time", fontsize=11) + ax.set_ylabel(ylabel, fontsize=11) + ax.legend(loc="best") + ax.grid(alpha=0.3) + plt.tight_layout() + plt.show() + + +t = np.arange(1, 81) +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and measured consumption + name: fig-true-measured-consumption + image: + alt: True and measured 
consumption plotted over 80 time periods +--- +plot_true_vs_other(t, sim["c_true"], sim["c_meas"], "measured", ylabel="consumption") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and measured investment + name: fig-true-measured-investment + image: + alt: True and measured investment plotted over 80 time periods +--- +plot_true_vs_other(t, sim["dk_true"], sim["dk_meas"], "measured", ylabel="investment") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and measured income + name: fig-true-measured-income + image: + alt: True and measured income plotted over 80 time periods +--- +plot_true_vs_other(t, sim["y_true"], sim["y_meas"], "measured", ylabel="income") +``` + +Figures 1 through 3 show how measurement error distorts each series. + +Investment (Figure 2) is hit hardest because its measurement error +has the largest innovation variance ($\sigma_\eta = 0.65$). + +Figures 4 through 7 compare the true series with the Kalman-filtered +estimates from Model 1. + +The filter removes much of the measurement +noise, recovering series that track the truth closely. + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and filtered consumption + name: fig-true-filtered-consumption + image: + alt: True and filtered consumption plotted over 80 time periods +--- +plot_true_vs_other(t, sim["c_true"], sim["c_filt"], "filtered", ylabel="consumption") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and filtered investment + name: fig-true-filtered-investment + image: + alt: True and filtered investment plotted over 80 time periods +--- +plot_true_vs_other(t, sim["dk_true"], sim["dk_filt"], "filtered", ylabel="investment") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and filtered income + name: fig-true-filtered-income + image: + alt: True and filtered income plotted over 80 time periods +--- +plot_true_vs_other(t, sim["y_true"], sim["y_filt"], "filtered", ylabel="income") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and filtered capital stock + name: fig-true-filtered-capital + image: + alt: True and filtered capital stock plotted over 80 time periods +--- +plot_true_vs_other(t, sim["k_true"], sim["k_filt"], "filtered", ylabel="capital stock") +``` + +Figures 8 and 9 plot the national income identity residual +$c_t + \Delta k_t - y_{n,t}$. + +In the true model this identity holds exactly. + +For measured data (Figure 8) the residual is non-zero because +independent measurement errors break the accounting identity. + +For filtered data (Figure 9) the Kalman filter approximately +restores the identity. 
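The reason the identity is exact for the true data can be read off the matrix $C$: its consumption and investment rows sum to its income row, so $c_t + \Delta k_t = y_{n,t}$ for every state $x_t$. Here is a one-line check (an extra verification, not one of Sargent's figures):

```{code-cell} ipython3
# consumption row plus investment row equals the income row of C
print(np.allclose(C[1] + C[2], C[0]))
```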
+ +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Measured consumption plus investment minus income + name: fig-measured-identity-residual + image: + alt: National income identity residual for measured data over 80 time periods +--- +fig, ax = plt.subplots(figsize=(8, 3.6)) +ax.plot(t, sim["c_meas"] + sim["dk_meas"] - sim["y_meas"], color="#d62728", lw=2) +ax.set_xlabel("time", fontsize=11) +ax.set_ylabel("residual", fontsize=11) +ax.grid(alpha=0.3) +plt.tight_layout() +plt.show() +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Filtered consumption plus investment minus income + name: fig-filtered-identity-residual + image: + alt: National income identity residual for filtered data over 80 time periods +--- +fig, ax = plt.subplots(figsize=(8, 3.6)) +ax.plot(t, sim["c_filt"] + sim["dk_filt"] - sim["y_filt"], color="#2ca02c", lw=2) +ax.set_xlabel("time", fontsize=11) +ax.set_ylabel("residual", fontsize=11) +ax.grid(alpha=0.3) +plt.tight_layout() +plt.show() +``` + +Table 7 reports covariance and correlation matrices among the true, +measured, and filtered versions of each variable. + +High correlations between true and filtered series confirm that the +Kalman filter does a good job of removing measurement noise. + +Lower correlations between true and measured series quantify how much +information is lost by using raw data. + +```{code-cell} ipython3 +def cov_corr_three(a, b, c): + X = np.vstack([a, b, c]) + return np.cov(X), np.corrcoef(X) + +def matrix_df(mat, labels): + return pd.DataFrame(np.round(mat, 4), index=labels, columns=labels) + +cov_c, corr_c = cov_corr_three(sim["c_true"], sim["c_meas"], sim["c_filt"]) +cov_i, corr_i = cov_corr_three(sim["dk_true"], sim["dk_meas"], sim["dk_filt"]) +cov_y, corr_y = cov_corr_three(sim["y_true"], sim["y_meas"], sim["y_filt"]) +cov_k = np.cov(np.vstack([sim["k_true"], sim["k_filt"]])) +corr_k = np.corrcoef(np.vstack([sim["k_true"], sim["k_filt"]])) + +tmf_labels = ['true', 'measured', 'filtered'] +tf_labels = ['true', 'filtered'] + +print("Table 7A: Covariance matrix of consumption") +display(matrix_df(cov_c, tmf_labels)) + +print("Table 7B: Correlation matrix of consumption") +display(matrix_df(corr_c, tmf_labels)) + +print("Table 7C: Covariance matrix of investment") +display(matrix_df(cov_i, tmf_labels)) + +print("Table 7D: Correlation matrix of investment") +display(matrix_df(corr_i, tmf_labels)) + +print("Table 7E: Covariance matrix of income") +display(matrix_df(cov_y, tmf_labels)) + +print("Table 7F: Correlation matrix of income") +display(matrix_df(corr_y, tmf_labels)) + +print("Table 7G: Covariance matrix of capital") +display(matrix_df(cov_k, tf_labels)) + +print("Table 7H: Correlation matrix of capital") +display(matrix_df(corr_k, tf_labels)) +``` + +## Summary + +This lecture reproduced the tables and figures in {cite}`Sargent1989`, +which studies how measurement error alters an econometrician's view +of a permanent income economy driven by the investment accelerator. + +Several lessons emerge: + +* The Wold representations and variance decompositions of Model 1 (raw + measurements) and Model 2 (filtered measurements) are quite different, + even though the underlying economy is the same. + +* Measurement error is not a second-order issue: it can + reshape inferences about which shocks drive which variables. + +* The {doc}`Kalman filter ` effectively strips measurement noise + from the data. 
+ +* The filtered series track the truth closely + (Figures 4 through 7), and the near-zero residual in Figure 9 shows that + the filter approximately restores the national income accounting + identity that raw measurement error breaks (Figure 8). + +* The forecast-error-variance decompositions (Tables 3 and 5) reveal + that Model 1 attributes substantial variance to measurement noise + innovations, while Model 2, working with cleaned data, attributes + nearly all variance to the single structural shock $\theta_t$. + +These results connect to broader themes in this lecture series: +the role of {doc}`linear state space models ` in +representing economic dynamics, the power of {doc}`Kalman filtering ` +for signal extraction, and the importance of the investment accelerator +for understanding business cycles ({doc}`samuelson`, +{doc}`chow_business_cycles`). + +## References + +* {cite}`Sargent1989` From bc862f69015ed3917daff7954637041ad53d1765 Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Mon, 9 Feb 2026 18:07:38 +1100 Subject: [PATCH 10/19] updates --- lectures/measurement_models.md | 640 ++++++++++++++++++++++++--------- 1 file changed, 463 insertions(+), 177 deletions(-) diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md index 6b640aece..0f73e221a 100644 --- a/lectures/measurement_models.md +++ b/lectures/measurement_models.md @@ -49,8 +49,8 @@ The two models produce different Wold representations and forecast-error-variance decompositions, even though they describe the same underlying economy. -In this lecture we reproduce all numbered tables and figures from -{cite}`Sargent1989` while studying the underlying mechanisms in the paper. +In this lecture we reproduce the analysis from {cite}`Sargent1989` +while studying the underlying mechanisms in the paper. 
We use the following imports and precision settings for tables: @@ -59,8 +59,48 @@ import numpy as np import pandas as pd import matplotlib.pyplot as plt from scipy import linalg +from IPython.display import Latex np.set_printoptions(precision=4, suppress=True) + +def df_to_latex_matrix(df, label=''): + """Convert DataFrame to LaTeX matrix (for math matrices).""" + lines = [r'\begin{bmatrix}'] + + for idx, row in df.iterrows(): + row_str = ' & '.join([f'{v:.4f}' if isinstance(v, (int, float)) else str(v) for v in row]) + r' \\' + lines.append(row_str) + + lines.append(r'\end{bmatrix}') + + if label: + return '$' + label + ' = ' + '\n'.join(lines) + '$' + else: + return '$' + '\n'.join(lines) + '$' + +def df_to_latex_array(df): + """Convert DataFrame to LaTeX array (for tables with headers).""" + n_rows, n_cols = df.shape + + # Build column format (centered columns) + col_format = 'c' * (n_cols + 1) # +1 for index + + # Start array + lines = [r'\begin{array}{' + col_format + '}'] + + # Header row + header = ' & '.join([''] + [str(c) for c in df.columns]) + r' \\' + lines.append(header) + lines.append(r'\hline') + + # Data rows + for idx, row in df.iterrows(): + row_str = str(idx) + ' & ' + ' & '.join([f'{v:.4f}' if isinstance(v, (int, float)) else str(v) for v in row]) + r' \\' + lines.append(row_str) + + lines.append(r'\end{array}') + + return '$' + '\n'.join(lines) + '$' ``` ## Model Setup @@ -106,7 +146,15 @@ x_t = \begin{bmatrix} k_t \\ \theta_t \end{bmatrix}, z_t = \begin{bmatrix} y_{n,t} \\ c_t \\ \Delta k_t \end{bmatrix}, ``` -and matrices +so that the true economy follows the state-space system + +```{math} +:label: true_ss +x_{t+1} = A x_t + \varepsilon_t, \qquad z_t = C x_t, +``` + +where $\varepsilon_t = \begin{bmatrix} 0 \\ \theta_t \end{bmatrix}$ has +covariance $E \varepsilon_t \varepsilon_t^\top = Q$ and the matrices are ```{math} A = \begin{bmatrix} @@ -118,26 +166,53 @@ C = \begin{bmatrix} f-1 & 1 \\ f-1 & 1-f^{-1} \\ 0 & f^{-1} +\end{bmatrix}, +\qquad +Q = \begin{bmatrix} +0 & 0 \\ +0 & 1 \end{bmatrix}. ``` +Note that $Q$ is singular because only the second component of $x_t$ +(the productivity shock $\theta_t$) receives an innovation; the +capital stock $k_t$ evolves deterministically given $\theta_t$. + The econometrician does not observe $z_t$ directly but instead sees $\bar z_t = z_t + v_t$, where $v_t$ is a vector of measurement errors. -Measurement errors are AR(1): +Measurement errors follow an AR(1) process: ```{math} +:label: meas_error_ar1 v_t = D v_{t-1} + \eta_t, ``` -with diagonal +where $\eta_t$ is a vector white noise with +$E \eta_t \eta_t^\top = \Sigma_\eta$ and +$E \varepsilon_t v_s^\top = 0$ for all $t, s$ +(measurement errors are orthogonal to the true state innovations). + +The autoregressive matrix and innovation standard deviations are ```{math} D = \operatorname{diag}(0.6, 0.7, 0.3), +\qquad +\sigma_\eta = (0.05, 0.035, 0.65), +``` + +so the unconditional covariance of $v_t$ is + +```{math} +R = \operatorname{diag}\!\left(\frac{\sigma_{\eta,i}^2}{1 - \rho_i^2}\right). ``` -and innovation standard deviations $(0.05, 0.035, 0.65)$. +The measurement errors are ordered from smallest to largest innovation +variance: income is measured most accurately ($\sigma_\eta = 0.05$), +consumption next ($\sigma_\eta = 0.035$), and investment least +accurately ($\sigma_\eta = 0.65$). +This ordering is central to the results below. ```{code-cell} ipython3 f = 1.05 @@ -162,7 +237,7 @@ Q = np.array([ ρ = np.array([0.6, 0.7, 0.3]) D = np.diag(ρ) -# Innovation std. 
devs shown in Table 1 +# Innovation std. devs σ_η = np.array([0.05, 0.035, 0.65]) Σ_η = np.diag(σ_η**2) @@ -170,9 +245,10 @@ D = np.diag(ρ) R = np.diag((σ_η / np.sqrt(1.0 - ρ**2))**2) print(f"f = {f}, β = 1/f = {β:.6f}") -print("\nA ="); display(pd.DataFrame(A)) -print("C ="); display(pd.DataFrame(C)) -print("D ="); display(pd.DataFrame(D)) +print() +display(Latex(df_to_latex_matrix(pd.DataFrame(A), 'A'))) +display(Latex(df_to_latex_matrix(pd.DataFrame(C), 'C'))) +display(Latex(df_to_latex_matrix(pd.DataFrame(D), 'D'))) ``` ## Kalman Filter @@ -212,7 +288,8 @@ def steady_state_kalman(A, C_obs, Q, R, W=None, tol=1e-13, max_iter=200_000): return K, S, V ``` -## Table 2: True Impulse Responses +(true-impulse-responses)= +## True Impulse Responses Before introducing measurement error, we verify the impulse response of the true system to a unit shock $\theta_0 = 1$. @@ -235,53 +312,101 @@ def table2_irf(A, C, n_lags=6): rep_table2 = table2_irf(A, C, n_lags=6) -pd.DataFrame( - np.round(rep_table2[:, 1:], 4), - columns=[r'$y_n$', r'$c$', r'$\Delta k$'], - index=pd.Index(range(6), name='Lag') -) +fig, ax = plt.subplots(figsize=(8, 4.5)) +ax.plot(rep_table2[:, 0], rep_table2[:, 1], 'o-', label=r'$y_n$', lw=2.5, markersize=7) +ax.plot(rep_table2[:, 0], rep_table2[:, 2], 's-', label=r'$c$', lw=2.5, markersize=7) +ax.plot(rep_table2[:, 0], rep_table2[:, 3], '^-', label=r'$\Delta k$', lw=2.5, markersize=7) +ax.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) +ax.set_xlabel('Lag', fontsize=12) +ax.set_ylabel('Response', fontsize=12) +ax.set_title(r'True impulse response to unit shock $\theta_0 = 1$', fontsize=13) +ax.legend(loc='best', fontsize=11, frameon=True, shadow=True) +ax.grid(alpha=0.3) +plt.tight_layout() +plt.show() ``` -## Model 1 (Raw Measurements): Tables 3 and 4 +## Model 1 (Raw Measurements) + +Model 1 is a classical errors-in-variables model: the data collecting +agency simply reports the error-corrupted data $\bar z_t = z_t + v_t$ +that it collects, making no attempt to adjust for measurement errors. -Model 1 treats the raw measured series $\bar z_t$ as the observables and -applies a Kalman filter to extract the state. +Because the measurement errors $v_t$ are serially correlated (AR(1)), +we cannot directly apply the Kalman filter to +$\bar z_t = C x_t + v_t$. +Following {cite:t}`Sargent1989` (Section III.B), we quasi-difference the +observation equation. + +Substituting $\bar z_t = C x_t + v_t$, $x_{t+1} = A x_t + \varepsilon_t$, +and $v_{t+1} = D v_t + \eta_t$ into $\bar z_{t+1} - D \bar z_t$ gives + +```{math} +:label: model1_obs +\bar z_{t+1} - D \bar z_t = \bar C\, x_t + C \varepsilon_t + \eta_t, +``` -Because the measurement errors $v_t$ are serially correlated, Sargent -quasi-differences the observation equation to obtain an innovation form -with serially uncorrelated errors. +where $\bar C = CA - DC$. -The transformed observation equation is +The composite observation noise in {eq}`model1_obs` is +$\bar\nu_t = C\varepsilon_t + \eta_t$, which is serially uncorrelated. +Its covariance, and the cross-covariance between the state noise +$\varepsilon_t$ and $\bar\nu_t$, are ```{math} -\bar z_t - D \bar z_{t-1} = (CA - DC)x_{t-1} + C w_t + \eta_t. +:label: model1_covs +R_1 = C Q C^\top + \Sigma_\eta, \qquad W_1 = Q C^\top. 
``` -Hence +The system $\{x_{t+1} = A x_t + \varepsilon_t,\; +\bar z_{t+1} - D\bar z_t = \bar C x_t + \bar\nu_t\}$ +with $\text{cov}(\varepsilon_t)=Q$, $\text{cov}(\bar\nu_t)=R_1$, and +$\text{cov}(\varepsilon_t, \bar\nu_t)=W_1$ now has serially uncorrelated +errors, so the standard {doc}`Kalman filter ` applies. + +The steady-state Kalman filter yields the **innovations representation** ```{math} -\bar C = CA - DC, \quad R_1 = CQC^\top + R, \quad W_1 = QC^\top. +:label: model1_innov +\hat x_{t+1} = A \hat x_t + K_1 u_t, \qquad +\bar z_{t+1} - D\bar z_t = \bar C \hat x_t + u_t, ``` +where $u_t = (\bar z_{t+1} - D\bar z_t) - +E[\bar z_{t+1} - D\bar z_t \mid \bar z_t, \bar z_{t-1}, \ldots]$ +is the innovation process, $K_1$ is the Kalman gain, and +$V_1 = \bar C S_1 \bar C^\top + R_1$ is the innovation covariance matrix +(with $S_1 = E[(x_t - \hat x_t)(x_t - \hat x_t)^\top]$ the steady-state +state estimation error covariance). + ```{code-cell} ipython3 C_bar = C @ A - D @ C -R1 = C @ Q @ C.T + R +R1 = C @ Q @ C.T + Σ_η W1 = Q @ C.T K1, S1, V1 = steady_state_kalman(A, C_bar, Q, R1, W1) ``` -With the Kalman gain in hand, we can derive the Wold moving-average -representation for the measured data. +### Wold representation for measured data + +With the innovations representation {eq}`model1_innov` in hand, we can +derive a Wold moving-average representation for the measured data +$\bar z_t$. + +From {eq}`model1_innov` and the quasi-differencing definition, the +measured data satisfy (see eq. 19 of {cite:t}`Sargent1989`) + +```{math} +:label: model1_wold +\bar z_{t+1} = (I - DL)^{-1}\bigl[\bar C(I - AL)^{-1}K_1 L + I\bigr] u_t, +``` -This representation tells us how measured $y_n$, $c$, and $\Delta k$ -respond over time to the orthogonalized innovations in the -innovation covariance matrix $V_1$. +where $L$ is the lag operator. -To recover the Wold representation, define the augmented state +To compute the Wold coefficients numerically, define the augmented state ```{math} -r_t = \begin{bmatrix} \hat x_{t-1} \\ z_{t-1} \end{bmatrix}, +r_t = \begin{bmatrix} \hat x_{t-1} \\ \bar z_{t-1} \end{bmatrix}, ``` with dynamics @@ -289,7 +414,7 @@ with dynamics ```{math} r_{t+1} = F_1 r_t + G_1 u_t, \qquad -z_t = H_1 r_t + u_t, +\bar z_t = H_1 r_t + u_t, ``` where @@ -310,6 +435,9 @@ I H_1 = [\bar C \;\; D]. ``` +The Wold coefficients are then $\psi_0 = I$ and +$\psi_j = H_1 F_1^{j-1} G_1$ for $j \geq 1$. + ```{code-cell} ipython3 F1 = np.block([ [A, np.zeros((2, 3))], @@ -348,15 +476,25 @@ resp1 = np.array([psi1[j] @ linalg.cholesky(V1, lower=True) for j in range(14)]) decomp1 = fev_contributions(psi1, V1, n_horizons=20) ``` -Table 3 reports the forecast-error-variance decomposition for Model 1. +### Forecast-error-variance decomposition + +To measure the relative importance of each innovation, we decompose +the $j$-step-ahead forecast-error variance of each measured variable. -Each panel shows the cumulative contribution of one orthogonalized +Write $\bar z_{t+j} - E_t \bar z_{t+j} = \sum_{i=0}^{j-1} \psi_i u_{t+j-i}$. +Let $P$ be the lower-triangular Cholesky factor of $V_1$ so that the +orthogonalized innovations are $e_t = P^{-1} u_t$. +Then the contribution of orthogonalized innovation $k$ to the +$j$-step-ahead variance of variable $m$ is +$\sum_{i=0}^{j-1} (\psi_i P)_{mk}^2$. + +Each panel below shows the cumulative contribution of one orthogonalized innovation to the forecast-error variance of $y_n$, $c$, and $\Delta k$ at horizons 1 through 20. 
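As a quick consistency check on this formula (an extra verification, not a calculation reported in the paper), note that $\psi_0 = I$, so at horizon 1 the contributions summed across the three orthogonalized innovations must reproduce the diagonal of $V_1 = P P^\top$:

```{code-cell} ipython3
P = linalg.cholesky(V1, lower=True)
# horizon-1 contributions, summed over the three innovations, equal diag(V1)
print(np.allclose((P**2).sum(axis=1), np.diag(V1)))
```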
```{code-cell} ipython3 horizons = np.arange(1, 21) -cols = [r'$y_n$', r'$c$', r'$\Delta k$'] +cols = [r'y_n', r'c', r'\Delta k'] def fev_table(decomp, shock_idx, horizons): return pd.DataFrame( @@ -364,26 +502,49 @@ def fev_table(decomp, shock_idx, horizons): columns=cols, index=pd.Index(horizons, name='Horizon') ) +``` -print("Table 3A: Contribution of innovation 1") -display(fev_table(decomp1, 0, horizons)) - -print("Table 3B: Contribution of innovation 2") -display(fev_table(decomp1, 1, horizons)) +```{code-cell} ipython3 +fig, axes = plt.subplots(1, 3, figsize=(15, 4.5)) + +for i, (shock_name, ax) in enumerate(zip([r'Innovation 1 ($y_n$)', r'Innovation 2 ($c$)', r'Innovation 3 ($\Delta k$)'], axes)): + fev_data = decomp1[:, i, :] + ax.plot(horizons, fev_data[0, :], label=r'$y_n$', lw=2.5) + ax.plot(horizons, fev_data[1, :], label=r'$c$', lw=2.5) + ax.plot(horizons, fev_data[2, :], label=r'$\Delta k$', lw=2.5) + ax.set_xlabel('Horizon', fontsize=12) + ax.set_ylabel('Contribution to FEV', fontsize=12) + ax.set_title(shock_name, fontsize=13) + ax.legend(loc='best', fontsize=10, frameon=True, shadow=True) + ax.grid(alpha=0.3) -print("Table 3C: Contribution of innovation 3") -display(fev_table(decomp1, 2, horizons)) +plt.tight_layout() +plt.show() ``` +These plots replicate Table 3 of {cite:t}`Sargent1989`. +The income innovation accounts for substantial proportions of +forecast-error variance in all three variables, while the consumption and +investment innovations contribute mainly to their own variances. +This is a **Granger causality** pattern: income appears to +Granger-cause consumption and investment, but not vice versa. +The pattern arises because income is the best-measured variable +($\sigma_\eta = 0.05$), so its innovation carries the most +information about the underlying structural shock $\theta_t$. + The innovation covariance matrix $V_1$ is: ```{code-cell} ipython3 -labels = [r'$y_n$', r'$c$', r'$\Delta k$'] -pd.DataFrame(np.round(V1, 4), index=labels, columns=labels) +labels = [r'y_n', r'c', r'\Delta k'] +df_v1 = pd.DataFrame(np.round(V1, 4), index=labels, columns=labels) +display(Latex(df_to_latex_matrix(df_v1))) ``` -Table 4 reports the orthogonalized Wold impulse responses for Model 1 -at lags 0 through 13. +### Wold impulse responses + +The orthogonalized Wold impulse responses $\psi_j P$ show how the +measured variables respond at lag $j$ to a one-standard-deviation +orthogonalized innovation. We plot lags 0 through 13. 
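Because $P$ is lower triangular, the lag-0 response matrix is $P$ itself: on impact, measured income responds only to its own innovation, consumption to the first two innovations, and investment to all three. This reflects the (income, consumption, investment) ordering used in the Cholesky factorization and is worth keeping in mind when reading the panels below.

```{code-cell} ipython3
# the impact (lag-0) responses equal the lower-triangular Cholesky factor of V1
P1 = linalg.cholesky(V1, lower=True)
print(np.round(P1, 4))
print(np.allclose(resp1[0], P1))
```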
```{code-cell} ipython3 lags = np.arange(14) @@ -394,34 +555,122 @@ def wold_response_table(resp, shock_idx, lags): columns=cols, index=pd.Index(lags, name='Lag') ) +``` -print("Table 4A: Response to innovation in y_n") -display(wold_response_table(resp1, 0, lags)) +```{code-cell} ipython3 +fig, axes = plt.subplots(1, 3, figsize=(15, 4.5)) + +for i, (shock_name, ax) in enumerate(zip([r'Innovation in $y_n$', r'Innovation in $c$', r'Innovation in $\Delta k$'], axes)): + ax.plot(lags, resp1[:, 0, i], label=r'$y_n$', lw=2.5) + ax.plot(lags, resp1[:, 1, i], label=r'$c$', lw=2.5) + ax.plot(lags, resp1[:, 2, i], label=r'$\Delta k$', lw=2.5) + ax.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) + ax.set_xlabel('Lag', fontsize=12) + ax.set_ylabel('Response', fontsize=12) + ax.set_title(shock_name, fontsize=13) + ax.legend(loc='best', fontsize=10, frameon=True, shadow=True) + ax.grid(alpha=0.3) -print("Table 4B: Response to innovation in c") -display(wold_response_table(resp1, 1, lags)) +plt.tight_layout() +plt.show() +``` + +These plots replicate Table 4 of {cite:t}`Sargent1989`. +An income innovation generates persistent responses in all variables +because, being the best-measured series, its innovation is dominated +by the true permanent shock $\theta_t$, which permanently raises the +capital stock and hence steady-state consumption and income. +A consumption innovation produces smaller, decaying responses +that reflect the AR(1) structure of its measurement error ($\rho = 0.7$). +An investment innovation has a large initial impact on investment itself, +consistent with the high measurement error variance ($\sigma_\eta = 0.65$), +but the effect dies out quickly. + +## Model 2 (Filtered Measurements) + +Model 2 corresponds to a data collecting agency that, instead of +reporting raw error-corrupted data, applies an optimal filter +to construct least-squares estimates of the true variables. -print("Table 4C: Response to innovation in Δk") -display(wold_response_table(resp1, 2, lags)) +Specifically, the agency uses the Kalman filter from Model 1 to form +$\hat x_t = E[x_t \mid \bar z_{t-1}, \bar z_{t-2}, \ldots]$ and reports +filtered estimates + +```{math} +\tilde z_t = G \hat x_t, ``` -## Model 2 (Filtered Measurements): Tables 5 and 6 +where $G = C$ is a selection matrix +(see eq. 23 of {cite:t}`Sargent1989`). -Model 2 takes a different approach: instead of working with the raw data, -the econometrician first applies the Kalman filter from Model 1 to -strip out measurement error and then treats the filtered estimates -$\hat z_t = C \hat x_t$ as if they were the true observations. +### State-space for filtered data -A second Kalman filter is then applied to the filtered series. +From the innovations representation {eq}`model1_innov`, the state +$\hat x_t$ evolves as -The state noise covariance for this second filter is +```{math} +:label: model2_state +\hat x_{t+1} = A \hat x_t + K_1 u_t. +``` + +The reported filtered data are then + +```{math} +:label: model2_obs +\tilde z_t = C \hat x_t + \eta_t, +``` + +where $\eta_t$ is a type 2 white-noise measurement error process +("typos") with presumably very small covariance matrix $R_2$. + +The state noise in {eq}`model2_state` is $K_1 u_t$, which has covariance + +```{math} +:label: model2_Q +Q_2 = K_1 V_1 K_1^\top. +``` + +The covariance matrix of the joint noise is +(see eq. 
25 of {cite:t}`Sargent1989`) ```{math} -Q_2 = K_1 V_1 K_1^\top, +E \begin{bmatrix} K_1 u_t \\ \eta_t \end{bmatrix} + \begin{bmatrix} K_1 u_t \\ \eta_t \end{bmatrix}^\top += \begin{bmatrix} Q_2 & 0 \\ 0 & R_2 \end{bmatrix}. ``` -We solve a second Kalman system with tiny measurement noise to regularize the -near-singular covariance matrix. +Since $R_2$ is close to or equal to zero (the filtered data have +negligible additional noise), we approximate it with a small +regularization term $R_2 = \epsilon I$ to keep the Kalman filter +numerically well-conditioned. + +A second Kalman filter applied to {eq}`model2_state`--{eq}`model2_obs` +yields a second innovations representation + +```{math} +:label: model2_innov +\hat{\hat x}_{t+1} = A \hat{\hat x}_t + K_2 a_t, +\qquad +\tilde z_t = C \hat{\hat x}_t + a_t, +``` + +where $a_t$ is the innovation process for the filtered data with +covariance $V_2 = C S_2 C^\top + R_2$. + +### Wold representation for filtered data + +The Wold moving-average representation for $\tilde z_t$ is +(see eq. 29 of {cite:t}`Sargent1989`) + +```{math} +:label: model2_wold +\tilde z_t = \bigl[C(I - AL)^{-1} K_2 L + I\bigr] a_t, +``` + +with coefficients $\psi_0 = I$ and $\psi_j = C A^{j-1} K_2$ for +$j \geq 1$. Note that this is simpler than the Model 1 Wold +representation {eq}`model1_wold` because there is no quasi-differencing +to undo. ```{code-cell} ipython3 Q2 = K1 @ V1 @ K1.T @@ -444,61 +693,95 @@ resp2 = np.array([psi2[j] @ linalg.cholesky(V2, lower=True) for j in range(14)]) decomp2 = fev_contributions(psi2, V2, n_horizons=20) ``` -Table 5 is the analogue of Table 3 for Model 2. +### Forecast-error-variance decomposition -Because the filtered data are nearly noiseless, the second and third -innovations contribute very little to forecast-error variance. +Because the filtered data are nearly noiseless, the innovation +covariance $V_2$ is close to singular with one dominant eigenvalue. +This means the filtered economy is driven by essentially one shock, +just like the true economy. ```{code-cell} ipython3 -print("Table 5A: Contribution of innovation 1") -display(fev_table(decomp2, 0, horizons)) - -print("Table 5B: Contribution of innovation 2 (×10³)") -display(pd.DataFrame( - np.round(decomp2[:, 1, :].T * 1e3, 4), - columns=cols, - index=pd.Index(horizons, name='Horizon') -)) +fig, axes = plt.subplots(1, 3, figsize=(15, 4.5)) + +for i, (shock_name, ax) in enumerate(zip([r'Innovation 1 ($y_n$)', r'Innovation 2 ($c$) $\times 10^3$', r'Innovation 3 ($\Delta k$) $\times 10^6$'], axes)): + scale = 1 if i == 0 else (1e3 if i == 1 else 1e6) + fev_data = decomp2[:, i, :] * scale + ax.plot(horizons, fev_data[0, :], label=r'$y_n$', lw=2.5) + ax.plot(horizons, fev_data[1, :], label=r'$c$', lw=2.5) + ax.plot(horizons, fev_data[2, :], label=r'$\Delta k$', lw=2.5) + ax.set_xlabel('Horizon', fontsize=12) + ax.set_ylabel('Contribution to FEV', fontsize=12) + ax.set_title(shock_name, fontsize=13) + ax.legend(loc='best', fontsize=10, frameon=True, shadow=True) + ax.grid(alpha=0.3) -print("Table 5C: Contribution of innovation 3 (×10⁶)") -display(pd.DataFrame( - np.round(decomp2[:, 2, :].T * 1e6, 4), - columns=cols, - index=pd.Index(horizons, name='Horizon') -)) +plt.tight_layout() +plt.show() ``` +These plots replicate Table 5 of {cite:t}`Sargent1989`. +In Model 2, the first innovation accounts for virtually all forecast-error +variance, just as in the true economy where the single structural shock +$\theta_t$ drives everything. 
+The second and third innovations contribute negligibly (note the scaling +factors of $10^3$ and $10^6$ required to make them visible). +This confirms that filtering strips away the measurement noise that created +the appearance of multiple independent sources of variation in Model 1. + The innovation covariance matrix $V_2$ for Model 2 is: ```{code-cell} ipython3 -pd.DataFrame(np.round(V2, 4), index=labels, columns=labels) +df_v2 = pd.DataFrame(np.round(V2, 4), index=labels, columns=labels) +display(Latex(df_to_latex_matrix(df_v2))) ``` -Table 6 reports the orthogonalized Wold impulse responses for Model 2. +### Wold impulse responses -```{code-cell} ipython3 -print("Table 6A: Response to innovation in y_n") -display(wold_response_table(resp2, 0, lags)) +The following plots show the orthogonalized Wold impulse responses for Model 2. -print("Table 6B: Response to innovation in c") -display(wold_response_table(resp2, 1, lags)) +```{code-cell} ipython3 +fig, axes = plt.subplots(1, 3, figsize=(15, 4.5)) + +for i, (shock_name, scale) in enumerate(zip([r'Innovation in $y_n$', r'Innovation in $c$ $\times 10^3$', r'Innovation in $\Delta k$ $\times 10^3$'], + [1, 1e3, 1e3])): + ax = axes[i] + ax.plot(lags, resp2[:, 0, i] * scale, label=r'$y_n$', lw=2.5) + ax.plot(lags, resp2[:, 1, i] * scale, label=r'$c$', lw=2.5) + ax.plot(lags, resp2[:, 2, i] * scale, label=r'$\Delta k$', lw=2.5) + ax.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) + ax.set_xlabel('Lag', fontsize=12) + ax.set_ylabel('Response', fontsize=12) + ax.set_title(shock_name, fontsize=13) + ax.legend(loc='best', fontsize=10, frameon=True, shadow=True) + ax.grid(alpha=0.3) -print("Table 6C: Response to innovation in Δk (×10³)") -display(pd.DataFrame( - np.round(resp2[:, :, 2] * 1e3, 4), - columns=cols, - index=pd.Index(lags, name='Lag') -)) +plt.tight_layout() +plt.show() ``` -## Simulation: Figures 1 through 9 and Table 7 +These plots replicate Table 6 of {cite:t}`Sargent1989`. +The income innovation in Model 2 produces responses that closely +approximate the true impulse response function from the structural +shock $\theta_t$ (compare with the figure in the +{ref}`true-impulse-responses` section above). +The consumption and investment innovations produce responses +that are orders of magnitude smaller (note the $10^3$ scaling), +confirming that the filtered data are driven by essentially one shock. + +A key implication: unlike Model 1, the filtered data from Model 2 +**cannot** reproduce the apparent Granger causality pattern that the +accelerator literature has documented empirically. +As {cite:t}`Sargent1989` emphasizes, the two models of measurement +produce quite different inferences about the economy's dynamics despite +sharing identical deep parameters. + +## Simulation The tables above characterize population moments of the two models. To see how the models perform on a finite sample, Sargent simulates 80 periods of true, measured, and filtered data and reports -covariance and correlation matrices (Table 7) together with -time-series plots (Figures 1 through 9). +covariance and correlation matrices together with time-series plots. We replicate these objects below. 
@@ -563,12 +846,12 @@ sim = simulate_series(seed=7909, T=80, k0=10.0) ```{code-cell} ipython3 def plot_true_vs_other(t, true_series, other_series, other_label, ylabel=""): - fig, ax = plt.subplots(figsize=(8, 3.6)) - ax.plot(t, true_series, lw=2, color="black", label="true") - ax.plot(t, other_series, lw=2, ls="--", color="#1f77b4", label=other_label) - ax.set_xlabel("time", fontsize=11) - ax.set_ylabel(ylabel, fontsize=11) - ax.legend(loc="best") + fig, ax = plt.subplots(figsize=(8, 4)) + ax.plot(t, true_series, lw=2.5, color="black", label="true") + ax.plot(t, other_series, lw=2.5, ls="--", color="#1f77b4", label=other_label) + ax.set_xlabel("Time", fontsize=12) + ax.set_ylabel(ylabel.capitalize(), fontsize=12) + ax.legend(loc="best", fontsize=11, frameon=True, shadow=True) ax.grid(alpha=0.3) plt.tight_layout() plt.show() @@ -613,16 +896,15 @@ mystnb: plot_true_vs_other(t, sim["y_true"], sim["y_meas"], "measured", ylabel="income") ``` -Figures 1 through 3 show how measurement error distorts each series. +The first three figures replicate Figures 1--3 of {cite:t}`Sargent1989`. +Investment is distorted the most because its measurement error +has the largest innovation variance ($\sigma_\eta = 0.65$), +while income is distorted the least ($\sigma_\eta = 0.05$). -Investment (Figure 2) is hit hardest because its measurement error -has the largest innovation variance ($\sigma_\eta = 0.65$). - -Figures 4 through 7 compare the true series with the Kalman-filtered -estimates from Model 1. - -The filter removes much of the measurement -noise, recovering series that track the truth closely. +The next four figures (Figures 4--7 in the paper) compare +true series with the Kalman-filtered estimates from Model 1. +The filter removes much of the measurement noise, recovering +series that track the truth closely. ```{code-cell} ipython3 --- @@ -672,59 +954,51 @@ mystnb: plot_true_vs_other(t, sim["k_true"], sim["k_filt"], "filtered", ylabel="capital stock") ``` -Figures 8 and 9 plot the national income identity residual -$c_t + \Delta k_t - y_{n,t}$. +The following figure plots the national income identity residual +$c_t + \Delta k_t - y_{n,t}$ for both measured and filtered data +(Figures 8--9 of {cite:t}`Sargent1989`). In the true model this identity holds exactly. - -For measured data (Figure 8) the residual is non-zero because +For measured data the residual is non-zero because independent measurement errors break the accounting identity. - -For filtered data (Figure 9) the Kalman filter approximately -restores the identity. +For filtered data the Kalman filter approximately restores the identity. ```{code-cell} ipython3 --- mystnb: figure: - caption: Measured consumption plus investment minus income - name: fig-measured-identity-residual + caption: "National income identity residual: measured (left) vs. 
filtered (right)" + name: fig-identity-residual image: - alt: National income identity residual for measured data over 80 time periods + alt: National income identity residual for measured and filtered data side by side --- -fig, ax = plt.subplots(figsize=(8, 3.6)) -ax.plot(t, sim["c_meas"] + sim["dk_meas"] - sim["y_meas"], color="#d62728", lw=2) -ax.set_xlabel("time", fontsize=11) -ax.set_ylabel("residual", fontsize=11) -ax.grid(alpha=0.3) -plt.tight_layout() -plt.show() -``` +fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4)) + +ax1.plot(t, sim["c_meas"] + sim["dk_meas"] - sim["y_meas"], color="#d62728", lw=2.5) +ax1.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) +ax1.set_xlabel("Time", fontsize=12) +ax1.set_ylabel("Residual", fontsize=12) +ax1.set_title(r'Measured: $c_t + \Delta k_t - y_{n,t}$', fontsize=13) +ax1.grid(alpha=0.3) + +ax2.plot(t, sim["c_filt"] + sim["dk_filt"] - sim["y_filt"], color="#2ca02c", lw=2.5) +ax2.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) +ax2.set_xlabel("Time", fontsize=12) +ax2.set_ylabel("Residual", fontsize=12) +ax2.set_title(r'Filtered: $c_t + \Delta k_t - y_{n,t}$', fontsize=13) +ax2.grid(alpha=0.3) -```{code-cell} ipython3 ---- -mystnb: - figure: - caption: Filtered consumption plus investment minus income - name: fig-filtered-identity-residual - image: - alt: National income identity residual for filtered data over 80 time periods ---- -fig, ax = plt.subplots(figsize=(8, 3.6)) -ax.plot(t, sim["c_filt"] + sim["dk_filt"] - sim["y_filt"], color="#2ca02c", lw=2) -ax.set_xlabel("time", fontsize=11) -ax.set_ylabel("residual", fontsize=11) -ax.grid(alpha=0.3) plt.tight_layout() plt.show() ``` -Table 7 reports covariance and correlation matrices among the true, -measured, and filtered versions of each variable. +The following covariance and correlation matrices replicate Table 7 +of {cite:t}`Sargent1989`. +For each variable we report the $3 \times 3$ covariance and correlation +matrices among the true, measured, and filtered versions. High correlations between true and filtered series confirm that the -Kalman filter does a good job of removing measurement noise. - +Kalman filter removes most measurement noise. Lower correlations between true and measured series quantify how much information is lost by using raw data. @@ -744,35 +1018,46 @@ corr_k = np.corrcoef(np.vstack([sim["k_true"], sim["k_filt"]])) tmf_labels = ['true', 'measured', 'filtered'] tf_labels = ['true', 'filtered'] +``` -print("Table 7A: Covariance matrix of consumption") -display(matrix_df(cov_c, tmf_labels)) +**Consumption** -- Measurement error inflates variance, but the filtered +series recovers a variance close to the truth. +The true-filtered correlation exceeds 0.99. -print("Table 7B: Correlation matrix of consumption") -display(matrix_df(corr_c, tmf_labels)) +```{code-cell} ipython3 +display(Latex(df_to_latex_matrix(matrix_df(cov_c, tmf_labels)))) +display(Latex(df_to_latex_matrix(matrix_df(corr_c, tmf_labels)))) +``` -print("Table 7C: Covariance matrix of investment") -display(matrix_df(cov_i, tmf_labels)) +**Investment** -- Because $\sigma_\eta = 0.65$ is large, measurement error +creates the most variance inflation here. +Despite this, the true-filtered correlation remains high, +demonstrating the filter's effectiveness even with severe noise. 
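To put "severe" in perspective (a back-of-the-envelope calculation using the parameters set above, not a number reported by Sargent), compare the population variance of true investment, $\operatorname{var}(\Delta k_t) = f^{-2}$, with the unconditional variance of its measurement error, $\sigma_\eta^2/(1-\rho^2)$: the noise variance is roughly half as large as the signal variance.

```{code-cell} ipython3
# population signal and noise variances for investment
signal_var = (1 / f)**2                  # var of Δk_t = θ_t / f with var(θ) = 1
noise_var = σ_η[2]**2 / (1 - ρ[2]**2)    # unconditional var of v_t for Δk
print(f"signal ≈ {signal_var:.3f}, noise ≈ {noise_var:.3f}")
```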
-print("Table 7D: Correlation matrix of investment") -display(matrix_df(corr_i, tmf_labels)) +```{code-cell} ipython3 +display(Latex(df_to_latex_matrix(matrix_df(cov_i, tmf_labels)))) +display(Latex(df_to_latex_matrix(matrix_df(corr_i, tmf_labels)))) +``` -print("Table 7E: Covariance matrix of income") -display(matrix_df(cov_y, tmf_labels)) +**Income** -- Income has the smallest measurement error, so measured +and true variances are close. True-filtered correlations are very high. -print("Table 7F: Correlation matrix of income") -display(matrix_df(corr_y, tmf_labels)) +```{code-cell} ipython3 +display(Latex(df_to_latex_matrix(matrix_df(cov_y, tmf_labels)))) +display(Latex(df_to_latex_matrix(matrix_df(corr_y, tmf_labels)))) +``` -print("Table 7G: Covariance matrix of capital") -display(matrix_df(cov_k, tf_labels)) +**Capital stock** -- The capital stock is never directly observed, yet +the filter recovers it with very high accuracy. -print("Table 7H: Correlation matrix of capital") -display(matrix_df(corr_k, tf_labels)) +```{code-cell} ipython3 +display(Latex(df_to_latex_matrix(matrix_df(cov_k, tf_labels)))) +display(Latex(df_to_latex_matrix(matrix_df(corr_k, tf_labels)))) ``` ## Summary -This lecture reproduced the tables and figures in {cite}`Sargent1989`, +This lecture reproduced the analysis in {cite}`Sargent1989`, which studies how measurement error alters an econometrician's view of a permanent income economy driven by the investment accelerator. @@ -785,18 +1070,19 @@ Several lessons emerge: * Measurement error is not a second-order issue: it can reshape inferences about which shocks drive which variables. -* The {doc}`Kalman filter ` effectively strips measurement noise - from the data. +* Model 1 reproduces the **Granger causality** pattern documented in the + empirical accelerator literature -- income appears to Granger-cause + consumption and investment -- but this pattern is an artifact of + measurement error ordering, not of the structural model. -* The filtered series track the truth closely - (Figures 4 through 7), and the near-zero residual in Figure 9 shows that - the filter approximately restores the national income accounting - identity that raw measurement error breaks (Figure 8). +* Model 2, working with filtered data, attributes nearly all variance to + the single structural shock $\theta_t$ and **cannot** reproduce the + Granger causality pattern. -* The forecast-error-variance decompositions (Tables 3 and 5) reveal - that Model 1 attributes substantial variance to measurement noise - innovations, while Model 2, working with cleaned data, attributes - nearly all variance to the single structural shock $\theta_t$. +* The {doc}`Kalman filter ` effectively strips measurement noise + from the data: the filtered series track the truth closely, and the + near-zero residual shows that the filter approximately restores the + national income accounting identity that raw measurement error breaks. 
These results connect to broader themes in this lecture series: the role of {doc}`linear state space models ` in From 23d89876aae4b1a52d5c29eb1113adefc4d416ad Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Mon, 9 Feb 2026 23:00:03 +1100 Subject: [PATCH 11/19] updates --- lectures/measurement_models.md | 722 ++++++++++++++++++++++----------- 1 file changed, 478 insertions(+), 244 deletions(-) diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md index 0f73e221a..637a039b4 100644 --- a/lectures/measurement_models.md +++ b/lectures/measurement_models.md @@ -36,23 +36,24 @@ The setting is a {doc}`permanent income ` economy in which the investment accelerator, the mechanism studied in {doc}`samuelson` and {doc}`chow_business_cycles`, drives business cycle fluctuations. -Sargent specifies a {doc}`linear state space model ` for the -true economy and then considers two ways of extracting information from +We specify a {doc}`linear state space model ` for the +true economy and then consider two ways of extracting information from noisy measurements: -- Model 1 applies a {doc}`Kalman filter ` directly to +- In Model 1, the data collecting agency simply reports raw (noisy) observations. -- Model 2 first filters the data to remove measurement error, - then computes dynamics from the filtered series. +- In Model 2, the agency applies an optimal + {doc}`Kalman filter ` to the noisy data and + reports least-squares estimates of the true variables. The two models produce different Wold representations and forecast-error-variance decompositions, even though they describe the same underlying economy. -In this lecture we reproduce the analysis from {cite}`Sargent1989` +In this lecture we reproduce the analysis from {cite:t}`Sargent1989` while studying the underlying mechanisms in the paper. 
-We use the following imports and precision settings for tables: +We use the following imports and functions for matrices and tables ```{code-cell} ipython3 import numpy as np @@ -61,14 +62,16 @@ import matplotlib.pyplot as plt from scipy import linalg from IPython.display import Latex -np.set_printoptions(precision=4, suppress=True) +np.set_printoptions(precision=3, suppress=True) def df_to_latex_matrix(df, label=''): - """Convert DataFrame to LaTeX matrix (for math matrices).""" + """Convert DataFrame to LaTeX matrix.""" lines = [r'\begin{bmatrix}'] for idx, row in df.iterrows(): - row_str = ' & '.join([f'{v:.4f}' if isinstance(v, (int, float)) else str(v) for v in row]) + r' \\' + row_str = ' & '.join( + [f'{v:.4f}' if isinstance(v, (int, float)) + else str(v) for v in row]) + r' \\' lines.append(row_str) lines.append(r'\end{bmatrix}') @@ -79,7 +82,7 @@ def df_to_latex_matrix(df, label=''): return '$' + '\n'.join(lines) + '$' def df_to_latex_array(df): - """Convert DataFrame to LaTeX array (for tables with headers).""" + """Convert DataFrame to LaTeX array.""" n_rows, n_cols = df.shape # Build column format (centered columns) @@ -95,7 +98,9 @@ def df_to_latex_array(df): # Data rows for idx, row in df.iterrows(): - row_str = str(idx) + ' & ' + ' & '.join([f'{v:.4f}' if isinstance(v, (int, float)) else str(v) for v in row]) + r' \\' + row_str = str(idx) + ' & ' + ' & '.join( + [f'{v:.3f}' if isinstance(v, (int, float)) else str(v) + for v in row]) + r' \\' lines.append(row_str) lines.append(r'\end{array}') @@ -103,47 +108,205 @@ def df_to_latex_array(df): return '$' + '\n'.join(lines) + '$' ``` -## Model Setup +## The economic model -The true economy is a version of the permanent income model -(see {doc}`perm_income`) in which a representative consumer -chooses consumption $c_t$ and capital accumulation $\Delta k_t$ -to maximize expected discounted utility subject to a budget -constraint. +The true economy is a linear-quadratic version of a stochastic +optimal growth model (see also {doc}`perm_income`). -Assume that the discount factor satisfies $\beta f = 1$ and that the -productivity shock $\theta_t$ is white noise. +A social planner maximizes -The optimal decision rules reduce the true system to +```{math} +:label: planner_obj +E \sum_{t=0}^{\infty} \beta^t \left( u_0 + u_1 c_t - \frac{u_2}{2} c_t^2 \right) +``` + +subject to the technology + +```{math} +:label: tech_constraint +c_t + k_{t+1} = f k_t + \theta_t, \qquad \beta f^2 > 1, +``` + +where $c_t$ is consumption, $k_t$ is the capital stock, +$f$ is the gross rate of return on capital, +and $\theta_t$ is an endowment or technology shock following + +```{math} +:label: shock_process +a(L)\,\theta_t = \varepsilon_t, +``` + +with $a(L) = 1 - a_1 L - a_2 L^2 - \cdots - a_r L^r$ having all roots +outside the unit circle. + +### Optimal decision rule + +The solution can be represented by the optimal decision rule +for $c_t$: ```{math} -\begin{aligned} -k_{t+1} &= k_t + f^{-1}\theta_t, \\ -y_{n,t} &= (f-1)k_t + \theta_t, \\ -c_t &= (f-1)k_t + (1-f^{-1})\theta_t, \\ -\Delta k_t &= f^{-1}\theta_t. -\end{aligned} +:label: opt_decision +c_t = \frac{-\alpha}{f-1} + + \left(1 - \frac{1}{\beta f^2}\right) + \frac{L - f^{-1} a(f^{-1})^{-1} a(L)}{L - f^{-1}}\,\theta_t + + f k_t, +\qquad +k_{t+1} = f k_t + \theta_t - c_t, ``` -with $f = 1.05$ and $\theta_t \sim \mathcal{N}(0, 1)$. +where $\alpha = u_1[1-(\beta f)^{-1}]/u_2$. 
+ +Equations {eq}`shock_process` and {eq}`opt_decision` exhibit the +cross-equation restrictions characteristic of rational expectations +models. + +### Net income and the accelerator + +Define net output or national income as + +```{math} +:label: net_income +y_{nt} = (f-1)k_t + \theta_t. +``` + +Note that {eq}`tech_constraint` and {eq}`net_income` imply +$(k_{t+1} - k_t) + c_t = y_{nt}$. + +To obtain both a version of {cite:t}`Friedman1956`'s geometric +distributed lag consumption function and a distributed lag +accelerator, we impose two assumptions: -Here $k_t$ is capital, $y_{n,t}$ is national income, $c_t$ is consumption, -and $\Delta k_t$ is net investment. +1. $a(L) = 1$, so that $\theta_t$ is white noise. +2. $\beta f = 1$, so the rate of return on capital equals the rate + of time preference. -Notice the investment accelerator at work: because $\Delta k_t = f^{-1}\theta_t$, -investment responds only to the innovation $\theta_t$, not to the level of -capital. +Assumption 1 is crucial for the strict form of the accelerator. + +Relaxing it to allow serially correlated $\theta_t$ preserves an +accelerator in a broad sense but loses the sharp geometric-lag +form of {eq}`accelerator`. + +Adding a second shock breaks the one-index structure entirely +and can generate nontrivial Granger causality even without +measurement error. + +The accelerator projection is also not invariant under +interventions that alter predictable components of income. + +Assumption 2 is less important, affecting only various constants. + +Under both assumptions, {eq}`opt_decision` simplifies to + +```{math} +:label: simple_crule +c_t = (1-f^{-1})\,\theta_t + (f-1)\,k_t. +``` + +When {eq}`simple_crule`, {eq}`net_income`, and +{eq}`tech_constraint` are combined, the optimal plan satisfies + +```{math} +:label: friedman_consumption +c_t = \left(\frac{1-\beta}{1-\beta L}\right) y_{nt}, +``` + +```{math} +:label: accelerator +k_{t+1} - k_t = f^{-1} \left(\frac{1-L}{1-\beta L}\right) y_{nt}, +``` + +```{math} +:label: income_process +y_{nt} = \theta_t + (1-\beta)(\theta_{t-1} + \theta_{t-2} + \cdots). +``` + +Equation {eq}`friedman_consumption` is Friedman's consumption +model: consumption is a geometric distributed lag of income, +with the decay coefficient $\beta$ equal to the discount factor. + +Equation {eq}`accelerator` is the distributed lag accelerator: +investment is a geometric distributed lag of the first difference +of income. This is the same mechanism that {cite:t}`Chow1968` documented empirically (see {doc}`chow_business_cycles`). -We can cast this as a {doc}`linear state space model ` by -defining state and observable vectors +Equation {eq}`income_process` says that $y_{nt}$ is an IMA(1,1) +process with innovation $\theta_t$. + +As {cite:t}`Muth1960` showed, such a process is optimally forecast +via a geometric distributed lag or "adaptive expectations" scheme. + +### The accelerator puzzle + +When all variables are measured accurately and are driven by +the single shock $\theta_t$, the spectral density of +$(c_t,\, k_{t+1}-k_t,\, y_{nt})$ has rank one at all frequencies. + +Each variable is an invertible one-sided distributed lag of the +same white noise, so no variable Granger-causes any other. + +Empirically, however, measures of output Granger-cause investment +but not vice versa. + +{cite:t}`Sargent1989` shows that measurement error can resolve +this puzzle. 
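
To see this one-index property in simulated data, here is a minimal sketch of our own (not code from the paper): with a single white-noise shock and no measurement error, lagged variables have no marginal predictive power for investment. The value $f = 1.05$ anticipates the calibration adopted below.

```{code-cell} ipython3
# Minimal sketch (our own illustration, not from the paper): with one
# white-noise shock and no measurement error, lagged series do not help
# predict investment; f = 1.05 anticipates the calibration used below
rng = np.random.default_rng(0)
f = 1.05
T_sim = 100_000
θ_sim = rng.standard_normal(T_sim)

# capital under the optimal plan: k_{t+1} = k_t + θ_t / f
k_path = np.zeros(T_sim + 1)
for t in range(T_sim):
    k_path[t + 1] = k_path[t] + θ_sim[t] / f

y_sim = (f - 1) * k_path[:-1] + θ_sim    # national income y_{n,t}
dk_sim = np.diff(k_path)                 # investment, equal to θ_t / f

# regress Δk_t on a constant, Δk_{t-1}, and y_{n,t-1}; with a single
# white-noise shock both slope coefficients are zero in population
X_sim = np.column_stack([np.ones(T_sim - 1), dk_sim[:-1], y_sim[:-1]])
coef, *_ = np.linalg.lstsq(X_sim, dk_sim[1:], rcond=None)
print(np.round(coef, 3))
```

The estimated slopes are close to zero: without measurement error, income does not appear to Granger-cause investment.
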
+ +To illustrate, suppose first that output $y_{nt}$ is measured +perfectly while consumption and capital are each polluted by +serially correlated measurement errors $v_{ct}$ and $v_{kt}$ +orthogonal to $\theta_t$. + +Let $\bar c_t$ and $\bar k_{t+1} - \bar k_t$ denote the measured +series. Then + +```{math} +:label: meas_consumption +\bar c_t = \left(\frac{1-\beta}{1-\beta L}\right) y_{nt} + v_{ct}, +``` + +```{math} +:label: meas_investment +\bar k_{t+1} - \bar k_t + = \beta\left(\frac{1-L}{1-\beta L}\right) y_{nt} + + (v_{k,t+1} - v_{kt}), +``` + +```{math} +:label: income_process_ma +y_{nt} = \theta_t + (1-\beta)(\theta_{t-1} + \theta_{t-2} + \cdots). +``` + +In this case income Granger-causes consumption and investment +but is not Granger-caused by them. + +In the numerical example below, $y_{nt}$ is also measured +with error: the agency reports $\bar y_{nt} = y_{nt} + v_{yt}$, +where $v_{yt}$ follows an AR(1) process orthogonal to $\theta_t$. + +When every series is corrupted by measurement error, every measured +variable Granger-causes every other. + +The strength of Granger causality depends on the relative +signal-to-noise ratios. + +In a one-common-index model like this one ($\theta_t$ is the +common index), the best-measured variable extends the most +Granger causality to the others. + +## State-space formulation + +We now map the economic model and the measurement process into +a recursive state-space framework. + +Set $f = 1.05$ and $\theta_t \sim \mathcal{N}(0, 1)$. + +Define the state and observable vectors ```{math} x_t = \begin{bmatrix} k_t \\ \theta_t \end{bmatrix}, \qquad -z_t = \begin{bmatrix} y_{n,t} \\ c_t \\ \Delta k_t \end{bmatrix}, +z_t = \begin{bmatrix} y_{nt} \\ c_t \\ \Delta k_t \end{bmatrix}, ``` so that the true economy follows the state-space system @@ -174,27 +337,28 @@ Q = \begin{bmatrix} \end{bmatrix}. ``` -Note that $Q$ is singular because only the second component of $x_t$ -(the productivity shock $\theta_t$) receives an innovation; the -capital stock $k_t$ evolves deterministically given $\theta_t$. +$Q$ is singular because there is only one source of randomness +$\theta_t$; the capital stock $k_t$ evolves deterministically +given $\theta_t$. + +### Measurement errors The econometrician does not observe $z_t$ directly but instead sees $\bar z_t = z_t + v_t$, where $v_t$ is a vector of measurement errors. -Measurement errors follow an AR(1) process: +Measurement errors follow an AR(1) process ```{math} :label: meas_error_ar1 -v_t = D v_{t-1} + \eta_t, +v_{t+1} = D v_t + \eta_t, ``` where $\eta_t$ is a vector white noise with $E \eta_t \eta_t^\top = \Sigma_\eta$ and -$E \varepsilon_t v_s^\top = 0$ for all $t, s$ -(measurement errors are orthogonal to the true state innovations). +$E \varepsilon_t v_s^\top = 0$ for all $t, s$. -The autoregressive matrix and innovation standard deviations are +The parameters are ```{math} D = \operatorname{diag}(0.6, 0.7, 0.3), @@ -208,11 +372,19 @@ so the unconditional covariance of $v_t$ is R = \operatorname{diag}\!\left(\frac{\sigma_{\eta,i}^2}{1 - \rho_i^2}\right). ``` -The measurement errors are ordered from smallest to largest innovation -variance: income is measured most accurately ($\sigma_\eta = 0.05$), -consumption next ($\sigma_\eta = 0.035$), and investment least -accurately ($\sigma_\eta = 0.65$). -This ordering is central to the results below. 
+Consumption has the smallest measurement error innovation variance +($\sigma_\eta = 0.035$), income is next ($\sigma_\eta = 0.05$), +and investment has the largest ($\sigma_\eta = 0.65$). + +However, the ordering that matters for the results below is the +signal-to-noise ratio. + +Income carries a coefficient of $1$ on $\theta_t$, +whereas consumption carries only $1 - f^{-1} \approx 0.048$. + +The income innovation is therefore by far the most informative +about $\theta_t$, even though its measurement error innovation +is slightly larger than consumption's. ```{code-cell} ipython3 f = 1.05 @@ -237,7 +409,7 @@ Q = np.array([ ρ = np.array([0.6, 0.7, 0.3]) D = np.diag(ρ) -# Innovation std. devs +# Innovation std. devs of η_t σ_η = np.array([0.05, 0.035, 0.65]) Σ_η = np.diag(σ_η**2) @@ -251,13 +423,13 @@ display(Latex(df_to_latex_matrix(pd.DataFrame(C), 'C'))) display(Latex(df_to_latex_matrix(pd.DataFrame(D), 'D'))) ``` -## Kalman Filter +## Kalman filter Both models require a steady-state {doc}`Kalman filter `. The function below iterates on the Riccati equation until convergence, returning the Kalman gain $K$, the state covariance $S$, and the -innovation covariance $V$. +innovation covariance $V$ ```{code-cell} ipython3 def steady_state_kalman(A, C_obs, Q, R, W=None, tol=1e-13, max_iter=200_000): @@ -289,7 +461,7 @@ def steady_state_kalman(A, C_obs, Q, R, W=None, tol=1e-13, max_iter=200_000): ``` (true-impulse-responses)= -## True Impulse Responses +## True impulse responses Before introducing measurement error, we verify the impulse response of the true system to a unit shock $\theta_0 = 1$. @@ -298,7 +470,7 @@ The response shows the investment accelerator clearly: the full impact on net income $y_n$ occurs at lag 0, while consumption adjusts by only $1 - f^{-1} \approx 0.048$ and investment absorbs the remainder. -From lag 1 onward the economy is in its new steady state. +From lag 1 onward the economy is in its new steady state ```{code-cell} ipython3 def table2_irf(A, C, n_lags=6): @@ -306,37 +478,31 @@ def table2_irf(A, C, n_lags=6): rows = [] for j in range(n_lags): y_n, c, d_k = C @ x - rows.append([j, y_n, c, d_k]) + rows.append([y_n, c, d_k]) x = A @ x - return np.array(rows) - -rep_table2 = table2_irf(A, C, n_lags=6) - -fig, ax = plt.subplots(figsize=(8, 4.5)) -ax.plot(rep_table2[:, 0], rep_table2[:, 1], 'o-', label=r'$y_n$', lw=2.5, markersize=7) -ax.plot(rep_table2[:, 0], rep_table2[:, 2], 's-', label=r'$c$', lw=2.5, markersize=7) -ax.plot(rep_table2[:, 0], rep_table2[:, 3], '^-', label=r'$\Delta k$', lw=2.5, markersize=7) -ax.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) -ax.set_xlabel('Lag', fontsize=12) -ax.set_ylabel('Response', fontsize=12) -ax.set_title(r'True impulse response to unit shock $\theta_0 = 1$', fontsize=13) -ax.legend(loc='best', fontsize=11, frameon=True, shadow=True) -ax.grid(alpha=0.3) -plt.tight_layout() -plt.show() + return pd.DataFrame(rows, columns=[r'y_n', r'c', r'\Delta k'], + index=pd.Index(range(n_lags), name='lag')) + +table2 = table2_irf(A, C, n_lags=6) +display(Latex(df_to_latex_array(table2))) ``` -## Model 1 (Raw Measurements) +## Model 1 (raw measurements) Model 1 is a classical errors-in-variables model: the data collecting agency simply reports the error-corrupted data $\bar z_t = z_t + v_t$ that it collects, making no attempt to adjust for measurement errors. -Because the measurement errors $v_t$ are serially correlated (AR(1)), -we cannot directly apply the Kalman filter to -$\bar z_t = C x_t + v_t$. 
-Following {cite:t}`Sargent1989` (Section III.B), we quasi-difference the -observation equation. +Because the measurement errors $v_t$ are serially correlated, +the standard Kalman filter with white-noise measurement error +cannot be applied directly to $\bar z_t = C x_t + v_t$. + +An alternative is to augment the state vector with the +measurement-error AR components (see Appendix B of +{cite:t}`Sargent1989`). + +Here we take the quasi-differencing route, which reduces the +system to one with serially uncorrelated observation noise. Substituting $\bar z_t = C x_t + v_t$, $x_{t+1} = A x_t + \varepsilon_t$, and $v_{t+1} = D v_t + \eta_t$ into $\bar z_{t+1} - D \bar z_t$ gives @@ -350,12 +516,13 @@ where $\bar C = CA - DC$. The composite observation noise in {eq}`model1_obs` is $\bar\nu_t = C\varepsilon_t + \eta_t$, which is serially uncorrelated. + Its covariance, and the cross-covariance between the state noise $\varepsilon_t$ and $\bar\nu_t$, are ```{math} :label: model1_covs -R_1 = C Q C^\top + \Sigma_\eta, \qquad W_1 = Q C^\top. +R_1 = C Q C^\top + R, \qquad W_1 = Q C^\top. ``` The system $\{x_{t+1} = A x_t + \varepsilon_t,\; @@ -379,9 +546,32 @@ $V_1 = \bar C S_1 \bar C^\top + R_1$ is the innovation covariance matrix (with $S_1 = E[(x_t - \hat x_t)(x_t - \hat x_t)^\top]$ the steady-state state estimation error covariance). +To compute the innovations $\{u_t\}$ recursively from the data +$\{\bar z_t\}$, it is useful to represent {eq}`model1_innov` as + +```{math} +:label: model1_recursion +\hat x_{t+1} = (A - K_1 \bar C)\,\hat x_t + K_1 \bar z_t, +\qquad +u_t = -\bar C\,\hat x_t + \bar z_t, +``` + +where $\bar z_t := \bar z_{t+1} - D\bar z_t$ is the quasi-differenced +observation. + +Given an initial $\hat x_0$, equation {eq}`model1_recursion` generates +the innovation sequence, from which the Gaussian log-likelihood +of a sample $\{\bar z_t,\, t=0,\ldots,T\}$ is + +```{math} +:label: model1_loglik +\mathcal{L}^* = -T\ln 2\pi - \tfrac{1}{2}T\ln|V_1| + - \tfrac{1}{2}\sum_{t=0}^{T-1} u_t' V_1^{-1} u_t. +``` + ```{code-cell} ipython3 C_bar = C @ A - D @ C -R1 = C @ Q @ C.T + Σ_η +R1 = C @ Q @ C.T + R W1 = Q @ C.T K1, S1, V1 = steady_state_kalman(A, C_bar, Q, R1, W1) @@ -394,7 +584,7 @@ derive a Wold moving-average representation for the measured data $\bar z_t$. From {eq}`model1_innov` and the quasi-differencing definition, the -measured data satisfy (see eq. 19 of {cite:t}`Sargent1989`) +measured data satisfy ```{math} :label: model1_wold @@ -472,7 +662,9 @@ def fev_contributions(psi, V, n_horizons=20): psi1 = measured_wold_coeffs(F1, G1, H1, n_terms=40) -resp1 = np.array([psi1[j] @ linalg.cholesky(V1, lower=True) for j in range(14)]) +# Non-orthogonalized: scale each column by its own innovation std dev +std_u1 = np.sqrt(np.diag(V1)) +resp1 = np.array([psi1[j] * std_u1 for j in range(14)]) decomp1 = fev_contributions(psi1, V1, n_horizons=20) ``` @@ -482,69 +674,79 @@ To measure the relative importance of each innovation, we decompose the $j$-step-ahead forecast-error variance of each measured variable. Write $\bar z_{t+j} - E_t \bar z_{t+j} = \sum_{i=0}^{j-1} \psi_i u_{t+j-i}$. + Let $P$ be the lower-triangular Cholesky factor of $V_1$ so that the orthogonalized innovations are $e_t = P^{-1} u_t$. + Then the contribution of orthogonalized innovation $k$ to the $j$-step-ahead variance of variable $m$ is $\sum_{i=0}^{j-1} (\psi_i P)_{mk}^2$. 
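
As a quick sanity check (our own addition, not a table from the paper), the contributions of the three orthogonalized innovations should sum to the total $j$-step-ahead forecast-error variance implied by the Wold coefficients and $V_1$:

```{code-cell} ipython3
# Check (our own): the orthogonalized contributions exhaust the total
# j-step-ahead forecast-error variance
P = linalg.cholesky(V1, lower=True)      # orthogonalizing factor
horizon = 5
total_fev = sum(psi1[i] @ V1 @ psi1[i].T for i in range(horizon))
by_shock = sum((psi1[i] @ P)**2 for i in range(horizon))
print(np.allclose(np.diag(total_fev), by_shock.sum(axis=1)))
```

The check returns `True`: summing the squared orthogonalized loadings across innovations recovers each variable's total forecast-error variance.
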
-Each panel below shows the cumulative contribution of one orthogonalized +The table below shows the cumulative contribution of each orthogonalized innovation to the forecast-error variance of $y_n$, $c$, and $\Delta k$ at horizons 1 through 20. ```{code-cell} ipython3 horizons = np.arange(1, 21) -cols = [r'y_n', r'c', r'\Delta k'] +labels = [r'y_n', r'c', r'\Delta k'] def fev_table(decomp, shock_idx, horizons): return pd.DataFrame( np.round(decomp[:, shock_idx, :].T, 4), - columns=cols, - index=pd.Index(horizons, name='Horizon') + columns=labels, + index=pd.Index(horizons, name='j') ) ``` ```{code-cell} ipython3 -fig, axes = plt.subplots(1, 3, figsize=(15, 4.5)) - -for i, (shock_name, ax) in enumerate(zip([r'Innovation 1 ($y_n$)', r'Innovation 2 ($c$)', r'Innovation 3 ($\Delta k$)'], axes)): - fev_data = decomp1[:, i, :] - ax.plot(horizons, fev_data[0, :], label=r'$y_n$', lw=2.5) - ax.plot(horizons, fev_data[1, :], label=r'$c$', lw=2.5) - ax.plot(horizons, fev_data[2, :], label=r'$\Delta k$', lw=2.5) - ax.set_xlabel('Horizon', fontsize=12) - ax.set_ylabel('Contribution to FEV', fontsize=12) - ax.set_title(shock_name, fontsize=13) - ax.legend(loc='best', fontsize=10, frameon=True, shadow=True) - ax.grid(alpha=0.3) +shock_titles = [r'\text{A. Innovation in } y_n', + r'\text{B. Innovation in } c', + r'\text{C. Innovation in } \Delta k'] -plt.tight_layout() -plt.show() +parts = [] +for i, title in enumerate(shock_titles): + arr = df_to_latex_array(fev_table(decomp1, i, horizons)).strip('$') + parts.append(r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') + +display(Latex('$' + r' \quad '.join(parts) + '$')) ``` -These plots replicate Table 3 of {cite:t}`Sargent1989`. The income innovation accounts for substantial proportions of forecast-error variance in all three variables, while the consumption and investment innovations contribute mainly to their own variances. + This is a **Granger causality** pattern: income appears to Granger-cause consumption and investment, but not vice versa. -The pattern arises because income is the best-measured variable -($\sigma_\eta = 0.05$), so its innovation carries the most -information about the underlying structural shock $\theta_t$. -The innovation covariance matrix $V_1$ is: +The pattern arises because income has the highest signal-to-noise +ratio: its coefficient on $\theta_t$ is $1$, so its innovation carries +the most information about the underlying structural shock + +The covariance matrix of the innovations is not diagonal, but the eigenvalues are well-separated, with the first eigenvalue much larger +than the others, consistent with the presence of a dominant common shock $\theta_t$ ```{code-cell} ipython3 -labels = [r'y_n', r'c', r'\Delta k'] +print('Covariance matrix of innovations:') df_v1 = pd.DataFrame(np.round(V1, 4), index=labels, columns=labels) display(Latex(df_to_latex_matrix(df_v1))) ``` +```{code-cell} ipython3 +print('Eigenvalues of covariance matrix:') +print(np.sort(np.linalg.eigvalsh(V1))[::-1].round(4)) +``` + ### Wold impulse responses -The orthogonalized Wold impulse responses $\psi_j P$ show how the -measured variables respond at lag $j$ to a one-standard-deviation -orthogonalized innovation. We plot lags 0 through 13. +The Wold impulse responses $\psi_j$ scaled by the standard +deviation of each innovation show how the measured variables +respond at lag $j$ to a one-standard-deviation shock. 
+ +Because $\psi_0 = I$, each innovation moves only its own +variable at impact (lag 0), with cross-variable effects +appearing from lag 1 onward. + +We report lags 0 through 13 ```{code-cell} ipython3 lags = np.arange(14) @@ -552,56 +754,57 @@ lags = np.arange(14) def wold_response_table(resp, shock_idx, lags): return pd.DataFrame( np.round(resp[:, :, shock_idx], 4), - columns=cols, - index=pd.Index(lags, name='Lag') + columns=labels, + index=pd.Index(lags, name='j') ) ``` ```{code-cell} ipython3 -fig, axes = plt.subplots(1, 3, figsize=(15, 4.5)) - -for i, (shock_name, ax) in enumerate(zip([r'Innovation in $y_n$', r'Innovation in $c$', r'Innovation in $\Delta k$'], axes)): - ax.plot(lags, resp1[:, 0, i], label=r'$y_n$', lw=2.5) - ax.plot(lags, resp1[:, 1, i], label=r'$c$', lw=2.5) - ax.plot(lags, resp1[:, 2, i], label=r'$\Delta k$', lw=2.5) - ax.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) - ax.set_xlabel('Lag', fontsize=12) - ax.set_ylabel('Response', fontsize=12) - ax.set_title(shock_name, fontsize=13) - ax.legend(loc='best', fontsize=10, frameon=True, shadow=True) - ax.grid(alpha=0.3) +wold_titles = [r'\text{A. Response to } y_n \text{ innovation}', + r'\text{B. Response to } c \text{ innovation}', + r'\text{C. Response to } \Delta k \text{ innovation}'] -plt.tight_layout() -plt.show() +parts = [] +for i, title in enumerate(wold_titles): + arr = df_to_latex_array(wold_response_table(resp1, i, lags)).strip('$') + parts.append(r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') + +display(Latex('$' + r' \quad '.join(parts) + '$')) ``` -These plots replicate Table 4 of {cite:t}`Sargent1989`. -An income innovation generates persistent responses in all variables -because, being the best-measured series, its innovation is dominated -by the true permanent shock $\theta_t$, which permanently raises the -capital stock and hence steady-state consumption and income. -A consumption innovation produces smaller, decaying responses -that reflect the AR(1) structure of its measurement error ($\rho = 0.7$). -An investment innovation has a large initial impact on investment itself, -consistent with the high measurement error variance ($\sigma_\eta = 0.65$), -but the effect dies out quickly. +At impact each innovation moves only its own variable. + +At subsequent lags the income innovation generates persistent +responses in all three variables because, being the best-measured +series, its innovation is dominated by the true permanent shock +$\theta_t$. -## Model 2 (Filtered Measurements) +The consumption and investment innovations produce responses that +decay according to the AR(1) structure of their respective +measurement errors ($\rho_c = 0.7$, $\rho_{\Delta k} = 0.3$), +with little spillover to other variables. + +## Model 2 (filtered measurements) Model 2 corresponds to a data collecting agency that, instead of reporting raw error-corrupted data, applies an optimal filter to construct least-squares estimates of the true variables. +This is a natural model for agencies that seasonally adjust +data (one-sided filtering of current and past observations) or +publish preliminary, revised, and final estimates of the same +variable (successive conditional expectations as more data +accumulate). 
+ Specifically, the agency uses the Kalman filter from Model 1 to form -$\hat x_t = E[x_t \mid \bar z_{t-1}, \bar z_{t-2}, \ldots]$ and reports +$\hat x_t = E[x_t \mid \bar z_t, \bar z_{t-1}, \ldots]$ and reports filtered estimates ```{math} \tilde z_t = G \hat x_t, ``` -where $G = C$ is a selection matrix -(see eq. 23 of {cite:t}`Sargent1989`). +where $G = C$ is a selection matrix. ### State-space for filtered data @@ -631,7 +834,6 @@ Q_2 = K_1 V_1 K_1^\top. ``` The covariance matrix of the joint noise is -(see eq. 25 of {cite:t}`Sargent1989`) ```{math} E \begin{bmatrix} K_1 u_t \\ \eta_t \end{bmatrix} @@ -649,18 +851,43 @@ yields a second innovations representation ```{math} :label: model2_innov -\hat{\hat x}_{t+1} = A \hat{\hat x}_t + K_2 a_t, +\check{x}_{t+1} = A \check{x}_t + K_2 a_t, \qquad -\tilde z_t = C \hat{\hat x}_t + a_t, +\tilde z_t = C \check{x}_t + a_t, ``` where $a_t$ is the innovation process for the filtered data with covariance $V_2 = C S_2 C^\top + R_2$. +To compute the innovations $\{a_t\}$ from observations on +$\tilde z_t$, use + +```{math} +:label: model2_recursion +\check{x}_{t+1} = (A - K_2 C)\,\check{x}_t + K_2 \tilde z_t, +\qquad +a_t = -C\,\check{x}_t + \tilde z_t. +``` + +The Gaussian log-likelihood for a sample of $T$ observations +$\{\tilde z_t\}$ is then + +```{math} +:label: model2_loglik +\mathcal{L}^{**} = -T\ln 2\pi - \tfrac{1}{2}T\ln|V_2| + - \tfrac{1}{2}\sum_{t=0}^{T-1} a_t' V_2^{-1} a_t. +``` + +Computing {eq}`model2_loglik` requires both the first Kalman filter +(to form $\hat x_t$ and $u_t$) and the second Kalman filter +(to form $\check{x}_t$ and $a_t$). + +In effect, the econometrician must retrace the steps that the agency +used to synthesize the filtered data. + ### Wold representation for filtered data The Wold moving-average representation for $\tilde z_t$ is -(see eq. 29 of {cite:t}`Sargent1989`) ```{math} :label: model2_wold @@ -668,13 +895,15 @@ The Wold moving-average representation for $\tilde z_t$ is ``` with coefficients $\psi_0 = I$ and $\psi_j = C A^{j-1} K_2$ for -$j \geq 1$. Note that this is simpler than the Model 1 Wold +$j \geq 1$. + +Note that this is simpler than the Model 1 Wold representation {eq}`model1_wold` because there is no quasi-differencing to undo. ```{code-cell} ipython3 Q2 = K1 @ V1 @ K1.T -ε = 1e-7 +ε = 1e-6 K2, S2, V2 = steady_state_kalman(A, C, Q2, ε * np.eye(3)) @@ -697,98 +926,90 @@ decomp2 = fev_contributions(psi2, V2, n_horizons=20) Because the filtered data are nearly noiseless, the innovation covariance $V_2$ is close to singular with one dominant eigenvalue. + This means the filtered economy is driven by essentially one shock, just like the true economy. 
```{code-cell} ipython3 -fig, axes = plt.subplots(1, 3, figsize=(15, 4.5)) - -for i, (shock_name, ax) in enumerate(zip([r'Innovation 1 ($y_n$)', r'Innovation 2 ($c$) $\times 10^3$', r'Innovation 3 ($\Delta k$) $\times 10^6$'], axes)): - scale = 1 if i == 0 else (1e3 if i == 1 else 1e6) - fev_data = decomp2[:, i, :] * scale - ax.plot(horizons, fev_data[0, :], label=r'$y_n$', lw=2.5) - ax.plot(horizons, fev_data[1, :], label=r'$c$', lw=2.5) - ax.plot(horizons, fev_data[2, :], label=r'$\Delta k$', lw=2.5) - ax.set_xlabel('Horizon', fontsize=12) - ax.set_ylabel('Contribution to FEV', fontsize=12) - ax.set_title(shock_name, fontsize=13) - ax.legend(loc='best', fontsize=10, frameon=True, shadow=True) - ax.grid(alpha=0.3) +parts = [] +for i, title in enumerate(shock_titles): + arr = df_to_latex_array(fev_table(decomp2, i, horizons)).strip('$') + parts.append(r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') -plt.tight_layout() -plt.show() +display(Latex('$' + r' \quad '.join(parts) + '$')) ``` -These plots replicate Table 5 of {cite:t}`Sargent1989`. In Model 2, the first innovation accounts for virtually all forecast-error variance, just as in the true economy where the single structural shock $\theta_t$ drives everything. -The second and third innovations contribute negligibly (note the scaling -factors of $10^3$ and $10^6$ required to make them visible). + +The second and third innovations contribute negligibly. + This confirms that filtering strips away the measurement noise that created the appearance of multiple independent sources of variation in Model 1. -The innovation covariance matrix $V_2$ for Model 2 is: - -```{code-cell} ipython3 -df_v2 = pd.DataFrame(np.round(V2, 4), index=labels, columns=labels) -display(Latex(df_to_latex_matrix(df_v2))) -``` ### Wold impulse responses -The following plots show the orthogonalized Wold impulse responses for Model 2. +Unlike Model 1, whose impulse responses use non-orthogonalized +innovations, the Model 2 Wold representation is orthogonalized +via a Cholesky decomposition of $V_2$ with the ordering +$y_n$, $c$, $\Delta k$. ```{code-cell} ipython3 -fig, axes = plt.subplots(1, 3, figsize=(15, 4.5)) - -for i, (shock_name, scale) in enumerate(zip([r'Innovation in $y_n$', r'Innovation in $c$ $\times 10^3$', r'Innovation in $\Delta k$ $\times 10^3$'], - [1, 1e3, 1e3])): - ax = axes[i] - ax.plot(lags, resp2[:, 0, i] * scale, label=r'$y_n$', lw=2.5) - ax.plot(lags, resp2[:, 1, i] * scale, label=r'$c$', lw=2.5) - ax.plot(lags, resp2[:, 2, i] * scale, label=r'$\Delta k$', lw=2.5) - ax.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) - ax.set_xlabel('Lag', fontsize=12) - ax.set_ylabel('Response', fontsize=12) - ax.set_title(shock_name, fontsize=13) - ax.legend(loc='best', fontsize=10, frameon=True, shadow=True) - ax.grid(alpha=0.3) +parts = [] +for i, title in enumerate(wold_titles): + arr = df_to_latex_array(wold_response_table(resp2, i, lags)).strip('$') + parts.append(r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') -plt.tight_layout() -plt.show() +display(Latex('$' + r' \quad '.join(parts) + '$')) ``` -These plots replicate Table 6 of {cite:t}`Sargent1989`. The income innovation in Model 2 produces responses that closely approximate the true impulse response function from the structural -shock $\theta_t$ (compare with the figure in the +shock $\theta_t$ (compare with the table in the {ref}`true-impulse-responses` section above). 
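
To make the comparison concrete, here is a small check of our own: rescaling the responses to the first orthogonalized innovation by its impact effect on $y_n$ gives numbers close to the true responses to a unit $\theta_0$ shock.

```{code-cell} ipython3
# Rescale the Model 2 responses to the first innovation by its impact on y_n
# and compare with the true impulse responses (our own check)
rescaled = resp2[:6, :, 0] / resp2[0, 0, 0]
print(np.round(rescaled, 3))
print(table2.round(3))
```
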
+ The consumption and investment innovations produce responses -that are orders of magnitude smaller (note the $10^3$ scaling), -confirming that the filtered data are driven by essentially one shock. +that are orders of magnitude smaller, confirming that the filtered +data are driven by essentially one shock. -A key implication: unlike Model 1, the filtered data from Model 2 -**cannot** reproduce the apparent Granger causality pattern that the +Unlike Model 1, the filtered data from Model 2 +*cannot* reproduce the apparent Granger causality pattern that the accelerator literature has documented empirically. + +We also report the covariance matrix and eigenvalues of the innovations for Model 2 + +```{code-cell} ipython3 +print('Covariance matrix of innovations:') +df_v2 = pd.DataFrame(np.round(V2, 4), index=labels, columns=labels) +display(Latex(df_to_latex_matrix(df_v2))) +``` + +```{code-cell} ipython3 +print('Eigenvalues of covariance matrix:') +print(np.sort(np.linalg.eigvalsh(V2))[::-1].round(4)) +``` + + As {cite:t}`Sargent1989` emphasizes, the two models of measurement produce quite different inferences about the economy's dynamics despite -sharing identical deep parameters. +sharing identical underlying parameters. ## Simulation The tables above characterize population moments of the two models. -To see how the models perform on a finite sample, Sargent simulates -80 periods of true, measured, and filtered data and reports +To see how the models perform on a finite sample, we simulate +80 periods of true, measured, and filtered data and report covariance and correlation matrices together with time-series plots. -We replicate these objects below. +We replicate these objects below ```{code-cell} ipython3 -def simulate_series(seed=7909, T=80, k0=10.0): +def simulate_series(seed=0, T=80, k0=10.0): """ - Simulate true, measured, and filtered series for Figures 1--9. + Simulate true, measured, and filtered series. """ rng = np.random.default_rng(seed) @@ -835,24 +1056,25 @@ def simulate_series(seed=7909, T=80, k0=10.0): out = { "y_true": y, "c_true": c, "dk_true": dk, "k_true": k[:-1], - "y_meas": z_meas[:, 0], "c_meas": z_meas[:, 1], "dk_meas": z_meas[:, 2], - "y_filt": z_filt[:, 0], "c_filt": z_filt[:, 1], "dk_filt": z_filt[:, 2], "k_filt": k_filt + "y_meas": z_meas[:, 0], "c_meas": z_meas[:, 1], + "dk_meas": z_meas[:, 2], + "y_filt": z_filt[:, 0], "c_filt": z_filt[:, 1], + "dk_filt": z_filt[:, 2], "k_filt": k_filt } return out -sim = simulate_series(seed=7909, T=80, k0=10.0) +sim = simulate_series(seed=0, T=80, k0=10.0) ``` ```{code-cell} ipython3 def plot_true_vs_other(t, true_series, other_series, other_label, ylabel=""): fig, ax = plt.subplots(figsize=(8, 4)) - ax.plot(t, true_series, lw=2.5, color="black", label="true") - ax.plot(t, other_series, lw=2.5, ls="--", color="#1f77b4", label=other_label) - ax.set_xlabel("Time", fontsize=12) - ax.set_ylabel(ylabel.capitalize(), fontsize=12) + ax.plot(t, true_series, lw=2, color="black", label="true") + ax.plot(t, other_series, lw=2, ls="--", color="#1f77b4", label=other_label) + ax.set_xlabel("time", fontsize=12) + ax.set_ylabel(ylabel, fontsize=12) ax.legend(loc="best", fontsize=11, frameon=True, shadow=True) - ax.grid(alpha=0.3) plt.tight_layout() plt.show() @@ -896,13 +1118,13 @@ mystnb: plot_true_vs_other(t, sim["y_true"], sim["y_meas"], "measured", ylabel="income") ``` -The first three figures replicate Figures 1--3 of {cite:t}`Sargent1989`. 
Investment is distorted the most because its measurement error has the largest innovation variance ($\sigma_\eta = 0.65$), while income is distorted the least ($\sigma_\eta = 0.05$). -The next four figures (Figures 4--7 in the paper) compare -true series with the Kalman-filtered estimates from Model 1. +The next four figures compare true series with the +Kalman-filtered estimates from Model 1. + The filter removes much of the measurement noise, recovering series that track the truth closely. @@ -955,12 +1177,13 @@ plot_true_vs_other(t, sim["k_true"], sim["k_filt"], "filtered", ylabel="capital ``` The following figure plots the national income identity residual -$c_t + \Delta k_t - y_{n,t}$ for both measured and filtered data -(Figures 8--9 of {cite:t}`Sargent1989`). +$c_t + \Delta k_t - y_{n,t}$ for both measured and filtered data. In the true model this identity holds exactly. + For measured data the residual is non-zero because independent measurement errors break the accounting identity. + For filtered data the Kalman filter approximately restores the identity. ```{code-cell} ipython3 @@ -974,31 +1197,31 @@ mystnb: --- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4)) -ax1.plot(t, sim["c_meas"] + sim["dk_meas"] - sim["y_meas"], color="#d62728", lw=2.5) +ax1.plot(t, sim["c_meas"] + sim["dk_meas"] - sim["y_meas"], color="#d62728", lw=2) ax1.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) -ax1.set_xlabel("Time", fontsize=12) -ax1.set_ylabel("Residual", fontsize=12) +ax1.set_xlabel("time", fontsize=12) +ax1.set_ylabel("residual", fontsize=12) ax1.set_title(r'Measured: $c_t + \Delta k_t - y_{n,t}$', fontsize=13) -ax1.grid(alpha=0.3) -ax2.plot(t, sim["c_filt"] + sim["dk_filt"] - sim["y_filt"], color="#2ca02c", lw=2.5) +ax2.plot(t, sim["c_filt"] + sim["dk_filt"] - sim["y_filt"], color="#2ca02c", lw=2) ax2.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) -ax2.set_xlabel("Time", fontsize=12) -ax2.set_ylabel("Residual", fontsize=12) +ax2.set_xlabel("time", fontsize=12) +ax2.set_ylabel("residual", fontsize=12) ax2.set_title(r'Filtered: $c_t + \Delta k_t - y_{n,t}$', fontsize=13) -ax2.grid(alpha=0.3) plt.tight_layout() plt.show() ``` -The following covariance and correlation matrices replicate Table 7 -of {cite:t}`Sargent1989`. +The following covariance and correlation matrices compare the true, +measured, and filtered versions of each variable. + For each variable we report the $3 \times 3$ covariance and correlation matrices among the true, measured, and filtered versions. High correlations between true and filtered series confirm that the Kalman filter removes most measurement noise. + Lower correlations between true and measured series quantify how much information is lost by using raw data. @@ -1020,63 +1243,84 @@ tmf_labels = ['true', 'measured', 'filtered'] tf_labels = ['true', 'filtered'] ``` -**Consumption** -- Measurement error inflates variance, but the filtered -series recovers a variance close to the truth. -The true-filtered correlation exceeds 0.99. +**Consumption** -- Measurement error inflates the variance of measured +consumption relative to the truth, as the diagonal of the covariance +matrix shows. ```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(cov_c, tmf_labels)))) +``` + +The correlation matrix confirms that the filtered series recovers the +true series almost perfectly (true-filtered correlation exceeds 0.99). 
+ +```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(corr_c, tmf_labels)))) ``` **Investment** -- Because $\sigma_\eta = 0.65$ is large, measurement error creates the most variance inflation here. + +```{code-cell} ipython3 +display(Latex(df_to_latex_matrix(matrix_df(cov_i, tmf_labels)))) +``` + Despite this, the true-filtered correlation remains high, demonstrating the filter's effectiveness even with severe noise. ```{code-cell} ipython3 -display(Latex(df_to_latex_matrix(matrix_df(cov_i, tmf_labels)))) display(Latex(df_to_latex_matrix(matrix_df(corr_i, tmf_labels)))) ``` -**Income** -- Income has the smallest measurement error, so measured -and true variances are close. True-filtered correlations are very high. +**Income** -- Income has the smallest measurement error ($\sigma_\eta = 0.05$), +so measured and true covariances are nearly identical. ```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(cov_y, tmf_labels)))) +``` + +The correlation matrix shows that both measured and filtered series +track the truth very closely. + +```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(corr_y, tmf_labels)))) ``` **Capital stock** -- The capital stock is never directly observed, yet -the filter recovers it with very high accuracy. +the covariance matrix shows that the filter recovers it with very +high accuracy. ```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(cov_k, tf_labels)))) +``` + +The near-unity correlation confirms this. + +```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(corr_k, tf_labels)))) ``` ## Summary -This lecture reproduced the analysis in {cite}`Sargent1989`, -which studies how measurement error alters an econometrician's view +{cite}`Sargent1989` studies how measurement error alters an econometrician's view of a permanent income economy driven by the investment accelerator. -Several lessons emerge: +We had the following findings: * The Wold representations and variance decompositions of Model 1 (raw measurements) and Model 2 (filtered measurements) are quite different, even though the underlying economy is the same. -* Measurement error is not a second-order issue: it can +* Measurement error can reshape inferences about which shocks drive which variables. * Model 1 reproduces the **Granger causality** pattern documented in the - empirical accelerator literature -- income appears to Granger-cause - consumption and investment -- but this pattern is an artifact of + empirical accelerator literature: income appears to Granger-cause + consumption and investment, but this pattern is an artifact of measurement error ordering, not of the structural model. * Model 2, working with filtered data, attributes nearly all variance to - the single structural shock $\theta_t$ and **cannot** reproduce the + the single structural shock $\theta_t$ and *cannot* reproduce the Granger causality pattern. * The {doc}`Kalman filter ` effectively strips measurement noise @@ -1084,13 +1328,3 @@ Several lessons emerge: near-zero residual shows that the filter approximately restores the national income accounting identity that raw measurement error breaks. -These results connect to broader themes in this lecture series: -the role of {doc}`linear state space models ` in -representing economic dynamics, the power of {doc}`Kalman filtering ` -for signal extraction, and the importance of the investment accelerator -for understanding business cycles ({doc}`samuelson`, -{doc}`chow_business_cycles`). 
- -## References - -* {cite}`Sargent1989` From 7857fe4b151faeab51d1656c7465f5e1c33bdd20 Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Mon, 9 Feb 2026 23:58:28 +1100 Subject: [PATCH 12/19] updates --- lectures/measurement_models.md | 284 ++++++++++++++++++++------------- 1 file changed, 176 insertions(+), 108 deletions(-) diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md index 637a039b4..6e5170226 100644 --- a/lectures/measurement_models.md +++ b/lectures/measurement_models.md @@ -50,10 +50,10 @@ The two models produce different Wold representations and forecast-error-variance decompositions, even though they describe the same underlying economy. -In this lecture we reproduce the analysis from {cite:t}`Sargent1989` -while studying the underlying mechanisms in the paper. +In this lecture we follow {cite:t}`Sargent1989` and study how +alternative measurement schemes change empirical implications. -We use the following imports and functions for matrices and tables +We start with imports and helper functions used throughout. ```{code-cell} ipython3 import numpy as np @@ -108,6 +108,89 @@ def df_to_latex_array(df): return '$' + '\n'.join(lines) + '$' ``` +## Classical formulation + +Before moving to state-space methods, {cite:t}`Sargent1989` formulates +both measurement models in classical Wold form. + +This setup separates: + +- The law of motion for true economic variables. +- The law of motion for measurement errors. +- The map from these two objects to observables used by an econometrician. + +Let the true data be + +```{math} +:label: classical_true_wold +Z_t = c_Z(L)\,\varepsilon_t^Z, \qquad +E\varepsilon_t^Z {\varepsilon_t^Z}' = I. +``` + +In Model 1 (raw reports), the agency observes and reports + +```{math} +:label: classical_model1_meas +z_t = Z_t + v_t, \qquad +v_t = c_v(L)\,\varepsilon_t^v, \qquad +E(Z_t v_s') = 0\ \forall t,s. +``` + +Then measured data have Wold representation + +```{math} +:label: classical_model1_wold +z_t = c_z(L)\,\varepsilon_t, +``` + +with spectral factorization + +```{math} +:label: classical_model1_factor +c_z(s)c_z(s^{-1})' = c_Z(s)c_Z(s^{-1})' + c_v(s)c_v(s^{-1})'. +``` + +In Model 2 (filtered reports), the agency reports + +```{math} +:label: classical_model2_report +\tilde z_t = E[Z_t \mid z_t, z_{t-1}, \ldots] = h(L) z_t, +``` + +where + +```{math} +:label: classical_model2_filter +h(L) += \Big[ + c_Z(L)c_Z(L^{-1})' + \big(c_z(L^{-1})'\big)^{-1} + \Big]_+ c_z(L)^{-1}, +``` + +and $[\cdot]_+$ keeps only nonnegative powers of $L$. + +Filtered reports satisfy + +```{math} +:label: classical_model2_wold +\tilde z_t = c_{\tilde z}(L)\,a_t, +``` + +with + +```{math} +:label: classical_model2_factor +c_{\tilde z}(s)c_{\tilde z}(s^{-1})' += h(s)c_z(s)c_z(s^{-1})'h(s^{-1})'. +``` + +These two data-generation schemes imply different Gaussian likelihood +functions. + +In the rest of the lecture, we switch to a recursive state-space +representation because it makes these objects easy to compute. + ## The economic model The true economy is a linear-quadratic version of a stochastic @@ -294,6 +377,8 @@ In a one-common-index model like this one ($\theta_t$ is the common index), the best-measured variable extends the most Granger causality to the others. +This mechanism drives the numerical results below. + ## State-space formulation We now map the economic model and the measurement process into @@ -372,19 +457,14 @@ so the unconditional covariance of $v_t$ is R = \operatorname{diag}\!\left(\frac{\sigma_{\eta,i}^2}{1 - \rho_i^2}\right). 
``` -Consumption has the smallest measurement error innovation variance -($\sigma_\eta = 0.035$), income is next ($\sigma_\eta = 0.05$), -and investment has the largest ($\sigma_\eta = 0.65$). - -However, the ordering that matters for the results below is the -signal-to-noise ratio. +The innovation variances are smallest for consumption +($\sigma_\eta = 0.035$), next for income ($\sigma_\eta = 0.05$), +and largest for investment ($\sigma_\eta = 0.65$). -Income carries a coefficient of $1$ on $\theta_t$, -whereas consumption carries only $1 - f^{-1} \approx 0.048$. - -The income innovation is therefore by far the most informative -about $\theta_t$, even though its measurement error innovation -is slightly larger than consumption's. +As in {cite:t}`Sargent1989`, what matters for Granger-causality +asymmetries is the overall measurement quality in the full system: +output is relatively well measured while investment is relatively +poorly measured. ```{code-cell} ipython3 f = 1.05 @@ -463,7 +543,7 @@ def steady_state_kalman(A, C_obs, Q, R, W=None, tol=1e-13, max_iter=200_000): (true-impulse-responses)= ## True impulse responses -Before introducing measurement error, we verify the impulse response of +Before introducing measurement error, we compute the impulse response of the true system to a unit shock $\theta_0 = 1$. The response shows the investment accelerator clearly: the full impact on @@ -662,9 +742,7 @@ def fev_contributions(psi, V, n_horizons=20): psi1 = measured_wold_coeffs(F1, G1, H1, n_terms=40) -# Non-orthogonalized: scale each column by its own innovation std dev -std_u1 = np.sqrt(np.diag(V1)) -resp1 = np.array([psi1[j] * std_u1 for j in range(14)]) +resp1 = np.array([psi1[j] @ linalg.cholesky(V1, lower=True) for j in range(14)]) decomp1 = fev_contributions(psi1, V1, n_horizons=20) ``` @@ -686,6 +764,11 @@ The table below shows the cumulative contribution of each orthogonalized innovation to the forecast-error variance of $y_n$, $c$, and $\Delta k$ at horizons 1 through 20. +Each panel fixes one orthogonalized innovation and reports its +cumulative contribution to each variable's forecast-error variance. + +Rows are forecast horizons and columns are forecasted variables. + ```{code-cell} ipython3 horizons = np.arange(1, 21) labels = [r'y_n', r'c', r'\Delta k'] @@ -718,12 +801,14 @@ investment innovations contribute mainly to their own variances. This is a **Granger causality** pattern: income appears to Granger-cause consumption and investment, but not vice versa. -The pattern arises because income has the highest signal-to-noise -ratio: its coefficient on $\theta_t$ is $1$, so its innovation carries -the most information about the underlying structural shock +This matches the paper's message that, in a one-common-index model, +the relatively best measured series has the strongest predictive content. + +The covariance matrix of the innovations is not diagonal, but the +eigenvalues are well separated. -The covariance matrix of the innovations is not diagonal, but the eigenvalues are well-separated, with the first eigenvalue much larger -than the others, consistent with the presence of a dominant common shock $\theta_t$ +The first eigenvalue is much larger than the others, consistent with +the presence of a dominant common shock $\theta_t$. 
```{code-cell} ipython3 print('Covariance matrix of innovations:') @@ -738,15 +823,12 @@ print(np.sort(np.linalg.eigvalsh(V1))[::-1].round(4)) ### Wold impulse responses -The Wold impulse responses $\psi_j$ scaled by the standard -deviation of each innovation show how the measured variables -respond at lag $j$ to a one-standard-deviation shock. - -Because $\psi_0 = I$, each innovation moves only its own -variable at impact (lag 0), with cross-variable effects -appearing from lag 1 onward. +The Wold impulse responses are reported using orthogonalized +innovations (Cholesky factorization of $V_1$ with ordering +$y_n$, $c$, $\Delta k$). -We report lags 0 through 13 +Under this identification, lag-0 responses reflect both +contemporaneous covariance and the Cholesky ordering. ```{code-cell} ipython3 lags = np.arange(14) @@ -772,7 +854,8 @@ for i, title in enumerate(wold_titles): display(Latex('$' + r' \quad '.join(parts) + '$')) ``` -At impact each innovation moves only its own variable. +At impact, the first orthogonalized innovation (ordered as output) +loads on all three measured variables, matching the paper's Table 4. At subsequent lags the income innovation generates persistent responses in all three variables because, being the best-measured @@ -948,13 +1031,29 @@ The second and third innovations contribute negligibly. This confirms that filtering strips away the measurement noise that created the appearance of multiple independent sources of variation in Model 1. +The covariance matrix and eigenvalues of the Model 2 innovations are + +```{code-cell} ipython3 +print('Covariance matrix of innovations:') +df_v2 = pd.DataFrame(np.round(V2, 4), index=labels, columns=labels) +display(Latex(df_to_latex_matrix(df_v2))) +``` + +```{code-cell} ipython3 +print('Eigenvalues of covariance matrix:') +print(np.sort(np.linalg.eigvalsh(V2))[::-1].round(4)) +``` + +As {cite:t}`Sargent1989` emphasizes, the two models of measurement +produce quite different inferences about the economy's dynamics despite +sharing identical underlying parameters. + + ### Wold impulse responses -Unlike Model 1, whose impulse responses use non-orthogonalized -innovations, the Model 2 Wold representation is orthogonalized -via a Cholesky decomposition of $V_2$ with the ordering -$y_n$, $c$, $\Delta k$. +We again use orthogonalized Wold responses with a Cholesky +decomposition of $V_2$ ordered as $y_n$, $c$, $\Delta k$. ```{code-cell} ipython3 parts = [] @@ -978,36 +1077,16 @@ Unlike Model 1, the filtered data from Model 2 *cannot* reproduce the apparent Granger causality pattern that the accelerator literature has documented empirically. -We also report the covariance matrix and eigenvalues of the innovations for Model 2 - -```{code-cell} ipython3 -print('Covariance matrix of innovations:') -df_v2 = pd.DataFrame(np.round(V2, 4), index=labels, columns=labels) -display(Latex(df_to_latex_matrix(df_v2))) -``` - -```{code-cell} ipython3 -print('Eigenvalues of covariance matrix:') -print(np.sort(np.linalg.eigvalsh(V2))[::-1].round(4)) -``` - - -As {cite:t}`Sargent1989` emphasizes, the two models of measurement -produce quite different inferences about the economy's dynamics despite -sharing identical underlying parameters. ## Simulation The tables above characterize population moments of the two models. -To see how the models perform on a finite sample, we simulate -80 periods of true, measured, and filtered data and report -covariance and correlation matrices together with time-series plots. 
- -We replicate these objects below +We now simulate 80 periods of true, measured, and filtered data +to compare population implications with finite-sample behavior. ```{code-cell} ipython3 -def simulate_series(seed=0, T=80, k0=10.0): +def simulate_series(seed=7909, T=80, k0=10.0): """ Simulate true, measured, and filtered series. """ @@ -1064,7 +1143,7 @@ def simulate_series(seed=0, T=80, k0=10.0): return out -sim = simulate_series(seed=0, T=80, k0=10.0) +sim = simulate_series(seed=7909, T=80, k0=10.0) ``` ```{code-cell} ipython3 @@ -1122,11 +1201,8 @@ Investment is distorted the most because its measurement error has the largest innovation variance ($\sigma_\eta = 0.65$), while income is distorted the least ($\sigma_\eta = 0.05$). -The next four figures compare true series with the -Kalman-filtered estimates from Model 1. - -The filter removes much of the measurement noise, recovering -series that track the truth closely. +The Kalman-filtered estimates from Model 1 remove much of the +measurement noise and track the truth closely. ```{code-cell} ipython3 --- @@ -1176,15 +1252,13 @@ mystnb: plot_true_vs_other(t, sim["k_true"], sim["k_filt"], "filtered", ylabel="capital stock") ``` -The following figure plots the national income identity residual -$c_t + \Delta k_t - y_{n,t}$ for both measured and filtered data. +In the true model the national income identity +$c_t + \Delta k_t = y_{n,t}$ holds exactly. -In the true model this identity holds exactly. +Independent measurement errors break this accounting identity +in the measured data. -For measured data the residual is non-zero because -independent measurement errors break the accounting identity. - -For filtered data the Kalman filter approximately restores the identity. +The Kalman filter approximately restores it. ```{code-cell} ipython3 --- @@ -1213,17 +1287,13 @@ plt.tight_layout() plt.show() ``` -The following covariance and correlation matrices compare the true, -measured, and filtered versions of each variable. - -For each variable we report the $3 \times 3$ covariance and correlation -matrices among the true, measured, and filtered versions. +We can also compare the true, measured, and filtered versions of +each variable through their covariance and correlation matrices. High correlations between true and filtered series confirm that the -Kalman filter removes most measurement noise. - -Lower correlations between true and measured series quantify how much -information is lost by using raw data. +Kalman filter removes most measurement noise, while lower correlations +between true and measured series quantify how much information raw +data lose. ```{code-cell} ipython3 def cov_corr_three(a, b, c): @@ -1243,7 +1313,7 @@ tmf_labels = ['true', 'measured', 'filtered'] tf_labels = ['true', 'filtered'] ``` -**Consumption** -- Measurement error inflates the variance of measured +For consumption, measurement error inflates the variance of measured consumption relative to the truth, as the diagonal of the covariance matrix shows. @@ -1258,8 +1328,7 @@ true series almost perfectly (true-filtered correlation exceeds 0.99). display(Latex(df_to_latex_matrix(matrix_df(corr_c, tmf_labels)))) ``` -**Investment** -- Because $\sigma_\eta = 0.65$ is large, measurement error -creates the most variance inflation here. +For investment, measurement error creates the most variance inflation here. 
```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(cov_i, tmf_labels)))) @@ -1272,7 +1341,7 @@ demonstrating the filter's effectiveness even with severe noise. display(Latex(df_to_latex_matrix(matrix_df(corr_i, tmf_labels)))) ``` -**Income** -- Income has the smallest measurement error ($\sigma_\eta = 0.05$), +Income has the smallest measurement error ($\sigma_\eta = 0.05$), so measured and true covariances are nearly identical. ```{code-cell} ipython3 @@ -1286,7 +1355,7 @@ track the truth very closely. display(Latex(df_to_latex_matrix(matrix_df(corr_y, tmf_labels)))) ``` -**Capital stock** -- The capital stock is never directly observed, yet +The capital stock is never directly observed, yet the covariance matrix shows that the filter recovers it with very high accuracy. @@ -1302,29 +1371,28 @@ display(Latex(df_to_latex_matrix(matrix_df(corr_k, tf_labels)))) ## Summary -{cite}`Sargent1989` studies how measurement error alters an econometrician's view -of a permanent income economy driven by the investment accelerator. - -We had the following findings: - -* The Wold representations and variance decompositions of Model 1 (raw - measurements) and Model 2 (filtered measurements) are quite different, - even though the underlying economy is the same. +{cite}`Sargent1989` shows how measurement error alters an +econometrician's view of a permanent income economy driven by +the investment accelerator. -* Measurement error can - reshape inferences about which shocks drive which variables. +The Wold representations and variance decompositions of Model 1 +(raw measurements) and Model 2 (filtered measurements) differ +substantially, even though the underlying economy is the same. -* Model 1 reproduces the **Granger causality** pattern documented in the - empirical accelerator literature: income appears to Granger-cause - consumption and investment, but this pattern is an artifact of - measurement error ordering, not of the structural model. +Measurement error can reshape inferences about which shocks +drive which variables. -* Model 2, working with filtered data, attributes nearly all variance to - the single structural shock $\theta_t$ and *cannot* reproduce the - Granger causality pattern. +Model 1 reproduces the **Granger causality** pattern documented in +the empirical accelerator literature: income appears to Granger-cause +consumption and investment, a result {cite:t}`Sargent1989` attributes +to measurement error and signal extraction in raw reported data. -* The {doc}`Kalman filter ` effectively strips measurement noise - from the data: the filtered series track the truth closely, and the - near-zero residual shows that the filter approximately restores the - national income accounting identity that raw measurement error breaks. +Model 2, working with filtered data, attributes nearly all variance +to the single structural shock $\theta_t$ and *cannot* reproduce +the Granger causality pattern. +The {doc}`Kalman filter ` effectively strips measurement +noise from the data: the filtered series track the truth closely, +and the near-zero residual shows that the filter approximately +restores the national income accounting identity that raw +measurement error breaks. 
From 15e0c9d9a77653a3e8ff3c27d71e27553b38d127 Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Tue, 10 Feb 2026 08:47:42 +1100 Subject: [PATCH 13/19] updates --- lectures/measurement_models.md | 144 +++++++++++++++++++++------------ 1 file changed, 91 insertions(+), 53 deletions(-) diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md index 6e5170226..4dd700750 100644 --- a/lectures/measurement_models.md +++ b/lectures/measurement_models.md @@ -398,7 +398,10 @@ so that the true economy follows the state-space system ```{math} :label: true_ss -x_{t+1} = A x_t + \varepsilon_t, \qquad z_t = C x_t, +\begin{aligned} +x_{t+1} &= A x_t + \varepsilon_t, \\ +z_t &= C x_t. +\end{aligned} ``` where $\varepsilon_t = \begin{bmatrix} 0 \\ \theta_t \end{bmatrix}$ has @@ -615,8 +618,10 @@ The steady-state Kalman filter yields the **innovations representation** ```{math} :label: model1_innov -\hat x_{t+1} = A \hat x_t + K_1 u_t, \qquad -\bar z_{t+1} - D\bar z_t = \bar C \hat x_t + u_t, +\begin{aligned} +\hat x_{t+1} &= A \hat x_t + K_1 u_t, \\ +\bar z_{t+1} - D\bar z_t &= \bar C \hat x_t + u_t. +\end{aligned} ``` where $u_t = (\bar z_{t+1} - D\bar z_t) - @@ -631,9 +636,10 @@ $\{\bar z_t\}$, it is useful to represent {eq}`model1_innov` as ```{math} :label: model1_recursion -\hat x_{t+1} = (A - K_1 \bar C)\,\hat x_t + K_1 \bar z_t, -\qquad -u_t = -\bar C\,\hat x_t + \bar z_t, +\begin{aligned} +\hat x_{t+1} &= (A - K_1 \bar C)\,\hat x_t + K_1 \bar z_t, \\ +u_t &= -\bar C\,\hat x_t + \bar z_t. +\end{aligned} ``` where $\bar z_t := \bar z_{t+1} - D\bar z_t$ is the quasi-differenced @@ -805,10 +811,8 @@ This matches the paper's message that, in a one-common-index model, the relatively best measured series has the strongest predictive content. The covariance matrix of the innovations is not diagonal, but the -eigenvalues are well separated. +eigenvalues are well separated -The first eigenvalue is much larger than the others, consistent with -the presence of a dominant common shock $\theta_t$. ```{code-cell} ipython3 print('Covariance matrix of innovations:') @@ -816,6 +820,9 @@ df_v1 = pd.DataFrame(np.round(V1, 4), index=labels, columns=labels) display(Latex(df_to_latex_matrix(df_v1))) ``` +The first eigenvalue is much larger than the others, consistent with +the presence of a dominant common shock $\theta_t$ + ```{code-cell} ipython3 print('Eigenvalues of covariance matrix:') print(np.sort(np.linalg.eigvalsh(V1))[::-1].round(4)) @@ -854,8 +861,8 @@ for i, title in enumerate(wold_titles): display(Latex('$' + r' \quad '.join(parts) + '$')) ``` -At impact, the first orthogonalized innovation (ordered as output) -loads on all three measured variables, matching the paper's Table 4. +At impact, the first orthogonalized innovation +loads on all three measured variables. At subsequent lags the income innovation generates persistent responses in all three variables because, being the best-measured @@ -934,9 +941,10 @@ yields a second innovations representation ```{math} :label: model2_innov -\check{x}_{t+1} = A \check{x}_t + K_2 a_t, -\qquad -\tilde z_t = C \check{x}_t + a_t, +\begin{aligned} +\check{x}_{t+1} &= A \check{x}_t + K_2 a_t, \\ +\tilde z_t &= C \check{x}_t + a_t. +\end{aligned} ``` where $a_t$ is the innovation process for the filtered data with @@ -947,9 +955,10 @@ $\tilde z_t$, use ```{math} :label: model2_recursion -\check{x}_{t+1} = (A - K_2 C)\,\check{x}_t + K_2 \tilde z_t, -\qquad -a_t = -C\,\check{x}_t + \tilde z_t. 
+\begin{aligned} +\check{x}_{t+1} &= (A - K_2 C)\,\check{x}_t + K_2 \tilde z_t, \\ +a_t &= -C\,\check{x}_t + \tilde z_t. +\end{aligned} ``` The Gaussian log-likelihood for a sample of $T$ observations @@ -982,7 +991,7 @@ $j \geq 1$. Note that this is simpler than the Model 1 Wold representation {eq}`model1_wold` because there is no quasi-differencing -to undo. +to undo ```{code-cell} ipython3 Q2 = K1 @ V1 @ K1.T @@ -1011,7 +1020,7 @@ Because the filtered data are nearly noiseless, the innovation covariance $V_2$ is close to singular with one dominant eigenvalue. This means the filtered economy is driven by essentially one shock, -just like the true economy. +just like the true economy ```{code-cell} ipython3 parts = [] @@ -1031,6 +1040,10 @@ The second and third innovations contribute negligibly. This confirms that filtering strips away the measurement noise that created the appearance of multiple independent sources of variation in Model 1. +We invite readers to compare this table to the one for the true impulse responses in the {ref}`true-impulse-responses` section above. + +The numbers are essentially the same. + The covariance matrix and eigenvalues of the Model 2 innovations are ```{code-cell} ipython3 @@ -1058,8 +1071,10 @@ decomposition of $V_2$ ordered as $y_n$, $c$, $\Delta k$. ```{code-cell} ipython3 parts = [] for i, title in enumerate(wold_titles): - arr = df_to_latex_array(wold_response_table(resp2, i, lags)).strip('$') - parts.append(r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') + arr = df_to_latex_array( + wold_response_table(resp2, i, lags)).strip('$') + parts.append( + r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') display(Latex('$' + r' \quad '.join(parts) + '$')) ``` @@ -1147,10 +1162,12 @@ sim = simulate_series(seed=7909, T=80, k0=10.0) ``` ```{code-cell} ipython3 -def plot_true_vs_other(t, true_series, other_series, other_label, ylabel=""): +def plot_true_vs_other(t, true_series, other_series, + other_label, ylabel=""): fig, ax = plt.subplots(figsize=(8, 4)) ax.plot(t, true_series, lw=2, color="black", label="true") - ax.plot(t, other_series, lw=2, ls="--", color="#1f77b4", label=other_label) + ax.plot(t, other_series, lw=2, ls="--", + color="#1f77b4", label=other_label) ax.set_xlabel("time", fontsize=12) ax.set_ylabel(ylabel, fontsize=12) ax.legend(loc="best", fontsize=11, frameon=True, shadow=True) @@ -1170,7 +1187,8 @@ mystnb: image: alt: True and measured consumption plotted over 80 time periods --- -plot_true_vs_other(t, sim["c_true"], sim["c_meas"], "measured", ylabel="consumption") +plot_true_vs_other(t, sim["c_true"], sim["c_meas"], + "measured", ylabel="consumption") ``` ```{code-cell} ipython3 @@ -1182,7 +1200,8 @@ mystnb: image: alt: True and measured investment plotted over 80 time periods --- -plot_true_vs_other(t, sim["dk_true"], sim["dk_meas"], "measured", ylabel="investment") +plot_true_vs_other(t, sim["dk_true"], sim["dk_meas"], + "measured", ylabel="investment") ``` ```{code-cell} ipython3 @@ -1194,7 +1213,8 @@ mystnb: image: alt: True and measured income plotted over 80 time periods --- -plot_true_vs_other(t, sim["y_true"], sim["y_meas"], "measured", ylabel="income") +plot_true_vs_other(t, sim["y_true"], sim["y_meas"], + "measured", ylabel="income") ``` Investment is distorted the most because its measurement error @@ -1213,7 +1233,8 @@ mystnb: image: alt: True and filtered consumption plotted over 80 time periods --- -plot_true_vs_other(t, sim["c_true"], sim["c_filt"], "filtered", ylabel="consumption") 
+plot_true_vs_other(t, sim["c_true"], sim["c_filt"], + "filtered", ylabel="consumption") ``` ```{code-cell} ipython3 @@ -1225,7 +1246,8 @@ mystnb: image: alt: True and filtered investment plotted over 80 time periods --- -plot_true_vs_other(t, sim["dk_true"], sim["dk_filt"], "filtered", ylabel="investment") +plot_true_vs_other(t, sim["dk_true"], sim["dk_filt"], + "filtered", ylabel="investment") ``` ```{code-cell} ipython3 @@ -1237,7 +1259,8 @@ mystnb: image: alt: True and filtered income plotted over 80 time periods --- -plot_true_vs_other(t, sim["y_true"], sim["y_filt"], "filtered", ylabel="income") +plot_true_vs_other(t, sim["y_true"], sim["y_filt"], + "filtered", ylabel="income") ``` ```{code-cell} ipython3 @@ -1249,7 +1272,8 @@ mystnb: image: alt: True and filtered capital stock plotted over 80 time periods --- -plot_true_vs_other(t, sim["k_true"], sim["k_filt"], "filtered", ylabel="capital stock") +plot_true_vs_other(t, sim["k_true"], sim["k_filt"], + "filtered", ylabel="capital stock") ``` In the true model the national income identity @@ -1271,13 +1295,13 @@ mystnb: --- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4)) -ax1.plot(t, sim["c_meas"] + sim["dk_meas"] - sim["y_meas"], color="#d62728", lw=2) +ax1.plot(t, sim["c_meas"] + sim["dk_meas"] - sim["y_meas"], lw=2) ax1.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) ax1.set_xlabel("time", fontsize=12) ax1.set_ylabel("residual", fontsize=12) ax1.set_title(r'Measured: $c_t + \Delta k_t - y_{n,t}$', fontsize=13) -ax2.plot(t, sim["c_filt"] + sim["dk_filt"] - sim["y_filt"], color="#2ca02c", lw=2) +ax2.plot(t, sim["c_filt"] + sim["dk_filt"] - sim["y_filt"], lw=2) ax2.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) ax2.set_xlabel("time", fontsize=12) ax2.set_ylabel("residual", fontsize=12) @@ -1287,13 +1311,23 @@ plt.tight_layout() plt.show() ``` -We can also compare the true, measured, and filtered versions of -each variable through their covariance and correlation matrices. +For each variable $w \in \{c, \Delta k, y_n\}$ we compute the +covariance and correlation matrices among its true, measured, and +filtered versions. + +Each matrix has the structure + +```{math} +\begin{bmatrix} +\text{var}(w^{\text{true}}) & \text{cov}(w^{\text{true}}, w^{\text{meas}}) & \text{cov}(w^{\text{true}}, w^{\text{filt}}) \\ +\cdot & \text{var}(w^{\text{meas}}) & \text{cov}(w^{\text{meas}}, w^{\text{filt}}) \\ +\cdot & \cdot & \text{var}(w^{\text{filt}}) +\end{bmatrix}. +``` -High correlations between true and filtered series confirm that the -Kalman filter removes most measurement noise, while lower correlations -between true and measured series quantify how much information raw -data lose. +The key entries are the off-diagonal terms linking true to measured +(distortion from noise) and true to filtered (recovery by the Kalman +filter). 
```{code-cell} ipython3 def cov_corr_three(a, b, c): @@ -1303,9 +1337,12 @@ def cov_corr_three(a, b, c): def matrix_df(mat, labels): return pd.DataFrame(np.round(mat, 4), index=labels, columns=labels) -cov_c, corr_c = cov_corr_three(sim["c_true"], sim["c_meas"], sim["c_filt"]) -cov_i, corr_i = cov_corr_three(sim["dk_true"], sim["dk_meas"], sim["dk_filt"]) -cov_y, corr_y = cov_corr_three(sim["y_true"], sim["y_meas"], sim["y_filt"]) +cov_c, corr_c = cov_corr_three( + sim["c_true"], sim["c_meas"], sim["c_filt"]) +cov_i, corr_i = cov_corr_three( + sim["dk_true"], sim["dk_meas"], sim["dk_filt"]) +cov_y, corr_y = cov_corr_three( + sim["y_true"], sim["y_meas"], sim["y_filt"]) cov_k = np.cov(np.vstack([sim["k_true"], sim["k_filt"]])) corr_k = np.corrcoef(np.vstack([sim["k_true"], sim["k_filt"]])) @@ -1315,41 +1352,41 @@ tf_labels = ['true', 'filtered'] For consumption, measurement error inflates the variance of measured consumption relative to the truth, as the diagonal of the covariance -matrix shows. +matrix shows ```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(cov_c, tmf_labels)))) ``` The correlation matrix confirms that the filtered series recovers the -true series almost perfectly (true-filtered correlation exceeds 0.99). +true series almost perfectly ```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(corr_c, tmf_labels)))) ``` -For investment, measurement error creates the most variance inflation here. +For investment, measurement error creates the most variance inflation here ```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(cov_i, tmf_labels)))) ``` Despite this, the true-filtered correlation remains high, -demonstrating the filter's effectiveness even with severe noise. +demonstrating the filter's effectiveness even with severe noise ```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(corr_i, tmf_labels)))) ``` Income has the smallest measurement error ($\sigma_\eta = 0.05$), -so measured and true covariances are nearly identical. +so measured and true covariances are nearly identical ```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(cov_y, tmf_labels)))) ``` The correlation matrix shows that both measured and filtered series -track the truth very closely. +track the truth very closely ```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(corr_y, tmf_labels)))) @@ -1357,13 +1394,13 @@ display(Latex(df_to_latex_matrix(matrix_df(corr_y, tmf_labels)))) The capital stock is never directly observed, yet the covariance matrix shows that the filter recovers it with very -high accuracy. +high accuracy ```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(cov_k, tf_labels)))) ``` -The near-unity correlation confirms this. +The near-unity correlation confirms this ```{code-cell} ipython3 display(Latex(df_to_latex_matrix(matrix_df(corr_k, tf_labels)))) @@ -1392,7 +1429,8 @@ to the single structural shock $\theta_t$ and *cannot* reproduce the Granger causality pattern. The {doc}`Kalman filter ` effectively strips measurement -noise from the data: the filtered series track the truth closely, -and the near-zero residual shows that the filter approximately -restores the national income accounting identity that raw -measurement error breaks. +noise from the data, so the filtered series track the truth closely. + +Raw measurement error breaks the national income accounting identity, +but the near-zero residual shows that the filter approximately +restores it. 
From 6d41d5140fa0c6035b6f3c7007c94ab921ef22ea Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Tue, 10 Feb 2026 11:23:01 +1100 Subject: [PATCH 14/19] updates --- lectures/measurement_models.md | 812 +++++++++++++++++---------------- 1 file changed, 422 insertions(+), 390 deletions(-) diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md index 4dd700750..ebddc3fe2 100644 --- a/lectures/measurement_models.md +++ b/lectures/measurement_models.md @@ -28,32 +28,49 @@ kernelspec: ## Overview -{cite:t}`Sargent1989` studies what happens to an econometrician's -inferences about economic dynamics when observed data are contaminated -by measurement error. +"Rational expectations econometrics" aims to interpret economic time +series in terms of objects that are meaningful to economists, namely, +parameters describing preferences, technologies, information sets, +endowments, and equilibrium concepts. -The setting is a {doc}`permanent income ` economy in which the +When fully worked out, rational expectations models typically deliver +a well-defined mapping from these economically interpretable parameters +to the moments of the time series determined by the model. + +If accurate observations on these time series are available, one can +use that mapping to implement parameter estimation methods based +either on the likelihood function or on the method of moments. + +However, if only error-ridden data exist for the variables of interest, +then more steps are needed to extract parameter estimates. + +In effect, we require a model of the data reporting agency, one that +is workable enough that we can determine the mapping induced jointly +by the dynamic economic model and the measurement process to the +probability law for the measured data. + +The model chosen for the data collection agency is an aspect of an +econometric specification that can make big differences in inferences +about the economic structure. + +{cite:t}`Sargent1989` describes two alternative models of data generation +in a {doc}`permanent income ` economy in which the investment accelerator, the mechanism studied in {doc}`samuelson` and {doc}`chow_business_cycles`, drives business cycle fluctuations. -We specify a {doc}`linear state space model ` for the -true economy and then consider two ways of extracting information from -noisy measurements: - -- In Model 1, the data collecting agency simply reports - raw (noisy) observations. -- In Model 2, the agency applies an optimal - {doc}`Kalman filter ` to the noisy data and - reports least-squares estimates of the true variables. +- In Model 1, the data collecting agency simply reports the + error-ridden data that it collects. +- In Model 2, although it collects error-ridden data that satisfy + a classical errors-in-variables model, the data collecting agency + filters the data and reports the best estimates that it possibly can. -The two models produce different Wold representations and -forecast-error-variance decompositions, even though they describe -the same underlying economy. +Although the two models have the same "deep parameters," they produce +quite different sets of restrictions on the data. In this lecture we follow {cite:t}`Sargent1989` and study how alternative measurement schemes change empirical implications. -We start with imports and helper functions used throughout. 
+We start with imports and helper functions used throughout ```{code-cell} ipython3 import numpy as np @@ -108,89 +125,6 @@ def df_to_latex_array(df): return '$' + '\n'.join(lines) + '$' ``` -## Classical formulation - -Before moving to state-space methods, {cite:t}`Sargent1989` formulates -both measurement models in classical Wold form. - -This setup separates: - -- The law of motion for true economic variables. -- The law of motion for measurement errors. -- The map from these two objects to observables used by an econometrician. - -Let the true data be - -```{math} -:label: classical_true_wold -Z_t = c_Z(L)\,\varepsilon_t^Z, \qquad -E\varepsilon_t^Z {\varepsilon_t^Z}' = I. -``` - -In Model 1 (raw reports), the agency observes and reports - -```{math} -:label: classical_model1_meas -z_t = Z_t + v_t, \qquad -v_t = c_v(L)\,\varepsilon_t^v, \qquad -E(Z_t v_s') = 0\ \forall t,s. -``` - -Then measured data have Wold representation - -```{math} -:label: classical_model1_wold -z_t = c_z(L)\,\varepsilon_t, -``` - -with spectral factorization - -```{math} -:label: classical_model1_factor -c_z(s)c_z(s^{-1})' = c_Z(s)c_Z(s^{-1})' + c_v(s)c_v(s^{-1})'. -``` - -In Model 2 (filtered reports), the agency reports - -```{math} -:label: classical_model2_report -\tilde z_t = E[Z_t \mid z_t, z_{t-1}, \ldots] = h(L) z_t, -``` - -where - -```{math} -:label: classical_model2_filter -h(L) -= \Big[ - c_Z(L)c_Z(L^{-1})' - \big(c_z(L^{-1})'\big)^{-1} - \Big]_+ c_z(L)^{-1}, -``` - -and $[\cdot]_+$ keeps only nonnegative powers of $L$. - -Filtered reports satisfy - -```{math} -:label: classical_model2_wold -\tilde z_t = c_{\tilde z}(L)\,a_t, -``` - -with - -```{math} -:label: classical_model2_factor -c_{\tilde z}(s)c_{\tilde z}(s^{-1})' -= h(s)c_z(s)c_z(s^{-1})'h(s^{-1})'. -``` - -These two data-generation schemes imply different Gaussian likelihood -functions. - -In the rest of the lecture, we switch to a recursive state-space -representation because it makes these objects easy to compute. - ## The economic model The true economy is a linear-quadratic version of a stochastic @@ -273,9 +207,6 @@ Adding a second shock breaks the one-index structure entirely and can generate nontrivial Granger causality even without measurement error. -The accelerator projection is also not invariant under -interventions that alter predictable components of income. - Assumption 2 is less important, affecting only various constants. Under both assumptions, {eq}`opt_decision` simplifies to @@ -314,8 +245,8 @@ of income. This is the same mechanism that {cite:t}`Chow1968` documented empirically (see {doc}`chow_business_cycles`). -Equation {eq}`income_process` says that $y_{nt}$ is an IMA(1,1) -process with innovation $\theta_t$. +Equation {eq}`income_process` states that the first difference of disposable income is a +first-order moving average process with innovation equal to the innovation of the endowment shock $\theta_t$. As {cite:t}`Muth1960` showed, such a process is optimally forecast via a geometric distributed lag or "adaptive expectations" scheme. @@ -363,25 +294,33 @@ y_{nt} = \theta_t + (1-\beta)(\theta_{t-1} + \theta_{t-2} + \cdots). In this case income Granger-causes consumption and investment but is not Granger-caused by them. -In the numerical example below, $y_{nt}$ is also measured -with error: the agency reports $\bar y_{nt} = y_{nt} + v_{yt}$, -where $v_{yt}$ follows an AR(1) process orthogonal to $\theta_t$. 
+When each measured series is corrupted by measurement error, every +measured variable will in general Granger-cause every other. -When every series is corrupted by measurement error, every measured -variable Granger-causes every other. +The strength of this Granger causality, as measured by decompositions +of $j$-step-ahead prediction error variances, depends on the relative +variances of the measurement errors. -The strength of Granger causality depends on the relative -signal-to-noise ratios. +In this case, each observed series mixes the common signal $\theta_t$ +with idiosyncratic measurement noise. -In a one-common-index model like this one ($\theta_t$ is the -common index), the best-measured variable extends the most -Granger causality to the others. +A series with lower measurement +error variance tracks $\theta_t$ more closely, so its innovations +contain more information about future values of the other series. -This mechanism drives the numerical results below. +Accordingly, in a forecast-error-variance decomposition, shocks to +better-measured series account for a larger share of other variables' +$j$-step-ahead prediction errors. -## State-space formulation +In a one-common-index model like this one ($\theta_t$ is the common +index), better-measured variables extend more Granger causality to +less well measured series than vice versa. -We now map the economic model and the measurement process into +This asymmetry drives the numerical results we observe soon. + +### State-space formulation + +Let's map the economic model and the measurement process into a recursive state-space framework. Set $f = 1.05$ and $\theta_t \sim \mathcal{N}(0, 1)$. @@ -429,7 +368,61 @@ $Q$ is singular because there is only one source of randomness $\theta_t$; the capital stock $k_t$ evolves deterministically given $\theta_t$. -### Measurement errors +```{code-cell} ipython3 +# Baseline structural matrices for the true economy +f = 1.05 +β = 1 / f + +A = np.array([ + [1.0, 1.0 / f], + [0.0, 0.0] +]) + +C = np.array([ + [f - 1.0, 1.0], + [f - 1.0, 1.0 - 1.0 / f], + [0.0, 1.0 / f] +]) + +Q = np.array([ + [0.0, 0.0], + [0.0, 1.0] +]) +``` + +(true-impulse-responses)= +### True impulse responses + +Before introducing measurement error, we compute the impulse response of +the true system to a unit shock $\theta_0 = 1$. + +This benchmark clarifies what changes when we later switch from +true variables to reported variables. + +The response shows the investment accelerator clearly: the full impact on +net income $y_n$ occurs at lag 0, while consumption adjusts by only +$1 - f^{-1} \approx 0.048$ and investment absorbs the remainder. + +From lag 1 onward the economy is in its new steady state + +```{code-cell} ipython3 +def table2_irf(A, C, n_lags=6): + x = np.array([0.0, 1.0]) # k_0 = 0, theta_0 = 1 + rows = [] + for j in range(n_lags): + y_n, c, d_k = C @ x + rows.append([y_n, c, d_k]) + x = A @ x + return pd.DataFrame(rows, columns=[r'y_n', r'c', r'\Delta k'], + index=pd.Index(range(n_lags), name='lag')) + +table2 = table2_irf(A, C, n_lags=6) +display(Latex(df_to_latex_array(table2))) +``` + +## Measurement errors + +Let's add the measurement layer that generates reported data. The econometrician does not observe $z_t$ directly but instead sees $\bar z_t = z_t + v_t$, where $v_t$ is a vector of measurement @@ -464,31 +457,12 @@ The innovation variances are smallest for consumption ($\sigma_\eta = 0.035$), next for income ($\sigma_\eta = 0.05$), and largest for investment ($\sigma_\eta = 0.65$). 
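+
+Because each measurement error follows an AR(1) process, its stationary
+variance is $\sigma_\eta^2 / (1 - \rho^2)$, so overall measurement
+quality depends on the persistence and the innovation variance together.
+
+As a quick back-of-the-envelope check, the sketch below computes these
+stationary variances from the $\sigma_\eta$ values just listed and the
+persistence parameters $\rho = (0.6, 0.7, 0.3)$ used in the next code
+cell, ordered as $y_n$, $c$, $\Delta k$
+
+```{code-cell} ipython3
+# Stationary variance of an AR(1) process v_{t+1} = ρ v_t + η_t
+# with std(η_t) = σ_η is σ_η² / (1 - ρ²)
+ρ_check = np.array([0.6, 0.7, 0.3])       # persistence: y_n, c, Δk errors
+σ_check = np.array([0.05, 0.035, 0.65])   # innovation std: y_n, c, Δk errors
+for name, r, s in zip(['income', 'consumption', 'investment'],
+                      ρ_check, σ_check):
+    print(f'{name:12s} {s**2 / (1 - r**2):.4f}')
+```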
-As in {cite:t}`Sargent1989`, what matters for Granger-causality +As in {cite:t}`Sargent1989` and our discussion above, what matters for Granger-causality asymmetries is the overall measurement quality in the full system: output is relatively well measured while investment is relatively poorly measured. ```{code-cell} ipython3 -f = 1.05 -β = 1 / f - -A = np.array([ - [1.0, 1.0 / f], - [0.0, 0.0] -]) - -C = np.array([ - [f - 1.0, 1.0], - [f - 1.0, 1.0 - 1.0 / f], - [0.0, 1.0 / f] -]) - -Q = np.array([ - [0.0, 0.0], - [0.0, 1.0] -]) - ρ = np.array([0.6, 0.7, 0.3]) D = np.diag(ρ) @@ -506,9 +480,7 @@ display(Latex(df_to_latex_matrix(pd.DataFrame(C), 'C'))) display(Latex(df_to_latex_matrix(pd.DataFrame(D), 'D'))) ``` -## Kalman filter - -Both models require a steady-state {doc}`Kalman filter `. +We will analyze the two reporting schemes separately, but first we need a solver for the steady-state Kalman gain and error covariances. The function below iterates on the Riccati equation until convergence, returning the Kalman gain $K$, the state covariance $S$, and the @@ -543,38 +515,47 @@ def steady_state_kalman(A, C_obs, Q, R, W=None, tol=1e-13, max_iter=200_000): return K, S, V ``` -(true-impulse-responses)= -## True impulse responses +With structural matrices and tools we need in place, we now follow +{cite:t}`Sargent1989`'s two reporting schemes in sequence. -Before introducing measurement error, we compute the impulse response of -the true system to a unit shock $\theta_0 = 1$. +## A Classical Model of Measurements Initially Collected by an Agency -The response shows the investment accelerator clearly: the full impact on -net income $y_n$ occurs at lag 0, while consumption adjusts by only -$1 - f^{-1} \approx 0.048$ and investment absorbs the remainder. +A data collecting agency observes a noise-corrupted version of $z_t$, namely -From lag 1 onward the economy is in its new steady state +```{math} +:label: model1_obs +\bar z_t = C x_t + v_t. +``` -```{code-cell} ipython3 -def table2_irf(A, C, n_lags=6): - x = np.array([0.0, 1.0]) # k_0 = 0, theta_0 = 1 - rows = [] - for j in range(n_lags): - y_n, c, d_k = C @ x - rows.append([y_n, c, d_k]) - x = A @ x - return pd.DataFrame(rows, columns=[r'y_n', r'c', r'\Delta k'], - index=pd.Index(range(n_lags), name='lag')) +We refer to this as *Model 1*: the agency collects noisy +data and reports them without filtering. -table2 = table2_irf(A, C, n_lags=6) -display(Latex(df_to_latex_array(table2))) -``` +To represent the second moments of the $\bar z_t$ process, it is +convenient to obtain its population vector autoregression. + +The error vector in the vector autoregression is the +innovation to $\bar z_t$ and can be taken to be the white noise in a Wold +moving average representation, which can be obtained by "inverting" +the autoregressive representation. + +The population vector autoregression, and how it depends on the +parameters of the state-space system and the measurement error process, +carries insights about how to interpret estimated vector +autoregressions for $\bar z_t$. -## Model 1 (raw measurements) +Constructing the vector autoregression is also useful as an +intermediate step in computing the likelihood of a sample of +$\bar z_t$'s as a function of the free parameters +$\{A, C, D, Q, R\}$. -Model 1 is a classical errors-in-variables model: the data collecting -agency simply reports the error-corrupted data $\bar z_t = z_t + v_t$ -that it collects, making no attempt to adjust for measurement errors. 
+The particular method that will be used to construct the vector +autoregressive representation also proves useful as an intermediate +step in constructing a model of an optimal reporting agency. + +We use recursive (Kalman filtering) methods to obtain the +vector autoregression for $\bar z_t$. + +### Quasi-differencing Because the measurement errors $v_t$ are serially correlated, the standard Kalman filter with white-noise measurement error @@ -584,77 +565,104 @@ An alternative is to augment the state vector with the measurement-error AR components (see Appendix B of {cite:t}`Sargent1989`). -Here we take the quasi-differencing route, which reduces the +Here we take the quasi-differencing route described in +{cite:t}`Sargent1989`, which reduces the system to one with serially uncorrelated observation noise. -Substituting $\bar z_t = C x_t + v_t$, $x_{t+1} = A x_t + \varepsilon_t$, -and $v_{t+1} = D v_t + \eta_t$ into $\bar z_{t+1} - D \bar z_t$ gives +Define ```{math} -:label: model1_obs -\bar z_{t+1} - D \bar z_t = \bar C\, x_t + C \varepsilon_t + \eta_t, +:label: model1_qd +\tilde z_t = \bar z_{t+1} - D \bar z_t, \qquad +\bar\nu_t = C \varepsilon_t + \eta_t, \qquad +\bar C = CA - DC. ``` -where $\bar C = CA - DC$. +Then the state-space system {eq}`true_ss`, the measurement error +process {eq}`meas_error_ar1`, and the observation equation {eq}`model1_obs` +imply the state-space system -The composite observation noise in {eq}`model1_obs` is -$\bar\nu_t = C\varepsilon_t + \eta_t$, which is serially uncorrelated. +```{math} +:label: model1_transformed +\begin{aligned} +x_{t+1} &= A x_t + \varepsilon_t, \\ +\tilde z_t &= \bar C\, x_t + \bar\nu_t, +\end{aligned} +``` -Its covariance, and the cross-covariance between the state noise -$\varepsilon_t$ and $\bar\nu_t$, are +where $(\varepsilon_t, \bar\nu_t)$ is a white noise process with ```{math} :label: model1_covs -R_1 = C Q C^\top + R, \qquad W_1 = Q C^\top. +E \begin{bmatrix} \varepsilon_t \end{bmatrix} +\begin{bmatrix} \varepsilon_t' & \bar\nu_t' \end{bmatrix} += \begin{bmatrix} Q & W_1 \\ W_1' & R_1 \end{bmatrix}, +\qquad +R_1 = C Q C^\top + R, \quad W_1 = Q C^\top. ``` -The system $\{x_{t+1} = A x_t + \varepsilon_t,\; -\bar z_{t+1} - D\bar z_t = \bar C x_t + \bar\nu_t\}$ -with $\text{cov}(\varepsilon_t)=Q$, $\text{cov}(\bar\nu_t)=R_1$, and -$\text{cov}(\varepsilon_t, \bar\nu_t)=W_1$ now has serially uncorrelated -errors, so the standard {doc}`Kalman filter ` applies. +System {eq}`model1_transformed` with covariances {eq}`model1_covs` is +characterized by the five matrices +$[A, \bar C, Q, R_1, W_1]$. + +### Innovations representation -The steady-state Kalman filter yields the **innovations representation** +Associated with {eq}`model1_transformed` and {eq}`model1_covs` is the +**innovations representation** for $\tilde z_t$, ```{math} :label: model1_innov \begin{aligned} \hat x_{t+1} &= A \hat x_t + K_1 u_t, \\ -\bar z_{t+1} - D\bar z_t &= \bar C \hat x_t + u_t. +\tilde z_t &= \bar C \hat x_t + u_t, \end{aligned} ``` -where $u_t = (\bar z_{t+1} - D\bar z_t) - -E[\bar z_{t+1} - D\bar z_t \mid \bar z_t, \bar z_{t-1}, \ldots]$ -is the innovation process, $K_1$ is the Kalman gain, and -$V_1 = \bar C S_1 \bar C^\top + R_1$ is the innovation covariance matrix -(with $S_1 = E[(x_t - \hat x_t)(x_t - \hat x_t)^\top]$ the steady-state -state estimation error covariance). 
- -To compute the innovations $\{u_t\}$ recursively from the data -$\{\bar z_t\}$, it is useful to represent {eq}`model1_innov` as +where ```{math} -:label: model1_recursion +:label: model1_innov_defs \begin{aligned} -\hat x_{t+1} &= (A - K_1 \bar C)\,\hat x_t + K_1 \bar z_t, \\ -u_t &= -\bar C\,\hat x_t + \bar z_t. +\hat x_t &= E[x_t \mid \tilde z_{t-1}, \tilde z_{t-2}, \ldots, \hat x_0] + = E[x_t \mid \bar z_t, \bar z_{t-1}, \ldots], \\ +u_t &= \tilde z_t - E[\tilde z_t \mid \tilde z_{t-1}, \tilde z_{t-2}, \ldots] + = \bar z_{t+1} - E[\bar z_{t+1} \mid \bar z_t, \bar z_{t-1}, \ldots], \end{aligned} ``` -where $\bar z_t := \bar z_{t+1} - D\bar z_t$ is the quasi-differenced -observation. +$[K_1, S_1]$ are computed from the steady-state Kalman filter applied to +$[A, \bar C, Q, R_1, W_1]$, and + +```{math} +:label: model1_S1 +S_1 = E[(x_t - \hat x_t)(x_t - \hat x_t)^\top]. +``` + +From {eq}`model1_innov_defs`, $u_t$ is the innovation process for the +$\bar z_t$ process. -Given an initial $\hat x_0$, equation {eq}`model1_recursion` generates -the innovation sequence, from which the Gaussian log-likelihood -of a sample $\{\bar z_t,\, t=0,\ldots,T\}$ is +### Wold representation + +System {eq}`model1_innov` and definition {eq}`model1_qd` can be used to +obtain a Wold vector moving average representation for the $\bar z_t$ process: ```{math} -:label: model1_loglik -\mathcal{L}^* = -T\ln 2\pi - \tfrac{1}{2}T\ln|V_1| - - \tfrac{1}{2}\sum_{t=0}^{T-1} u_t' V_1^{-1} u_t. +:label: model1_wold +\bar z_{t+1} = (I - DL)^{-1}\bigl[\bar C(I - AL)^{-1}K_1 L + I\bigr] u_t, ``` +where $L$ is the lag operator. + +From {eq}`model1_transformed` and {eq}`model1_innov` the innovation +covariance is + +```{math} +:label: model1_V1 +V_1 = E\, u_t u_t^\top = \bar C\, S_1\, \bar C^\top + R_1. +``` + +Below we compute $K_1$, $S_1$, and $V_1$ numerically + ```{code-cell} ipython3 C_bar = C @ A - D @ C R1 = C @ Q @ C.T + R @@ -663,23 +671,11 @@ W1 = Q @ C.T K1, S1, V1 = steady_state_kalman(A, C_bar, Q, R1, W1) ``` -### Wold representation for measured data -With the innovations representation {eq}`model1_innov` in hand, we can -derive a Wold moving-average representation for the measured data -$\bar z_t$. +### Computing the Wold coefficients -From {eq}`model1_innov` and the quasi-differencing definition, the -measured data satisfy - -```{math} -:label: model1_wold -\bar z_{t+1} = (I - DL)^{-1}\bigl[\bar C(I - AL)^{-1}K_1 L + I\bigr] u_t, -``` - -where $L$ is the lag operator. - -To compute the Wold coefficients numerically, define the augmented state +To compute the Wold coefficients in {eq}`model1_wold` numerically, +define the augmented state ```{math} r_t = \begin{bmatrix} \hat x_{t-1} \\ \bar z_{t-1} \end{bmatrix}, @@ -752,6 +748,42 @@ resp1 = np.array([psi1[j] @ linalg.cholesky(V1, lower=True) for j in range(14)]) decomp1 = fev_contributions(psi1, V1, n_horizons=20) ``` +### Gaussian likelihood + +The Gaussian log-likelihood function for a sample +$\{\bar z_t,\, t=0,\ldots,T\}$, conditioned on an initial state estimate +$\hat x_0$, can be represented as + +```{math} +:label: model1_loglik +\mathcal{L}^* = -T\ln 2\pi - \tfrac{1}{2}T\ln|V_1| + - \tfrac{1}{2}\sum_{t=0}^{T-1} u_t' V_1^{-1} u_t, +``` + +where $u_t$ is a function of $\{\bar z_t\}$ defined by +{eq}`model1_recursion` below. 
+ +To use {eq}`model1_innov` to compute $\{u_t\}$, it is useful to +represent it as + +```{math} +:label: model1_recursion +\begin{aligned} +\hat x_{t+1} &= (A - K_1 \bar C)\,\hat x_t + K_1 \tilde z_t, \\ +u_t &= -\bar C\,\hat x_t + \tilde z_t, +\end{aligned} +``` + +where $\tilde z_t = \bar z_{t+1} - D\bar z_t$ is the quasi-differenced +observation. + +Given $\hat x_0$, equation {eq}`model1_recursion` can be used recursively +to compute a $\{u_t\}$ process. + +Equations {eq}`model1_loglik` and {eq}`model1_recursion` give the +likelihood function of a sample of error-corrupted data +$\{\bar z_t\}$. + ### Forecast-error-variance decomposition To measure the relative importance of each innovation, we decompose @@ -810,9 +842,7 @@ Granger-cause consumption and investment, but not vice versa. This matches the paper's message that, in a one-common-index model, the relatively best measured series has the strongest predictive content. -The covariance matrix of the innovations is not diagonal, but the -eigenvalues are well separated - +Let's look at the the covariance matrix of the innovations ```{code-cell} ipython3 print('Covariance matrix of innovations:') @@ -820,23 +850,29 @@ df_v1 = pd.DataFrame(np.round(V1, 4), index=labels, columns=labels) display(Latex(df_to_latex_matrix(df_v1))) ``` -The first eigenvalue is much larger than the others, consistent with -the presence of a dominant common shock $\theta_t$ +The covariance matrix of the innovations is not diagonal, but the +eigenvalues are well separated as shown below + ```{code-cell} ipython3 print('Eigenvalues of covariance matrix:') print(np.sort(np.linalg.eigvalsh(V1))[::-1].round(4)) ``` +The first eigenvalue is much larger than the others, consistent with +the presence of a dominant common shock $\theta_t$ + ### Wold impulse responses The Wold impulse responses are reported using orthogonalized innovations (Cholesky factorization of $V_1$ with ordering $y_n$, $c$, $\Delta k$). -Under this identification, lag-0 responses reflect both +Under this method, lag-0 responses reflect both contemporaneous covariance and the Cholesky ordering. +We first define a helper function to format the Wold responses as a LaTeX array + ```{code-cell} ipython3 lags = np.arange(14) @@ -848,6 +884,8 @@ def wold_response_table(resp, shock_idx, lags): ) ``` +Now we report the Wold responses to each orthogonalized innovation in a single table with three panels + ```{code-cell} ipython3 wold_titles = [r'\text{A. Response to } y_n \text{ innovation}', r'\text{B. Response to } c \text{ innovation}', @@ -874,94 +912,158 @@ decay according to the AR(1) structure of their respective measurement errors ($\rho_c = 0.7$, $\rho_{\Delta k} = 0.3$), with little spillover to other variables. -## Model 2 (filtered measurements) +## A Model of Optimal Estimates Reported by an Agency -Model 2 corresponds to a data collecting agency that, instead of -reporting raw error-corrupted data, applies an optimal filter -to construct least-squares estimates of the true variables. +Suppose that instead of reporting the error-corrupted data $\bar z_t$, +the data collecting agency reports linear least-squares projections of +the true data on a history of the error-corrupted data. -This is a natural model for agencies that seasonally adjust -data (one-sided filtering of current and past observations) or -publish preliminary, revised, and final estimates of the same -variable (successive conditional expectations as more data -accumulate). 
+This model provides a possible way of interpreting two features of +the data-reporting process. -Specifically, the agency uses the Kalman filter from Model 1 to form -$\hat x_t = E[x_t \mid \bar z_t, \bar z_{t-1}, \ldots]$ and reports -filtered estimates +- *seasonal adjustment*: if the components of $v_t$ have +strong seasonals, the optimal filter will assume a shape that can be +interpreted partly in terms of a seasonal adjustment filter, one that +is one-sided in current and past $\bar z_t$'s. -```{math} -\tilde z_t = G \hat x_t, -``` +- *data revisions*: if $z_t$ contains current and lagged +values of some variable of interest, then the model simultaneously +determines "preliminary," "revised," and "final" estimates as +successive conditional expectations based on progressively longer +histories of error-ridden observations. + +To make this operational, we impute to the reporting agency a model of +the joint process generating the true data and the measurement errors. + +We assume that the reporting agency has "rational expectations": it +knows the economic and measurement structure leading to +{eq}`model1_transformed`--{eq}`model1_covs`. -where $G = C$ is a selection matrix. +To prepare its estimates, the reporting agency itself computes the +Kalman filter to obtain the innovations representation {eq}`model1_innov`. -### State-space for filtered data +Rather than reporting the error-corrupted data $\bar z_t$, the agency +reports $\tilde z_t = G \hat x_t$, where $G$ is a "selection matrix," +possibly equal to $C$, for the data reported by the agency. -From the innovations representation {eq}`model1_innov`, the state -$\hat x_t$ evolves as +The data $G \hat x_t = E[G x_t \mid \bar z_t, \bar z_{t-1}, \ldots, \hat x_0]$. + +The state-space representation for the reported data is then ```{math} :label: model2_state -\hat x_{t+1} = A \hat x_t + K_1 u_t. +\begin{aligned} +\hat x_{t+1} &= A \hat x_t + K_1 u_t, \\ +\tilde z_t &= G \hat x_t, +\end{aligned} ``` -The reported filtered data are then +where the first line of {eq}`model2_state` is from the innovations +representation {eq}`model1_innov`. + +Note that $u_t$ is the innovation to $\bar z_{t+1}$ and is *not* the +innovation to $\tilde z_t$. + +To obtain a Wold representation for $\tilde z_t$ and the likelihood +function for a sample of $\tilde z_t$ requires that we obtain an +innovations representation for {eq}`model2_state`. + +### Innovations representation for filtered data + +To add a little generality to {eq}`model2_state` we amend it to the system ```{math} :label: model2_obs -\tilde z_t = C \hat x_t + \eta_t, +\begin{aligned} +\hat x_{t+1} &= A \hat x_t + K_1 u_t, \\ +\tilde z_t &= G \hat x_t + \eta_t, +\end{aligned} ``` where $\eta_t$ is a type 2 white-noise measurement error process ("typos") with presumably very small covariance matrix $R_2$. -The state noise in {eq}`model2_state` is $K_1 u_t$, which has covariance - -```{math} -:label: model2_Q -Q_2 = K_1 V_1 K_1^\top. -``` - The covariance matrix of the joint noise is ```{math} +:label: model2_Q E \begin{bmatrix} K_1 u_t \\ \eta_t \end{bmatrix} \begin{bmatrix} K_1 u_t \\ \eta_t \end{bmatrix}^\top -= \begin{bmatrix} Q_2 & 0 \\ 0 & R_2 \end{bmatrix}. += \begin{bmatrix} Q_2 & 0 \\ 0 & R_2 \end{bmatrix}, ``` -Since $R_2$ is close to or equal to zero (the filtered data have -negligible additional noise), we approximate it with a small -regularization term $R_2 = \epsilon I$ to keep the Kalman filter +where $Q_2 = K_1 V_1 K_1^\top$. 
+ +If $R_2$ is singular, it is necessary to adjust the Kalman filtering +formulas by using transformations that induce a "reduced order observer." + +In practice, we approximate a zero $R_2$ matrix with the matrix +$\epsilon I$ for a small $\epsilon > 0$ to keep the Kalman filter numerically well-conditioned. -A second Kalman filter applied to {eq}`model2_state`--{eq}`model2_obs` -yields a second innovations representation +For system {eq}`model2_obs` and {eq}`model2_Q`, an innovations +representation is ```{math} :label: model2_innov \begin{aligned} \check{x}_{t+1} &= A \check{x}_t + K_2 a_t, \\ -\tilde z_t &= C \check{x}_t + a_t. +\tilde z_t &= G \check{x}_t + a_t, +\end{aligned} +``` + +where + +```{math} +:label: model2_innov_defs +\begin{aligned} +a_t &= \tilde z_t - E[\tilde z_t \mid \tilde z_{t-1}, \tilde z_{t-2}, \ldots], \\ +\check{x}_t &= E[\hat x_t \mid \tilde z_{t-1}, \tilde z_{t-2}, \ldots, \check{x}_0], \\ +S_2 &= E[(\hat x_t - \check{x}_t)(\hat x_t - \check{x}_t)^\top], \\ +[K_2, S_2] &= \text{kelmanfilter}(A, G, Q_2, R_2, 0). \end{aligned} ``` -where $a_t$ is the innovation process for the filtered data with -covariance $V_2 = C S_2 C^\top + R_2$. +Thus $\{a_t\}$ is the innovation process for the reported data +$\tilde z_t$, with innovation covariance + +```{math} +:label: model2_V2 +V_2 = E\, a_t a_t^\top = G\, S_2\, G^\top + R_2. +``` + +### Wold representation -To compute the innovations $\{a_t\}$ from observations on -$\tilde z_t$, use +A Wold moving average representation for $\tilde z_t$ is found from +{eq}`model2_innov` to be + +```{math} +:label: model2_wold +\tilde z_t = \bigl[G(I - AL)^{-1} K_2 L + I\bigr] a_t, +``` + +with coefficients $\psi_0 = I$ and $\psi_j = G A^{j-1} K_2$ for +$j \geq 1$. + +Note that this is simpler than the Model 1 Wold +representation {eq}`model1_wold` because there is no quasi-differencing +to undo. + +### Gaussian likelihood + +When a method analogous to Model 1 is used, a Gaussian log-likelihood +for $\tilde z_t$ can be computed by first computing an $\{a_t\}$ sequence +from observations on $\tilde z_t$ by using ```{math} :label: model2_recursion \begin{aligned} -\check{x}_{t+1} &= (A - K_2 C)\,\check{x}_t + K_2 \tilde z_t, \\ -a_t &= -C\,\check{x}_t + \tilde z_t. +\check{x}_{t+1} &= (A - K_2 G)\,\check{x}_t + K_2 \tilde z_t, \\ +a_t &= -G\,\check{x}_t + \tilde z_t. \end{aligned} ``` -The Gaussian log-likelihood for a sample of $T$ observations +The likelihood function for a sample of $T$ observations $\{\tilde z_t\}$ is then ```{math} @@ -970,28 +1072,27 @@ $\{\tilde z_t\}$ is then - \tfrac{1}{2}\sum_{t=0}^{T-1} a_t' V_2^{-1} a_t. ``` -Computing {eq}`model2_loglik` requires both the first Kalman filter -(to form $\hat x_t$ and $u_t$) and the second Kalman filter -(to form $\check{x}_t$ and $a_t$). +Note that relative to computing the likelihood function +{eq}`model1_loglik` for the error-corrupted data, computing the +likelihood function for the optimally filtered data requires more +calculations. -In effect, the econometrician must retrace the steps that the agency -used to synthesize the filtered data. +Both likelihood functions require that the Kalman filter +{eq}`model1_innov_defs` be computed, while the likelihood function for +the filtered data requires that the Kalman filter +{eq}`model2_innov_defs` also be computed. -### Wold representation for filtered data +In effect, in order to interpret and use the filtered data reported by +the agency, it is necessary to retrace the steps that the agency used +to synthesize those data. 
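+
+To make that retracing concrete, here is a minimal sketch of how one
+might evaluate {eq}`model2_loglik` by iterating on {eq}`model2_recursion`
+over a sample; `z_tilde` stands for a hypothetical $T \times 3$ array of
+reported data, and `A`, `G`, `K2`, `V2` are the matrices of the
+innovations representation {eq}`model2_innov` (with $G = C$ in our
+parameterization)
+
+```{code-cell} ipython3
+def log_likelihood_filtered(z_tilde, A, G, K2, V2, x0=None):
+    """
+    Sketch: Gaussian log-likelihood of a sample of reported data
+    under Model 2, built from the innovations recursion.
+    """
+    T = z_tilde.shape[0]
+    x_check = np.zeros(A.shape[0]) if x0 is None else x0
+    V2_inv = np.linalg.inv(V2)
+    _, logdet = np.linalg.slogdet(V2)
+    loglik = -T * np.log(2 * np.pi) - 0.5 * T * logdet
+    for t in range(T):
+        a_t = z_tilde[t] - G @ x_check          # innovation a_t
+        loglik += -0.5 * a_t @ V2_inv @ a_t
+        x_check = (A - K2 @ G) @ x_check + K2 @ z_tilde[t]
+    return loglik
+```
+
+The likelihood {eq}`model1_loglik` of the raw data can be evaluated with
+the same pattern, replacing $(G, K_2, V_2)$ by $(\bar C, K_1, V_1)$ and
+feeding in the quasi-differenced observations $\bar z_{t+1} - D \bar z_t$.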
-The Wold moving-average representation for $\tilde z_t$ is +The Kalman filter {eq}`model1_innov_defs` is supposed to be formed by +the agency. -```{math} -:label: model2_wold -\tilde z_t = \bigl[C(I - AL)^{-1} K_2 L + I\bigr] a_t, -``` - -with coefficients $\psi_0 = I$ and $\psi_j = C A^{j-1} K_2$ for -$j \geq 1$. +The agency need not use Kalman filter {eq}`model2_innov_defs` because +it does not need the Wold representation for the filtered data. -Note that this is simpler than the Model 1 Wold -representation {eq}`model1_wold` because there is no quasi-differencing -to undo +In our parameterization $G = C$. ```{code-cell} ipython3 Q2 = K1 @ V1 @ K1.T @@ -1010,7 +1111,8 @@ def filtered_wold_coeffs(A, C, K, n_terms=25): psi2 = filtered_wold_coeffs(A, C, K2, n_terms=40) -resp2 = np.array([psi2[j] @ linalg.cholesky(V2, lower=True) for j in range(14)]) +resp2 = np.array( + [psi2[j] @ linalg.cholesky(V2, lower=True) for j in range(14)]) decomp2 = fev_contributions(psi2, V2, n_horizons=20) ``` @@ -1040,10 +1142,6 @@ The second and third innovations contribute negligibly. This confirms that filtering strips away the measurement noise that created the appearance of multiple independent sources of variation in Model 1. -We invite readers to compare this table to the one for the true impulse responses in the {ref}`true-impulse-responses` section above. - -The numbers are essentially the same. - The covariance matrix and eigenvalues of the Model 2 innovations are ```{code-cell} ipython3 @@ -1081,8 +1179,12 @@ display(Latex('$' + r' \quad '.join(parts) + '$')) The income innovation in Model 2 produces responses that closely approximate the true impulse response function from the structural -shock $\theta_t$ (compare with the table in the -{ref}`true-impulse-responses` section above). +shock $\theta_t$. + +Readers can compare the left table with the table in the +{ref}`true-impulse-responses` section above. + +The numbers are essentially the same. The consumption and investment innovations produce responses that are orders of magnitude smaller, confirming that the filtered @@ -1093,13 +1195,25 @@ Unlike Model 1, the filtered data from Model 2 accelerator literature has documented empirically. +Hence, at the population level, the two measurement models imply different +empirical stories even though they share the same structural economy. + +- In Model 1 (raw data), measurement noise creates multiple innovations + and an apparent Granger-causality pattern. +- In Model 2 (filtered data), innovations collapse back to essentially + one dominant shock, mirroring the true one-index economy. + +Let's verify these implications in a finite sample simulation. + ## Simulation The tables above characterize population moments of the two models. -We now simulate 80 periods of true, measured, and filtered data +Let's simulate 80 periods of true, measured, and filtered data to compare population implications with finite-sample behavior. 
+First, we define a function to simulate the true economy, generate measured data with AR(1) measurement errors, and apply the Model 1 Kalman filter to produce filtered estimates + ```{code-cell} ipython3 def simulate_series(seed=7909, T=80, k0=10.0): """ @@ -1161,6 +1275,8 @@ def simulate_series(seed=7909, T=80, k0=10.0): sim = simulate_series(seed=7909, T=80, k0=10.0) ``` +We use the following helper function to plot the true series against either the measured or filtered series + ```{code-cell} ipython3 def plot_true_vs_other(t, true_series, other_series, other_label, ylabel=""): @@ -1178,6 +1294,8 @@ def plot_true_vs_other(t, true_series, other_series, t = np.arange(1, 81) ``` +Let's first compare the true series with the measured series to see how measurement errors distort the data + ```{code-cell} ipython3 --- mystnb: @@ -1221,8 +1339,9 @@ Investment is distorted the most because its measurement error has the largest innovation variance ($\sigma_\eta = 0.65$), while income is distorted the least ($\sigma_\eta = 0.05$). -The Kalman-filtered estimates from Model 1 remove much of the -measurement noise and track the truth closely. + +For the filtered series, we expect the Kalman filter to recover the true series more closely by stripping away measurement noise + ```{code-cell} ipython3 --- @@ -1276,6 +1395,9 @@ plot_true_vs_other(t, sim["k_true"], sim["k_filt"], "filtered", ylabel="capital stock") ``` +Indeed, Kalman-filtered estimates from Model 1 remove much of the +measurement noise and track the truth closely. + In the true model the national income identity $c_t + \Delta k_t = y_{n,t}$ holds exactly. @@ -1284,6 +1406,9 @@ in the measured data. The Kalman filter approximately restores it. +The following figure confirms this by showing the residual $c_t + \Delta k_t - y_{n,t}$ for +both measured and filtered data + ```{code-cell} ipython3 --- mystnb: @@ -1311,100 +1436,7 @@ plt.tight_layout() plt.show() ``` -For each variable $w \in \{c, \Delta k, y_n\}$ we compute the -covariance and correlation matrices among its true, measured, and -filtered versions. - -Each matrix has the structure - -```{math} -\begin{bmatrix} -\text{var}(w^{\text{true}}) & \text{cov}(w^{\text{true}}, w^{\text{meas}}) & \text{cov}(w^{\text{true}}, w^{\text{filt}}) \\ -\cdot & \text{var}(w^{\text{meas}}) & \text{cov}(w^{\text{meas}}, w^{\text{filt}}) \\ -\cdot & \cdot & \text{var}(w^{\text{filt}}) -\end{bmatrix}. -``` - -The key entries are the off-diagonal terms linking true to measured -(distortion from noise) and true to filtered (recovery by the Kalman -filter). 
- -```{code-cell} ipython3 -def cov_corr_three(a, b, c): - X = np.vstack([a, b, c]) - return np.cov(X), np.corrcoef(X) - -def matrix_df(mat, labels): - return pd.DataFrame(np.round(mat, 4), index=labels, columns=labels) - -cov_c, corr_c = cov_corr_three( - sim["c_true"], sim["c_meas"], sim["c_filt"]) -cov_i, corr_i = cov_corr_three( - sim["dk_true"], sim["dk_meas"], sim["dk_filt"]) -cov_y, corr_y = cov_corr_three( - sim["y_true"], sim["y_meas"], sim["y_filt"]) -cov_k = np.cov(np.vstack([sim["k_true"], sim["k_filt"]])) -corr_k = np.corrcoef(np.vstack([sim["k_true"], sim["k_filt"]])) - -tmf_labels = ['true', 'measured', 'filtered'] -tf_labels = ['true', 'filtered'] -``` - -For consumption, measurement error inflates the variance of measured -consumption relative to the truth, as the diagonal of the covariance -matrix shows - -```{code-cell} ipython3 -display(Latex(df_to_latex_matrix(matrix_df(cov_c, tmf_labels)))) -``` - -The correlation matrix confirms that the filtered series recovers the -true series almost perfectly - -```{code-cell} ipython3 -display(Latex(df_to_latex_matrix(matrix_df(corr_c, tmf_labels)))) -``` - -For investment, measurement error creates the most variance inflation here - -```{code-cell} ipython3 -display(Latex(df_to_latex_matrix(matrix_df(cov_i, tmf_labels)))) -``` - -Despite this, the true-filtered correlation remains high, -demonstrating the filter's effectiveness even with severe noise - -```{code-cell} ipython3 -display(Latex(df_to_latex_matrix(matrix_df(corr_i, tmf_labels)))) -``` - -Income has the smallest measurement error ($\sigma_\eta = 0.05$), -so measured and true covariances are nearly identical - -```{code-cell} ipython3 -display(Latex(df_to_latex_matrix(matrix_df(cov_y, tmf_labels)))) -``` - -The correlation matrix shows that both measured and filtered series -track the truth very closely - -```{code-cell} ipython3 -display(Latex(df_to_latex_matrix(matrix_df(corr_y, tmf_labels)))) -``` - -The capital stock is never directly observed, yet -the covariance matrix shows that the filter recovers it with very -high accuracy - -```{code-cell} ipython3 -display(Latex(df_to_latex_matrix(matrix_df(cov_k, tf_labels)))) -``` - -The near-unity correlation confirms this - -```{code-cell} ipython3 -display(Latex(df_to_latex_matrix(matrix_df(corr_k, tf_labels)))) -``` +As we have predicted, the residual for the measured data is large and volatile, while the residual for the filtered data is numerically 0. ## Summary From 150f848ab9b3632a2a11d8ab6238c9f87b3665a9 Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Tue, 10 Feb 2026 11:25:24 +1100 Subject: [PATCH 15/19] updates --- lectures/measurement_models.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md index ebddc3fe2..39103ddd0 100644 --- a/lectures/measurement_models.md +++ b/lectures/measurement_models.md @@ -842,7 +842,7 @@ Granger-cause consumption and investment, but not vice versa. This matches the paper's message that, in a one-common-index model, the relatively best measured series has the strongest predictive content. 
-Let's look at the the covariance matrix of the innovations +Let's look at the covariance matrix of the innovations ```{code-cell} ipython3 print('Covariance matrix of innovations:') @@ -1020,7 +1020,7 @@ where a_t &= \tilde z_t - E[\tilde z_t \mid \tilde z_{t-1}, \tilde z_{t-2}, \ldots], \\ \check{x}_t &= E[\hat x_t \mid \tilde z_{t-1}, \tilde z_{t-2}, \ldots, \check{x}_0], \\ S_2 &= E[(\hat x_t - \check{x}_t)(\hat x_t - \check{x}_t)^\top], \\ -[K_2, S_2] &= \text{kelmanfilter}(A, G, Q_2, R_2, 0). +[K_2, S_2] &= \text{kalmanfilter}(A, G, Q_2, R_2, 0). \end{aligned} ``` From d6d2fe1fafbeda6461a0dd5eee76649ea6c09455 Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Tue, 10 Feb 2026 11:29:37 +1100 Subject: [PATCH 16/19] updates --- lectures/measurement_models.md | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md index 39103ddd0..0ae50c7c0 100644 --- a/lectures/measurement_models.md +++ b/lectures/measurement_models.md @@ -302,7 +302,7 @@ of $j$-step-ahead prediction error variances, depends on the relative variances of the measurement errors. In this case, each observed series mixes the common signal $\theta_t$ -with idiosyncratic measurement noise. +with idiosyncratic measurement noise. A series with lower measurement error variance tracks $\theta_t$ more closely, so its innovations @@ -518,7 +518,7 @@ def steady_state_kalman(A, C_obs, Q, R, W=None, tol=1e-13, max_iter=200_000): With structural matrices and tools we need in place, we now follow {cite:t}`Sargent1989`'s two reporting schemes in sequence. -## A Classical Model of Measurements Initially Collected by an Agency +## A classical model of measurements initially collected by an agency A data collecting agency observes a noise-corrupted version of $z_t$, namely @@ -595,8 +595,8 @@ where $(\varepsilon_t, \bar\nu_t)$ is a white noise process with ```{math} :label: model1_covs E \begin{bmatrix} \varepsilon_t \end{bmatrix} -\begin{bmatrix} \varepsilon_t' & \bar\nu_t' \end{bmatrix} -= \begin{bmatrix} Q & W_1 \\ W_1' & R_1 \end{bmatrix}, +\begin{bmatrix} \varepsilon_t^\top & \bar\nu_t^\top \end{bmatrix} += \begin{bmatrix} Q & W_1 \\ W_1^\top & R_1 \end{bmatrix}, \qquad R_1 = C Q C^\top + R, \quad W_1 = Q C^\top. ``` @@ -757,7 +757,7 @@ $\hat x_0$, can be represented as ```{math} :label: model1_loglik \mathcal{L}^* = -T\ln 2\pi - \tfrac{1}{2}T\ln|V_1| - - \tfrac{1}{2}\sum_{t=0}^{T-1} u_t' V_1^{-1} u_t, + - \tfrac{1}{2}\sum_{t=0}^{T-1} u_t^\top V_1^{-1} u_t, ``` where $u_t$ is a function of $\{\bar z_t\}$ defined by @@ -912,7 +912,7 @@ decay according to the AR(1) structure of their respective measurement errors ($\rho_c = 0.7$, $\rho_{\Delta k} = 0.3$), with little spillover to other variables. -## A Model of Optimal Estimates Reported by an Agency +## A model of optimal estimates reported by an agency Suppose that instead of reporting the error-corrupted data $\bar z_t$, the data collecting agency reports linear least-squares projections of @@ -1069,7 +1069,7 @@ $\{\tilde z_t\}$ is then ```{math} :label: model2_loglik \mathcal{L}^{**} = -T\ln 2\pi - \tfrac{1}{2}T\ln|V_2| - - \tfrac{1}{2}\sum_{t=0}^{T-1} a_t' V_2^{-1} a_t. + - \tfrac{1}{2}\sum_{t=0}^{T-1} a_t^\top V_2^{-1} a_t. 
``` Note that relative to computing the likelihood function @@ -1159,8 +1159,6 @@ As {cite:t}`Sargent1989` emphasizes, the two models of measurement produce quite different inferences about the economy's dynamics despite sharing identical underlying parameters. - - ### Wold impulse responses We again use orthogonalized Wold responses with a Cholesky @@ -1194,7 +1192,6 @@ Unlike Model 1, the filtered data from Model 2 *cannot* reproduce the apparent Granger causality pattern that the accelerator literature has documented empirically. - Hence, at the population level, the two measurement models imply different empirical stories even though they share the same structural economy. @@ -1339,10 +1336,8 @@ Investment is distorted the most because its measurement error has the largest innovation variance ($\sigma_\eta = 0.65$), while income is distorted the least ($\sigma_\eta = 0.05$). - For the filtered series, we expect the Kalman filter to recover the true series more closely by stripping away measurement noise - ```{code-cell} ipython3 --- mystnb: @@ -1413,24 +1408,20 @@ both measured and filtered data --- mystnb: figure: - caption: "National income identity residual: measured (left) vs. filtered (right)" + caption: National income identity residual name: fig-identity-residual - image: - alt: National income identity residual for measured and filtered data side by side --- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4)) ax1.plot(t, sim["c_meas"] + sim["dk_meas"] - sim["y_meas"], lw=2) ax1.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) ax1.set_xlabel("time", fontsize=12) -ax1.set_ylabel("residual", fontsize=12) -ax1.set_title(r'Measured: $c_t + \Delta k_t - y_{n,t}$', fontsize=13) +ax1.set_ylabel("measured residual", fontsize=12) ax2.plot(t, sim["c_filt"] + sim["dk_filt"] - sim["y_filt"], lw=2) ax2.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) ax2.set_xlabel("time", fontsize=12) -ax2.set_ylabel("residual", fontsize=12) -ax2.set_title(r'Filtered: $c_t + \Delta k_t - y_{n,t}$', fontsize=13) +ax2.set_ylabel("filtered residual", fontsize=12) plt.tight_layout() plt.show() @@ -1440,7 +1431,7 @@ As we have predicted, the residual for the measured data is large and volatile, ## Summary -{cite}`Sargent1989` shows how measurement error alters an +{cite:t}`Sargent1989` shows how measurement error alters an econometrician's view of a permanent income economy driven by the investment accelerator. From 65b8e4ca76809eaa034a4cfb6c153fc2bb95edcc Mon Sep 17 00:00:00 2001 From: thomassargent30 Date: Tue, 10 Feb 2026 12:06:59 +0800 Subject: [PATCH 17/19] Tom's Feb 10 edits of two measurement models lecture --- lectures/measurement_models.md | 64 ++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md index 0ae50c7c0..d657948e3 100644 --- a/lectures/measurement_models.md +++ b/lectures/measurement_models.md @@ -41,6 +41,11 @@ If accurate observations on these time series are available, one can use that mapping to implement parameter estimation methods based either on the likelihood function or on the method of moments. +```{note} This is why econometrics estimation is often called an ''inverse'' problem, while +simulating a model for given parameter values is called a ''direct problem''. 
The direct problem +refers to the mapping we have just described, while the inverse problem involves somehow applying an ''inverse'' of that mapping to a data set that is treated as if it were one draw from the joint probability distribution described by the mapping. +``` + However, if only error-ridden data exist for the variables of interest, then more steps are needed to extract parameter estimates. @@ -55,22 +60,21 @@ about the economic structure. {cite:t}`Sargent1989` describes two alternative models of data generation in a {doc}`permanent income ` economy in which the -investment accelerator, the mechanism studied in {doc}`samuelson` and -{doc}`chow_business_cycles`, drives business cycle fluctuations. +investment accelerator, the mechanism studied in these two quantecon lectures -- {doc}`samuelson` and +{doc}`chow_business_cycles` -- shapes business cycle fluctuations. - In Model 1, the data collecting agency simply reports the error-ridden data that it collects. -- In Model 2, although it collects error-ridden data that satisfy - a classical errors-in-variables model, the data collecting agency - filters the data and reports the best estimates that it possibly can. +- In Model 2, the data collection agents first collects error-ridden data that satisfy + a classical errors-in-variables model, then filters the data, and reports the filtered objects. Although the two models have the same "deep parameters," they produce quite different sets of restrictions on the data. -In this lecture we follow {cite:t}`Sargent1989` and study how -alternative measurement schemes change empirical implications. +In this lecture we follow {cite:t}`Sargent1989` and study how these +alternative measurement schemes affect empirical implications. -We start with imports and helper functions used throughout +We start with imports and helper functions to be used throughout this lecture ```{code-cell} ipython3 import numpy as np @@ -127,24 +131,24 @@ def df_to_latex_array(df): ## The economic model -The true economy is a linear-quadratic version of a stochastic -optimal growth model (see also {doc}`perm_income`). +The data are generated by a linear-quadratic version of a stochastic +optimal growth model that is an instance of models described in this quantecon lecture: {doc}`perm_income`. -A social planner maximizes +A social planner chooses a stochastic process for $\{c_t, k_{t+1}\}_{t=0}^\infty$ that maximizes ```{math} :label: planner_obj E \sum_{t=0}^{\infty} \beta^t \left( u_0 + u_1 c_t - \frac{u_2}{2} c_t^2 \right) ``` -subject to the technology +subject to the restrictions imposed by the technology ```{math} :label: tech_constraint -c_t + k_{t+1} = f k_t + \theta_t, \qquad \beta f^2 > 1, +c_t + k_{t+1} = f k_t + \theta_t, \qquad \beta f^2 > 1. ``` -where $c_t$ is consumption, $k_t$ is the capital stock, +Here $c_t$ is consumption, $k_t$ is the capital stock, $f$ is the gross rate of return on capital, and $\theta_t$ is an endowment or technology shock following @@ -152,14 +156,12 @@ and $\theta_t$ is an endowment or technology shock following :label: shock_process a(L)\,\theta_t = \varepsilon_t, ``` - -with $a(L) = 1 - a_1 L - a_2 L^2 - \cdots - a_r L^r$ having all roots +where $L$ is the backward shift (or 'lag') operator and $a(z) = 1 - a_1 z - a_2 z^2 - \cdots - a_r z^r$ having all its zeroes outside the unit circle. 
### Optimal decision rule -The solution can be represented by the optimal decision rule -for $c_t$: +The optimal decision rule for $c_t$ is ```{math} :label: opt_decision @@ -254,7 +256,7 @@ via a geometric distributed lag or "adaptive expectations" scheme. ### The accelerator puzzle When all variables are measured accurately and are driven by -the single shock $\theta_t$, the spectral density of +the single shock $\theta_t$, the spectral density matrix of $(c_t,\, k_{t+1}-k_t,\, y_{nt})$ has rank one at all frequencies. Each variable is an invertible one-sided distributed lag of the @@ -321,11 +323,11 @@ This asymmetry drives the numerical results we observe soon. ### State-space formulation Let's map the economic model and the measurement process into -a recursive state-space framework. +a linear state-space framework. Set $f = 1.05$ and $\theta_t \sim \mathcal{N}(0, 1)$. -Define the state and observable vectors +Define the state and observation vectors ```{math} x_t = \begin{bmatrix} k_t \\ \theta_t \end{bmatrix}, @@ -333,7 +335,7 @@ x_t = \begin{bmatrix} k_t \\ \theta_t \end{bmatrix}, z_t = \begin{bmatrix} y_{nt} \\ c_t \\ \Delta k_t \end{bmatrix}, ``` -so that the true economy follows the state-space system +so that the error-free data are described by the state-space system ```{math} :label: true_ss @@ -394,10 +396,10 @@ Q = np.array([ ### True impulse responses Before introducing measurement error, we compute the impulse response of -the true system to a unit shock $\theta_0 = 1$. +the error-free variables to a unit shock $\theta_0 = 1$. This benchmark clarifies what changes when we later switch from -true variables to reported variables. +error-free variables to variables reported by the statistical agency. The response shows the investment accelerator clearly: the full impact on net income $y_n$ occurs at lag 0, while consumption adjusts by only @@ -672,9 +674,9 @@ K1, S1, V1 = steady_state_kalman(A, C_bar, Q, R1, W1) ``` -### Computing the Wold coefficients +### Computing coefficients in a Wold moving average representation -To compute the Wold coefficients in {eq}`model1_wold` numerically, +To compute the moving average coefficients in {eq}`model1_wold` numerically, define the augmented state ```{math} @@ -707,7 +709,7 @@ I H_1 = [\bar C \;\; D]. ``` -The Wold coefficients are then $\psi_0 = I$ and +The moving average coefficients are then $\psi_0 = I$ and $\psi_j = H_1 F_1^{j-1} G_1$ for $j \geq 1$. ```{code-cell} ipython3 @@ -864,14 +866,14 @@ the presence of a dominant common shock $\theta_t$ ### Wold impulse responses -The Wold impulse responses are reported using orthogonalized +Impulse responses in the Wold representation are reported using orthogonalized innovations (Cholesky factorization of $V_1$ with ordering $y_n$, $c$, $\Delta k$). Under this method, lag-0 responses reflect both contemporaneous covariance and the Cholesky ordering. -We first define a helper function to format the Wold responses as a LaTeX array +We first define a helper function to format the response coefficients as a LaTeX array ```{code-cell} ipython3 lags = np.arange(14) @@ -884,7 +886,7 @@ def wold_response_table(resp, shock_idx, lags): ) ``` -Now we report the Wold responses to each orthogonalized innovation in a single table with three panels +Now we report the impulse responses to each orthogonalized innovation in a single table with three panels ```{code-cell} ipython3 wold_titles = [r'\text{A. 
Response to } y_n \text{ innovation}', @@ -1161,7 +1163,7 @@ sharing identical underlying parameters. ### Wold impulse responses -We again use orthogonalized Wold responses with a Cholesky +We again use orthogonalized Wold representation impulse responses with a Cholesky decomposition of $V_2$ ordered as $y_n$, $c$, $\Delta k$. ```{code-cell} ipython3 From 9de7a5dd9b55860e6dd2cb48e353e77764f1258d Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Wed, 18 Feb 2026 09:44:18 +1100 Subject: [PATCH 18/19] updates --- lectures/chow_business_cycles.md | 30 +++++++++++++++++++----------- lectures/measurement_models.md | 20 ++++++++++++-------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/lectures/chow_business_cycles.md b/lectures/chow_business_cycles.md index d8a424875..6fcc777a5 100644 --- a/lectures/chow_business_cycles.md +++ b/lectures/chow_business_cycles.md @@ -58,7 +58,7 @@ import numpy as np import matplotlib.pyplot as plt ``` -We will use the following helper functions throughout the lecture: +We will use the following helper functions throughout the lecture ```{code-cell} ipython3 def spectral_density_var1(A, V, ω_grid): @@ -209,15 +209,17 @@ A_list = [samuelson_transition(c, v) for _, c, v in cases] for (label, c, v), A in zip(cases, A_list): eig = np.linalg.eigvals(A) disc = (c + v)**2 - 4*v - print(f"{label}: c={c}, v={v}, discriminant={disc:.2f}, eigenvalues={eig}") + print( + f"{label}: c={c}, v={v}, discriminant={disc:.2f}, eigenvalues={eig}") ``` With weak acceleration ($v=0.1$), the discriminant is positive and the roots are real. With strong acceleration ($v=0.8$), the discriminant is negative and the roots are complex conjugates that enable oscillatory dynamics. +Now let's see how these different eigenvalue structures affect the impulse responses to a one-time shock in $Y$ + ```{code-cell} ipython3 -# impulse responses from a one-time unit shock in Y T = 40 s0 = np.array([1.0, 0.0]) irfs = [] @@ -274,7 +276,7 @@ for v in v_grid: s = A @ s axes[1].plot(range(T_irf + 1), irf, lw=2, label=f'$v={v}$') -# Eigenvalue panel with unit circle +# Visualize the eigenvalue locations and the unit circle θ_circle = np.linspace(0, 2*np.pi, 100) axes[0].plot(np.cos(θ_circle), np.sin(θ_circle), 'k--', lw=0.8, label='unit circle') @@ -651,7 +653,7 @@ cos_ω = factor * np.cos(θ) print(f"Chow's example: r = {r_example}, θ = {θ_deg}°") print(f" cos(ω) = {cos_ω:.3f}") print(f" ω = {np.rad2deg(ω_example):.1f}°") -print(f" Peak period = {360/np.rad2deg(ω_example):.1f} (vs deterministic period = {360/θ_deg:.1f})") +print(f" Peak period = {360/np.rad2deg(ω_example):.1f}") ``` As $r \to 1$, the peak frequency converges to $\theta$. 
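
We can check this limit numerically. The snippet below is a sketch that assumes the interior-peak condition takes the AR(2) form $\cos\omega_{\text{peak}} = \frac{1+r^2}{2r}\cos\theta$ (so that the factor multiplying $\cos\theta$ above is $(1+r^2)/2r$); the angle $\theta = 30°$ and the grid of moduli are arbitrary choices.

```{code-cell} ipython3
import numpy as np

# Sketch: as r → 1, the spectral peak frequency approaches the
# eigenvalue angle θ, assuming cos ω_peak = (1 + r**2) cos θ / (2r).
θ_check = np.deg2rad(30)                   # arbitrary eigenvalue angle
for r in (0.7, 0.9, 0.99, 0.999):
    cos_ω_peak = (1 + r**2) / (2 * r) * np.cos(θ_check)
    ω_peak = np.arccos(cos_ω_peak)         # argument stays below 1 here
    print(f"r = {r:5.3f}: peak at {np.rad2deg(ω_peak):5.2f}° (θ = 30°)")
```
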
@@ -710,8 +712,10 @@ c_real, v_real = 0.8, 0.1 A_real = samuelson_transition(c_real, v_real) eig_real = np.linalg.eigvals(A_real) -print(f"Complex case (c={c_complex}, v={v_complex}): eigenvalues = {eig_complex}") -print(f"Real case (c={c_real}, v={v_real}): eigenvalues = {eig_real}") +print( + f"Complex case (c={c_complex}, v={v_complex}): eigenvalues = {eig_complex}") +print( + f"Real case (c={c_real}, v={v_real}): eigenvalues = {eig_real}") F_complex = spectral_density_var1(A_complex, V_hs, ω_grid) F_real = spectral_density_var1(A_real, V_hs, ω_grid) @@ -1045,7 +1049,8 @@ def spectral_density_chow(λ, B, W, ω_grid): F_star = np.zeros((p, p), dtype=complex) for i in range(p): for j in range(p): - denom = (1 - λ[i] * np.exp(-1j * ω)) * (1 - λ[j] * np.exp(1j * ω)) + denom = (1 - λ[i] * np.exp(-1j * ω)) \ + * (1 - λ[j] * np.exp(1j * ω)) F_star[i, j] = W[i, j] / denom F[k] = B @ F_star @ B.T return F / (2 * np.pi) @@ -1265,7 +1270,7 @@ These implied leads and lags are broadly consistent with turning-point timing su ### Building blocks of spectral shape -Each eigenvalue contributes a characteristic spectral shape through the **scalar kernel** +Each eigenvalue contributes a characteristic spectral shape through the *scalar kernel* ```{math} :label: chow_scalar_kernel @@ -1299,6 +1304,7 @@ for i, λ_i in enumerate(λ): label = f'$\\lambda_{i+1}$ = {λ_i:.4f}' \ if np.isreal(λ_i) else f'$\\lambda_{i+1}$ = {λ_i:.3f}' ax.semilogy(freq, g_i, label=label, lw=2) + ax.set_xlabel(r'frequency $\omega/2\pi$') ax.set_ylabel('$g_i(\\omega)$') ax.set_xlim([1/18, 0.5]) @@ -1536,7 +1542,8 @@ plt.show() threshold_idx = np.where(~np.isnan(peak_periods))[0] if len(threshold_idx) > 0: - print(f"interior peak appears when correlation >= {corr_grid[threshold_idx[0]]:.2f}") + print( + f"interior peak when correlation >= {corr_grid[threshold_idx[0]]:.2f}") ``` The interior peak appears only when the shock correlation exceeds a threshold. @@ -1587,7 +1594,8 @@ print("\nRecursion method:") print(np.real(Γ_recursion[5][:3, :3]).round(10)) print("\nEigendecomposition method:") print(Γ_eigen[5][:3, :3].round(10)) -print("\nMax absolute difference:", np.max(np.abs(np.real(Γ_recursion[5]) - Γ_eigen[5]))) +print("\nMax absolute difference:", + np.max(np.abs(np.real(Γ_recursion[5]) - Γ_eigen[5]))) ``` Both methods produce essentially identical results, up to numerical precision. diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md index d657948e3..07ba71381 100644 --- a/lectures/measurement_models.md +++ b/lectures/measurement_models.md @@ -74,7 +74,7 @@ quite different sets of restrictions on the data. In this lecture we follow {cite:t}`Sargent1989` and study how these alternative measurement schemes affect empirical implications. -We start with imports and helper functions to be used throughout this lecture +We start with imports and helper functions to be used throughout this lecture to generate LaTeX output ```{code-cell} ipython3 import numpy as np @@ -203,7 +203,7 @@ Assumption 1 is crucial for the strict form of the accelerator. Relaxing it to allow serially correlated $\theta_t$ preserves an accelerator in a broad sense but loses the sharp geometric-lag -form of {eq}`accelerator`. +form of {eq}`mm_accelerator`. 
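
To see what this geometric-lag form amounts to in numbers, here is a small sketch (not part of the lecture's code) that expands the accelerator lag polynomial $f^{-1}(1-L)/(1-\beta L)$ referenced here and displayed below; $f = 1.05$ matches the value used later in the lecture, while $\beta = 0.95$ is only a stand-in.

```{code-cell} ipython3
import numpy as np

# Sketch: weights of f**-1 * (1 - L) / (1 - β L) on current and lagged
# income.  f = 1.05 matches the lecture; β = 0.95 is a stand-in value.
β, f = 0.95, 1.05
n = 200
weights = np.empty(n)
weights[0] = 1 / f                                   # lag 0
weights[1:] = -(1 - β) * β**np.arange(n - 1) / f     # lags 1, 2, ...
print(np.round(weights[:8], 4))    # geometric decay after lag 0
print(round(weights.sum(), 4))     # ≈ 0: investment responds to income changes
```
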
Adding a second shock breaks the one-index structure entirely and can generate nontrivial Granger causality even without @@ -227,7 +227,7 @@ c_t = \left(\frac{1-\beta}{1-\beta L}\right) y_{nt}, ``` ```{math} -:label: accelerator +:label: mm_accelerator k_{t+1} - k_t = f^{-1} \left(\frac{1-L}{1-\beta L}\right) y_{nt}, ``` @@ -240,7 +240,7 @@ Equation {eq}`friedman_consumption` is Friedman's consumption model: consumption is a geometric distributed lag of income, with the decay coefficient $\beta$ equal to the discount factor. -Equation {eq}`accelerator` is the distributed lag accelerator: +Equation {eq}`mm_accelerator` is the distributed lag accelerator: investment is a geometric distributed lag of the first difference of income. @@ -746,7 +746,8 @@ def fev_contributions(psi, V, n_horizons=20): psi1 = measured_wold_coeffs(F1, G1, H1, n_terms=40) -resp1 = np.array([psi1[j] @ linalg.cholesky(V1, lower=True) for j in range(14)]) +resp1 = np.array( + [psi1[j] @ linalg.cholesky(V1, lower=True) for j in range(14)]) decomp1 = fev_contributions(psi1, V1, n_horizons=20) ``` @@ -829,7 +830,8 @@ shock_titles = [r'\text{A. Innovation in } y_n', parts = [] for i, title in enumerate(shock_titles): arr = df_to_latex_array(fev_table(decomp1, i, horizons)).strip('$') - parts.append(r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') + parts.append( + r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') display(Latex('$' + r' \quad '.join(parts) + '$')) ``` @@ -896,7 +898,8 @@ wold_titles = [r'\text{A. Response to } y_n \text{ innovation}', parts = [] for i, title in enumerate(wold_titles): arr = df_to_latex_array(wold_response_table(resp1, i, lags)).strip('$') - parts.append(r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') + parts.append( + r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') display(Latex('$' + r' \quad '.join(parts) + '$')) ``` @@ -1130,7 +1133,8 @@ just like the true economy parts = [] for i, title in enumerate(shock_titles): arr = df_to_latex_array(fev_table(decomp2, i, horizons)).strip('$') - parts.append(r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') + parts.append( + r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') display(Latex('$' + r' \quad '.join(parts) + '$')) ``` From 3d0da78f0ba8147c19a28221517b0b3b1424d42c Mon Sep 17 00:00:00 2001 From: Humphrey Yang Date: Wed, 18 Feb 2026 10:15:24 +1100 Subject: [PATCH 19/19] updates --- lectures/measurement_models.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md index 07ba71381..b2d6e42e0 100644 --- a/lectures/measurement_models.md +++ b/lectures/measurement_models.md @@ -1284,12 +1284,11 @@ We use the following helper function to plot the true series against either the def plot_true_vs_other(t, true_series, other_series, other_label, ylabel=""): fig, ax = plt.subplots(figsize=(8, 4)) - ax.plot(t, true_series, lw=2, color="black", label="true") - ax.plot(t, other_series, lw=2, ls="--", - color="#1f77b4", label=other_label) - ax.set_xlabel("time", fontsize=12) - ax.set_ylabel(ylabel, fontsize=12) - ax.legend(loc="best", fontsize=11, frameon=True, shadow=True) + ax.plot(t, true_series, lw=2, label="true") + ax.plot(t, other_series, lw=2, ls="--", label=other_label) + ax.set_xlabel("time") + ax.set_ylabel(ylabel) + ax.legend() plt.tight_layout() plt.show() @@ -1421,13 +1420,13 @@ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4)) ax1.plot(t, sim["c_meas"] + 
sim["dk_meas"] - sim["y_meas"], lw=2) ax1.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) -ax1.set_xlabel("time", fontsize=12) -ax1.set_ylabel("measured residual", fontsize=12) +ax1.set_xlabel("time") +ax1.set_ylabel("measured residual") ax2.plot(t, sim["c_filt"] + sim["dk_filt"] - sim["y_filt"], lw=2) ax2.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) -ax2.set_xlabel("time", fontsize=12) -ax2.set_ylabel("filtered residual", fontsize=12) +ax2.set_xlabel("time") +ax2.set_ylabel("filtered residual") plt.tight_layout() plt.show()