diff --git a/lectures/_static/quant-econ.bib b/lectures/_static/quant-econ.bib index 218573589..bd35b4809 100644 --- a/lectures/_static/quant-econ.bib +++ b/lectures/_static/quant-econ.bib @@ -4,6 +4,29 @@ ### +@incollection{slutsky:1927, + address = {Moscow}, + author = {Slutsky, Eugen}, + booktitle = {Problems of Economic Conditions}, + date-added = {2021-02-16 14:44:03 -0600}, + date-modified = {2021-02-16 14:44:03 -0600}, + publisher = {The Conjuncture Institute}, + title = {The Summation of Random Causes as the Source of Cyclic Processes}, + volume = {3}, + year = {1927} +} + +@incollection{frisch33, + author = {Ragar Frisch}, + booktitle = {Economic Essays in Honour of Gustav Cassel}, + date-added = {2015-01-09 21:08:15 +0000}, + date-modified = {2015-01-09 21:08:15 +0000}, + pages = {171-205}, + publisher = {Allen and Unwin}, + title = {Propagation Problems and Impulse Problems in Dynamic Economics}, + year = {1933} +} + @article{harsanyi1968games, title={Games with Incomplete Information Played by ``{B}ayesian'' Players, {I}--{III} Part {II}. {B}ayesian Equilibrium Points}, author={Harsanyi, John C.}, @@ -2184,6 +2207,16 @@ @book{Sargent1987 year = {1987} } +@article{Sargent1989, + author = {Sargent, Thomas J}, + title = {Two Models of Measurements and the Investment Accelerator}, + journal = {Journal of Political Economy}, + volume = {97}, + number = {2}, + pages = {251--287}, + year = {1989} +} + @article{SchechtmanEscudero1977, author = {Schechtman, Jack and Escudero, Vera L S}, journal = {Journal of Economic Theory}, @@ -2733,3 +2766,27 @@ @article{Meghir2004 year={2004}, publisher={Wiley Online Library} } + +@article{Chow1968, + title={The Acceleration Principle and the Nature of Business Cycles}, + author={Chow, Gregory C.}, + journal={The Quarterly Journal of Economics}, + volume={82}, + number={3}, + pages={403--418}, + year={1968}, + month={aug}, + publisher={Oxford University Press} +} + +@article{ChowLevitan1969, + title={Nature of Business Cycles Implicit in a Linear Economic Model}, + author={Chow, Gregory C. and Levitan, Richard E.}, + journal={The Quarterly Journal of Economics}, + volume={83}, + number={3}, + pages={504--517}, + year={1969}, + month={aug}, + publisher={Oxford University Press} +} diff --git a/lectures/_toc.yml b/lectures/_toc.yml index 8d63e5906..098deaaa7 100644 --- a/lectures/_toc.yml +++ b/lectures/_toc.yml @@ -56,10 +56,12 @@ parts: - file: inventory_dynamics - file: linear_models - file: samuelson + - file: chow_business_cycles - file: kesten_processes - file: wealth_dynamics - file: kalman - file: kalman_2 + - file: measurement_models - caption: Search numbered: true chapters: diff --git a/lectures/chow_business_cycles.md b/lectures/chow_business_cycles.md new file mode 100644 index 000000000..6fcc777a5 --- /dev/null +++ b/lectures/chow_business_cycles.md @@ -0,0 +1,1652 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.17.1 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(chow_business_cycles)= + +```{raw} jupyter +
+        <a href="https://quantecon.org/" title="quantecon.org">
+                <img style="width:250px;display:inline;" width="250px" src="https://assets.quantecon.org/img/qe-menubar-logo.svg" alt="QuantEcon">
+        </a>
+</div>
+``` + +# The Acceleration Principle and the Nature of Business Cycles + +```{contents} Contents +:depth: 2 +``` + +## Overview + +This lecture studies two classic papers by Gregory Chow: + +- {cite:t}`Chow1968` presents empirical evidence for the acceleration principle, describes how acceleration promotes oscillations, and analyzes conditions for the emergence of spectral peaks in linear difference equation subjected to random shocks +- {cite:t}`ChowLevitan1969` presents a spectral analysis of a calibrated US macroeconometric model and teaches about spectral gains, coherences, and lead–lag patterns + +These papers are related to ideas in the following lectures: + +- The multiplier–accelerator mechanism in {doc}`samuelson` +- Linear stochastic difference equations and autocovariances in {doc}`linear_models` +- Eigenmodes of multivariate dynamics in {doc}`var_dmd` +- Fourier ideas in {doc}`eig_circulant` (and, for empirical estimation, the advanced lecture {doc}`advanced:estspec`) + +{cite:t}`Chow1968` builds on earlier empirical work testing the acceleration principle on US investment data. + +We start with that empirical evidence before developing the theoretical framework. + +We will keep returning to three ideas: + +- In deterministic models, oscillations indicate complex eigenvalues of a transition matrix. +- In stochastic models, a "cycle" shows up as a local peak in a (univariate) spectral density. +- Spectral peaks depend on eigenvalues, but also on how shocks enter and on how observables load on eigenmodes. + +Let's start with some standard imports: + +```{code-cell} ipython3 +import numpy as np +import matplotlib.pyplot as plt +``` + +We will use the following helper functions throughout the lecture + +```{code-cell} ipython3 +def spectral_density_var1(A, V, ω_grid): + """Spectral density matrix for VAR(1): y_t = A y_{t-1} + u_t.""" + A, V = np.asarray(A), np.asarray(V) + n = A.shape[0] + I = np.eye(n) + F = np.empty((len(ω_grid), n, n), dtype=complex) + for k, ω in enumerate(ω_grid): + H = np.linalg.inv(I - np.exp(-1j * ω) * A) + F[k] = (H @ V @ H.conj().T) / (2 * np.pi) + return F + +def spectrum_of_linear_combination(F, b): + """Spectrum of x_t = b'y_t given the spectral matrix F(ω).""" + b = np.asarray(b).reshape(-1, 1) + return np.array([np.real((b.T @ F[k] @ b).item()) + for k in range(F.shape[0])]) + +def simulate_var1(A, V, T, burn=200, seed=1234): + r"""Simulate y_t = A y_{t-1} + u_t with u_t \sim N(0, V).""" + rng = np.random.default_rng(seed) + A, V = np.asarray(A), np.asarray(V) + n = A.shape[0] + chol = np.linalg.cholesky(V) + y = np.zeros((T + burn, n)) + + for t in range(1, T + burn): + y[t] = A @ y[t - 1] + chol @ rng.standard_normal(n) + + return y[burn:] + +def sample_autocorrelation(x, max_lag): + """Sample autocorrelation of a 1d array from lag 0 to max_lag.""" + x = np.asarray(x) + x = x - x.mean() + denom = np.dot(x, x) + acf = np.empty(max_lag + 1) + for k in range(max_lag + 1): + acf[k] = np.dot(x[:-k] if k else x, x[k:]) / denom + return acf +``` + +(empirical_section)= +## Empirical foundation for the acceleration principle + +{cite:t}`Chow1968` opens by reviewing empirical evidence for the acceleration principle from earlier macroeconometric work. 
+ +Using annual observations for 1931--40 and 1948--63, Chow tested the acceleration equation on three investment categories: + +- new construction +- gross private domestic investment in producers' durable equipment combined with change in business inventories +- the last two variables separately + +In each case, when the regression included both $Y_t$ and $Y_{t-1}$ (where $Y$ is gross national product minus taxes net of transfers), the coefficient on $Y_{t-1}$ was of *opposite sign* and slightly smaller in absolute value than the coefficient on $Y_t$. + +Equivalently, when expressed in terms of $\Delta Y_t$ and $Y_{t-1}$, the coefficient on $Y_{t-1}$ was a small fraction of the coefficient on $\Delta Y_t$. + +### An example: automobile demand + +Chow presents a clean illustration using data on net investment in automobiles from his earlier work on automobile demand. + +Using annual data for 1922--41 and 1948--57, he estimates by least squares: + +```{math} +:label: chow_auto_eq5 + +y_t^n = \underset{(0.0022)}{0.0155} Y_t \underset{(0.0020)}{- 0.0144} Y_{t-1} \underset{(0.0056)}{- 0.0239} p_t \underset{(0.0040)}{+ 0.0199} p_{t-1} + \underset{(0.101)}{0.351} y_{t-1}^n + \text{const.} +``` + +where: +- $Y_t$ is real disposable personal income per capita +- $p_t$ is a relative price index for automobiles +- $y_t^n$ is per capita net investment in passenger automobiles +- standard errors appear in parentheses + +The key observation: the coefficients on $Y_{t-1}$ and $p_{t-1}$ are *the negatives* of the coefficients on $Y_t$ and $p_t$. + +This pattern is exactly what the acceleration principle predicts. + +### From stock adjustment to acceleration + +The empirical support for acceleration should not be surprising once we accept a stock-adjustment demand equation for capital: + +```{math} +:label: chow_stock_adj_emp + +s_{it} = a_i Y_t + b_i s_{i,t-1} +``` + +where $s_{it}$ is the stock of capital good $i$. + +The acceleration equation {eq}`chow_auto_eq5` is essentially the *first difference* of {eq}`chow_stock_adj_emp`. + +Net investment is the change in stock, $y_{it}^n = \Delta s_{it}$, and differencing {eq}`chow_stock_adj_emp` gives: + +```{math} +:label: chow_acc_from_stock + +y_{it}^n = a_i \Delta Y_t + b_i y_{i,t-1}^n +``` + +The coefficients on $Y_t$ and $Y_{t-1}$ in the level form are $a_i$ and $-a_i(1-b_i)$ respectively. + +They are opposite in sign and similar in magnitude when $b_i$ is not too far from unity. + +This connection between stock adjustment and acceleration is central to Chow's argument about why acceleration matters for business cycles. + +## Acceleration enables oscillations + +Having established the empirical evidence for acceleration, we now examine why it matters theoretically for generating oscillations. + +{cite:t}`Chow1968` asks a fundamental question: if we build a macro model using only standard demand equations with simple distributed lags, can the system generate sustained oscillations? + +He shows that, under natural sign restrictions, the answer is no. + +Stock-adjustment demand for durable goods leads to investment equations where the coefficient on $Y_{t-1}$ is negative. + +This negative coefficient captures the **acceleration effect**: investment responds not just to the level of income, but to its rate of change. + +This negative coefficient is also what makes complex roots possible in the characteristic equation. + +Without it, Chow proves that demand systems with only positive coefficients have real positive roots, and hence no oscillatory dynamics. 
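+
+To see why the sign matters, consider a second-order equation $Y_t = a_1 Y_{t-1} + a_2 Y_{t-2}$ with both coefficients positive: the characteristic polynomial $\lambda^2 - a_1 \lambda - a_2$ has discriminant $a_1^2 + 4 a_2 > 0$, so complex roots are impossible.
+
+The sketch below checks this numerically for randomly drawn positive coefficients (a second-order illustration of the point, not Chow's general proof).
+
+```{code-cell} ipython3
+rng = np.random.default_rng(0)
+
+# Draw positive coefficients for Y_t = a1 Y_{t-1} + a2 Y_{t-2} and
+# inspect the roots of λ² - a1 λ - a2 = 0
+a1, a2 = rng.uniform(0.05, 1.5, size=(2, 1000))
+roots = np.array([np.roots([1.0, -x, -y]) for x, y in zip(a1, a2)])
+dominant = roots[np.arange(len(roots)), np.argmax(np.abs(roots), axis=1)]
+
+print("any complex roots?        ", bool(np.any(np.iscomplex(roots))))
+print("dominant root always > 0? ", bool(np.all(dominant > 0)))
+```
+
+With positive coefficients the roots are real and the dominant root is positive, so the deterministic path cannot display damped sinusoidal oscillations; the negative, acceleration-induced coefficient is what opens that door.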
+ +The {doc}`samuelson` lecture explores this mechanism in detail through the Hansen-Samuelson multiplier-accelerator model. + +Here we briefly illustrate the effect. + +Take the multiplier–accelerator law of motion: + +```{math} +Y_t = c Y_{t-1} + v (Y_{t-1} - Y_{t-2}), +``` + +and rewrite it as a first-order system in $(Y_t, Y_{t-1})$. + +```{code-cell} ipython3 +def samuelson_transition(c, v): + return np.array([[c + v, -v], [1.0, 0.0]]) + +# Compare weak vs strong acceleration +# Weak: c=0.8, v=0.1 gives real roots (discriminant > 0) +# Strong: c=0.6, v=0.8 gives complex roots (discriminant < 0) +cases = [("weak acceleration", 0.8, 0.1), + ("strong acceleration", 0.6, 0.8)] +A_list = [samuelson_transition(c, v) for _, c, v in cases] + +for (label, c, v), A in zip(cases, A_list): + eig = np.linalg.eigvals(A) + disc = (c + v)**2 - 4*v + print( + f"{label}: c={c}, v={v}, discriminant={disc:.2f}, eigenvalues={eig}") +``` + +With weak acceleration ($v=0.1$), the discriminant is positive and the roots are real. + +With strong acceleration ($v=0.8$), the discriminant is negative and the roots are complex conjugates that enable oscillatory dynamics. + +Now let's see how these different eigenvalue structures affect the impulse responses to a one-time shock in $Y$ + +```{code-cell} ipython3 +T = 40 +s0 = np.array([1.0, 0.0]) +irfs = [] +for A in A_list: + s = s0.copy() + path = np.empty(T + 1) + for t in range(T + 1): + path[t] = s[0] + s = A @ s + irfs.append(path) + +fig, ax = plt.subplots(figsize=(10, 4)) +ax.plot(range(T + 1), irfs[0], lw=2, + label="weak acceleration (real roots)") +ax.plot(range(T + 1), irfs[1], lw=2, + label="strong acceleration (complex roots)") +ax.axhline(0.0, lw=0.8, color='gray') +ax.set_xlabel("time") +ax.set_ylabel(r"$Y_t$") +ax.legend(frameon=False) +plt.tight_layout() +plt.show() +``` + +With weak acceleration, the impulse response decays monotonically. + +With strong acceleration, it oscillates. + +We can ask how the eigenvalues change as we increase the accelerator $v$. + +As we increase the accelerator $v$, the eigenvalues move further from the origin. + +For this model, the eigenvalue modulus is $|\lambda| = \sqrt{v}$, so the stability boundary is $v = 1$. + +```{code-cell} ipython3 +v_grid = [0.2, 0.4, 0.6, 0.8, 0.95] +c = 0.6 +T_irf = 40 # periods for impulse response + +fig, axes = plt.subplots(1, 2, figsize=(12, 5)) + +for v in v_grid: + A = samuelson_transition(c, v) + eig = np.linalg.eigvals(A) + + # Eigenvalues (left panel) + axes[0].scatter(eig.real, eig.imag, s=40, label=f'$v={v}$') + + # Impulse response (right panel) + s = np.array([1.0, 0.0]) + irf = np.empty(T_irf + 1) + for t in range(T_irf + 1): + irf[t] = s[0] + s = A @ s + axes[1].plot(range(T_irf + 1), irf, lw=2, label=f'$v={v}$') + +# Visualize the eigenvalue locations and the unit circle +θ_circle = np.linspace(0, 2*np.pi, 100) +axes[0].plot(np.cos(θ_circle), np.sin(θ_circle), + 'k--', lw=0.8, label='unit circle') +axes[0].set_xlabel('real part') +axes[0].set_ylabel('imaginary part') +axes[0].set_aspect('equal') +axes[0].legend(frameon=False) + +# impulse response panel +axes[1].axhline(0, lw=0.8, color='gray') +axes[1].set_xlabel('time') +axes[1].set_ylabel(r'$Y_t$') +axes[1].legend(frameon=False) + +plt.tight_layout() +plt.show() +``` + +As $v$ increases, eigenvalues approach the unit circle and oscillations become more persistent. + +This illustrates that acceleration creates complex eigenvalues, which are necessary for oscillatory dynamics in deterministic systems. 
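+
+The statement that $|\lambda| = \sqrt{v}$ follows from the characteristic polynomial $\lambda^2 - (c + v)\lambda + v$: the product of the two roots equals $v$, so when they form a complex conjugate pair their common modulus is $\sqrt{v}$. Here is a quick numerical confirmation with the same $c = 0.6$ used above.
+
+```{code-cell} ipython3
+c = 0.6
+for v in [0.4, 0.6, 0.8, 0.95]:
+    # For these (c, v) pairs the roots are complex, so |λ|² = det(A) = v
+    eig = np.linalg.eigvals(samuelson_transition(c, v))
+    print(f"v = {v}: |λ| = {np.abs(eig[0]):.4f}, √v = {np.sqrt(v):.4f}")
+```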
+ +But what happens when we add random shocks? + +An insight of Ragnar Frisch {cite}`frisch33` was that damped oscillations can be "maintained" when the system is continuously perturbed by random disturbances. + +To study this formally, we need to introduce the stochastic framework. + +## A linear system with shocks + +We analyze a first-order linear stochastic system + +```{math} +:label: chow_var1 + +y_t = A y_{t-1} + u_t, +\qquad +\mathbb E[u_t] = 0, +\qquad +\mathbb E[u_t u_t^\top] = V, +\qquad +\mathbb E[u_t u_{t-k}^\top] = 0, \quad k \neq 0. +``` + +When the eigenvalues of $A$ are strictly inside the unit circle, the process is covariance stationary and its autocovariances exist. + +In the notation of {doc}`linear_models`, this is the same stability condition that guarantees a unique solution to a discrete Lyapunov equation. + +Define the lag-$k$ autocovariance matrices + +```{math} +:label: chow_autocov_def + +\Gamma_k := \mathbb E[y_t y_{t-k}^\top] . +``` + +Standard calculations (also derived in {cite}`Chow1968`) give the recursion + +```{math} +:label: chow_autocov_rec + +\Gamma_k = A \Gamma_{k-1}, \quad k \ge 1, +\qquad\text{and}\qquad +\Gamma_0 = A \Gamma_0 A^\top + V. +``` + +The second equation is the discrete Lyapunov equation for $\Gamma_0$. + +{cite:t}`Chow1968` motivates the stochastic analysis with a quote from Ragnar Frisch: + +> The examples we have discussed ... show that when a [deterministic] economic system gives rise to oscillations, these will most frequently be damped. +> But in reality the cycles ... are generally not damped. +> How can the maintenance of the swings be explained? +> ... One way which I believe is particularly fruitful and promising is to study what would become of the solution of a determinate dynamic system if it were exposed to a stream of erratic shocks ... +> Thus, by connecting the two ideas: (1) the continuous solution of a determinate dynamic system and (2) the discontinuous shocks intervening and supplying the energy that may maintain the swings—we get a theoretical setup which seems to furnish a rational interpretation of those movements which we have been accustomed to see in our statistical time data. +> +> — Ragnar Frisch (1933) {cite}`frisch33` + +Chow's main insight is that oscillations in the deterministic system are *neither necessary nor sufficient* for producing "cycles" in the stochastic system. + +We have to bring the stochastic element into the picture. + +We will show that even when eigenvalues are real (no deterministic oscillations), the stochastic system can exhibit cyclical patterns in its autocovariances and spectral densities. + +### Autocovariances in terms of eigenvalues + +Let $\lambda_1, \ldots, \lambda_p$ be the distinct, possibly complex, eigenvalues of $A$, and let $B$ be the matrix whose columns are the corresponding right eigenvectors: + +```{math} +:label: chow_eigen_decomp + +A B = B D_\lambda, \quad \text{or equivalently} \quad A = B D_\lambda B^{-1} +``` + +where $D_\lambda = \text{diag}(\lambda_1, \ldots, \lambda_p)$. + +Define canonical variables $z_t = B^{-1} y_t$. + +These satisfy the decoupled dynamics: + +```{math} +:label: chow_canonical_dynamics + +z_t = D_\lambda z_{t-1} + \varepsilon_t +``` + +where $\varepsilon_t = B^{-1} u_t$ has covariance matrix $W = B^{-1} V (B^{-1})^\top$. 
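+
+A quick numerical illustration of this decoupling, using the strong-acceleration Samuelson matrix from above as the example (this is our check, not part of Chow's exposition): $B^{-1} A B$ reproduces the diagonal matrix of eigenvalues.
+
+```{code-cell} ipython3
+A_sa = samuelson_transition(0.6, 0.8)
+λ_sa, B_sa = np.linalg.eig(A_sa)
+
+# B⁻¹ A B should equal diag(λ₁, λ₂) up to rounding error
+D_sa = np.linalg.inv(B_sa) @ A_sa @ B_sa
+print("B⁻¹AB =\n", np.round(D_sa, 12))
+print("eigenvalues:", λ_sa)
+```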
+ +The autocovariance matrix of the canonical variables, denoted $\Gamma_k^*$, satisfies + +```{math} +:label: chow_canonical_autocov + +\Gamma_k^* = D_\lambda^k \Gamma_0^*, \quad k = 1, 2, 3, \ldots +``` + +and + +```{math} +:label: chow_gamma0_star + +\Gamma_0^* = \left( \frac{w_{ij}}{1 - \lambda_i \lambda_j} \right) +``` + +where $w_{ij}$ are elements of $W$. + +The autocovariance matrices of the original variables are then + +```{math} +:label: chow_autocov_eigen + +\Gamma_k = B \Gamma_k^* B^\top = B D_\lambda^k \Gamma_0^* B^\top, \quad k = 0, 1, 2, \ldots +``` + +The scalar autocovariance $\gamma_{ij,k} = \mathbb{E}[y_{it} y_{j,t-k}]$ is a *linear combination* of powers of the eigenvalues: + +```{math} +:label: chow_scalar_autocov + +\gamma_{ij,k} = \sum_m \sum_n b_{im} b_{jn} \gamma^*_{mn,0} \lambda_m^k = \sum_m d_{ij,m} \lambda_m^k +``` + +Compare this to the deterministic time path from initial condition $y_0$: + +```{math} +:label: chow_det_path + +y_{it} = \sum_j b_{ij} z_{j0} \lambda_j^t +``` + +Both the autocovariance function {eq}`chow_scalar_autocov` and the deterministic path {eq}`chow_det_path` are linear combinations of $\lambda_m^k$ (or $\lambda_j^t$). + +### Complex roots and damped oscillations + +When eigenvalues come in complex conjugate pairs $\lambda = r e^{\pm i\theta}$ with $r < 1$, their contribution to the autocovariance function is a **damped cosine**: + +```{math} +:label: chow_damped_cosine + +2 s r^k \cos(\theta k + \phi) +``` + +for appropriate amplitude $s$ and phase $\phi$ determined by the eigenvector loadings. + +In the deterministic model, such complex roots generate damped oscillatory time paths. + +In the stochastic model, they generate damped oscillatory autocovariance functions. + +It is in this sense that deterministic oscillations could be "maintained" in the stochastic model, but as we will see, the connection between eigenvalues and spectral peaks is more subtle than this suggests. + +## From autocovariances to spectra + +Chow’s key step is to translate the autocovariance sequence $\{\Gamma_k\}$ into a frequency-domain object. + +The **spectral density matrix** is the Fourier transform of $\Gamma_k$: + +```{math} +:label: chow_spectral_def + +F(\omega) := \frac{1}{2\pi} \sum_{k=-\infty}^{\infty} \Gamma_k e^{-i \omega k}, +\qquad \omega \in [0, \pi]. +``` + +For the VAR(1) system {eq}`chow_var1`, this sum has a closed form + +```{math} +:label: chow_spectral_closed + +F(\omega) += \frac{1}{2\pi} +\left(I - A e^{-i\omega}\right)^{-1} +V +\left(I - A^\top e^{i\omega}\right)^{-1}. +``` + +$F(\omega)$ tells us how much variation in $y_t$ is associated with cycles of (angular) frequency $\omega$. + +Higher frequencies correspond to rapid oscillations, meaning short cycles where the series completes many up-and-down movements per unit of time. + +Lower frequencies correspond to slower oscillations, meaning long cycles that unfold over extended periods. + +The corresponding cycle length (or period) is + +```{math} +:label: chow_period + +T(\omega) = \frac{2\pi}{\omega}. +``` + +Thus, a frequency of $\omega = \pi$ corresponds to the shortest possible cycle of $T = 2$ periods, while frequencies near zero correspond to very long cycles. + +When the spectral density $F(\omega)$ is concentrated at particular frequencies, it indicates that the time series exhibits pronounced cyclical behavior at those frequencies. + +The advanced lecture {doc}`advanced:estspec` explains how to estimate $F(\omega)$ from data. + +Here we focus on the model-implied spectrum. 
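+
+As a consistency check on {eq}`chow_spectral_closed`, the spectrum must integrate back to the unconditional covariance, $\Gamma_0 = \int_{-\pi}^{\pi} F(\omega)\, d\omega$. The sketch below verifies this for the strong-acceleration Samuelson system used earlier (our illustrative example, not a computation from the paper).
+
+```{code-cell} ipython3
+A_check = samuelson_transition(0.6, 0.8)
+V_check = np.array([[1.0, 0.0], [0.0, 0.0]])
+n = A_check.shape[0]
+
+# Γ_0 from the Lyapunov equation Γ_0 = A Γ_0 A' + V,
+# solved via vec(Γ_0) = (I - A⊗A)⁻¹ vec(V)
+vec_Γ0 = np.linalg.solve(np.eye(n**2) - np.kron(A_check, A_check),
+                         V_check.flatten())
+Γ0_lyap = vec_Γ0.reshape(n, n)
+
+# Γ_0 by integrating the spectral density: since F(-ω) is the complex
+# conjugate of F(ω), the integral equals 2 ∫_0^π Re F(ω) dω
+ω_check = np.linspace(0.0, np.pi, 5001)
+F_re = np.real(spectral_density_var1(A_check, V_check, ω_check))
+dω = np.diff(ω_check)
+Γ0_spec = 2 * np.sum(0.5 * (F_re[1:] + F_re[:-1]) * dω[:, None, None], axis=0)
+
+print("Γ_0 from the Lyapunov equation:\n", Γ0_lyap.round(3))
+print("Γ_0 from the integrated spectrum:\n", Γ0_spec.round(3))
+```
+
+The two matrices agree up to the accuracy of the numerical integration.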
+ +We saw earlier that acceleration creates complex eigenvalues, which enable oscillatory impulse responses. + +But do complex roots guarantee a spectral peak? + +Are they necessary for one? + +Chow provides precise answers for the Hansen-Samuelson model. + +## Spectral peaks in the Hansen-Samuelson model + +{cite:t}`Chow1968` provides a detailed spectral analysis of the Hansen-Samuelson multiplier-accelerator model, deriving exact conditions for when spectral peaks occur. + +The analysis reveals that in this specific model, complex roots are *necessary* for a peak, but as we will see later, this is not true in general. + +### The model as a first-order system + +The second-order Hansen-Samuelson equation can be written as a first-order system: + +```{math} +:label: chow_hs_system + +\begin{bmatrix} y_{1t} \\ y_{2t} \end{bmatrix} = +\begin{bmatrix} a_{11} & a_{12} \\ 1 & 0 \end{bmatrix} +\begin{bmatrix} y_{1,t-1} \\ y_{2,t-1} \end{bmatrix} + +\begin{bmatrix} u_{1t} \\ 0 \end{bmatrix} +``` + +where $y_{2t} = y_{1,t-1}$ is simply the lagged value of $y_{1t}$. + +This structure implies a special relationship among the autocovariances: + +```{math} +:label: chow_hs_autocov_relation + +\gamma_{11,k} = \gamma_{22,k} = \gamma_{12,k-1} = \gamma_{21,k+1} +``` + +Using the autocovariance recursion, Chow shows that this leads to the condition + +```{math} +:label: chow_hs_condition53 + +\gamma_{11,-1} = d_{11,1} \lambda_1^{-1} + d_{11,2} \lambda_2^{-1} = \gamma_{11,1} = d_{11,1} \lambda_1 + d_{11,2} \lambda_2 +``` + +which constrains the spectral density in a useful way. + +### The spectral density formula + +From equations {eq}`chow_scalar_autocov` and the scalar kernel $g_i(\omega) = (1 - \lambda_i^2)/(1 + \lambda_i^2 - 2\lambda_i \cos\omega)$, the spectral density of $y_{1t}$ is: + +```{math} +:label: chow_hs_spectral + +f_{11}(\omega) = d_{11,1} g_1(\omega) + d_{11,2} g_2(\omega) +``` + +which can be written in the combined form: + +```{math} +:label: chow_hs_spectral_combined + +f_{11}(\omega) = \frac{d_{11,1}(1 - \lambda_1^2)(1 + \lambda_2^2) + d_{11,2}(1 - \lambda_2^2)(1 + \lambda_1^2) - 2[d_{11,1}(1-\lambda_1^2)\lambda_2 + d_{11,2}(1-\lambda_2^2)\lambda_1]\cos\omega}{(1 + \lambda_1^2 - 2\lambda_1 \cos\omega)(1 + \lambda_2^2 - 2\lambda_2 \cos\omega)} +``` + +A key observation: due to condition {eq}`chow_hs_condition53`, the *numerator is not a function of $\cos\omega$*. + +Therefore, to find a maximum of $f_{11}(\omega)$, we need only find a minimum of the denominator. + +### Conditions for a spectral peak + +The first derivative of the denominator with respect to $\omega$ is: + +```{math} +:label: chow_hs_derivative + +2[(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1] \sin\omega - 8\lambda_1 \lambda_2 \cos\omega \sin\omega +``` + +For $0 < \omega < \pi$, we have $\sin\omega > 0$, so the derivative equals zero if and only if: + +```{math} +:label: chow_hs_foc + +(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1 = 4\lambda_1 \lambda_2 \cos\omega +``` + +For *complex conjugate roots* $\lambda_1 = r e^{i\theta}$, $\lambda_2 = r e^{-i\theta}$, substitution into {eq}`chow_hs_foc` gives: + +```{math} +:label: chow_hs_peak_condition + +\cos\omega = \frac{1 + r^2}{2r} \cos\theta +``` + +The second derivative confirms this is a maximum when $\omega < \frac{3\pi}{4}$. 
+ +The necessary condition for a valid solution is: + +```{math} +:label: chow_hs_necessary + +-1 < \frac{1 + r^2}{2r} \cos\theta < 1 +``` + +We can interpret it as: +- When $r \approx 1$, the factor $(1+r^2)/2r \approx 1$, so $\omega \approx \theta$ +- When $r$ is small (e.g., 0.3 or 0.4), condition {eq}`chow_hs_necessary` can only be satisfied if $\cos\theta \approx 0$, meaning $\theta \approx \pi/2$ (cycles of approximately 4 periods) + +If $\theta = 54^\circ$ (corresponding to cycles of 6.67 periods) and $r = 0.4$, then $(1+r^2)/2r = 1.45$, giving $\cos\omega = 1.45 \times 0.588 = 0.85$, or $\omega = 31.5^\circ$, corresponding to cycles of 11.4 periods, which is much longer than the deterministic cycle. + +```{code-cell} ipython3 +def peak_condition_factor(r): + """Compute (1 + r^2) / (2r)""" + return (1 + r**2) / (2 * r) + +θ_deg = 54 +θ = np.deg2rad(θ_deg) +r_grid = np.linspace(0.3, 0.99, 100) + +# For each r, compute the implied peak frequency +ω_peak = [] +for r in r_grid: + factor = peak_condition_factor(r) + cos_ω = factor * np.cos(θ) + if -1 < cos_ω < 1: + ω_peak.append(np.arccos(cos_ω)) + else: + ω_peak.append(np.nan) + +ω_peak = np.array(ω_peak) +period_peak = 2 * np.pi / ω_peak + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + +axes[0].plot(r_grid, np.rad2deg(ω_peak), lw=2) +axes[0].axhline(θ_deg, ls='--', lw=1.0, color='gray', + label=rf'$\theta = {θ_deg}°$') +axes[0].set_xlabel('eigenvalue modulus $r$') +axes[0].set_ylabel(r'peak frequency $\omega$ (degrees)') +axes[0].legend(frameon=False) + +axes[1].plot(r_grid, period_peak, lw=2) +axes[1].axhline(360/θ_deg, ls='--', lw=1.0, color='gray', + label=rf'deterministic period = {360/θ_deg:.1f}') +axes[1].set_xlabel('eigenvalue modulus $r$') +axes[1].set_ylabel('peak period') +axes[1].legend(frameon=False) + +plt.tight_layout() +plt.show() + +r_example = 0.4 +factor = peak_condition_factor(r_example) +cos_ω = factor * np.cos(θ) +ω_example = np.arccos(cos_ω) +print(f"Chow's example: r = {r_example}, θ = {θ_deg}°") +print(f" cos(ω) = {cos_ω:.3f}") +print(f" ω = {np.rad2deg(ω_example):.1f}°") +print(f" Peak period = {360/np.rad2deg(ω_example):.1f}") +``` + +As $r \to 1$, the peak frequency converges to $\theta$. + +For smaller $r$, the peak frequency can differ substantially from the deterministic oscillation frequency. + +### Real positive roots cannot produce peaks + +For *real and positive roots* $\lambda_1, \lambda_2 > 0$, the first-order condition {eq}`chow_hs_foc` cannot be satisfied. + +To see why, recall that a spectral peak at an interior frequency $\omega \in (0, \pi)$ requires + +```{math} +\cos\omega = \frac{(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1}{4\lambda_1 \lambda_2}. +``` + +For this to have a solution, we need the right-hand side to lie in $[-1, 1]$. + +But for positive $\lambda_1, \lambda_2$, the numerator exceeds $4\lambda_1\lambda_2$: + +```{math} +:label: chow_hs_real_proof + +(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1 - 4\lambda_1\lambda_2 = \lambda_1(1-\lambda_2)^2 + \lambda_2(1-\lambda_1)^2. +``` + +The right-hand side is a sum of two non-negative terms (each is a positive number times a square). + +It equals zero only if both $\lambda_1 = 1$ and $\lambda_2 = 1$, which violates the stability condition $|\lambda_i| < 1$. 
+ +For any stable system with real positive roots, this expression is strictly positive, so + +```{math} +:label: chow_hs_real_impossible + +\cos\omega = \frac{(1 + \lambda_1^2)\lambda_2 + (1 + \lambda_2^2)\lambda_1}{4\lambda_1 \lambda_2} > 1, +``` + +which is impossible. + +This is a key result: in the Hansen-Samuelson model, *complex roots are necessary* for a spectral peak at interior frequencies. + +The following figure illustrates the difference in spectra between a case with complex roots and a case with real roots + +```{code-cell} ipython3 +ω_grid = np.linspace(1e-3, np.pi - 1e-3, 800) +V_hs = np.array([[1.0, 0.0], [0.0, 0.0]]) # shock only in first equation + +# Case 1: Complex roots (c=0.6, v=0.8) +c_complex, v_complex = 0.6, 0.8 +A_complex = samuelson_transition(c_complex, v_complex) +eig_complex = np.linalg.eigvals(A_complex) + +# Case 2: Real roots (c=0.8, v=0.1) +c_real, v_real = 0.8, 0.1 +A_real = samuelson_transition(c_real, v_real) +eig_real = np.linalg.eigvals(A_real) + +print( + f"Complex case (c={c_complex}, v={v_complex}): eigenvalues = {eig_complex}") +print( + f"Real case (c={c_real}, v={v_real}): eigenvalues = {eig_real}") + +F_complex = spectral_density_var1(A_complex, V_hs, ω_grid) +F_real = spectral_density_var1(A_real, V_hs, ω_grid) + +f11_complex = np.real(F_complex[:, 0, 0]) +f11_real = np.real(F_real[:, 0, 0]) + +fig, ax = plt.subplots() +ax.plot(ω_grid / np.pi, f11_complex / np.max(f11_complex), lw=2, + label=fr'complex roots ($c={c_complex}, v={v_complex}$)') +ax.plot(ω_grid / np.pi, f11_real / np.max(f11_real), lw=2, + label=fr'real roots ($c={c_real}, v={v_real}$)') +ax.set_xlabel(r'frequency $\omega/\pi$') +ax.set_ylabel('normalized spectrum') +ax.legend(frameon=False) +plt.show() +``` + +With complex roots, the spectrum has a clear interior peak. + +With real roots, the spectrum is monotonically decreasing and no interior peak is possible. + +## Real roots can produce peaks in general models + +While real positive roots cannot produce spectral peaks in the Hansen-Samuelson model, {cite:t}`Chow1968` emphasizes that this is *not true in general*. + +In multivariate systems, the spectral density of a linear combination of variables can have interior peaks even when all eigenvalues are real and positive. + +### Example + +Chow constructs the following explicit example with two real positive eigenvalues: + +```{math} +:label: chow_real_roots_example + +\lambda_1 = 0.1, \quad \lambda_2 = 0.9 +``` + +```{math} +:label: chow_real_roots_W + +w_{11} = w_{22} = 1, \quad w_{12} = 0.8 +``` + +```{math} +:label: chow_real_roots_b + +b_{m1} = 1, \quad b_{m2} = -0.01 +``` + +The spectral density of the linear combination $x_t = b_m^\top y_t$ is: + +```{math} +:label: chow_real_roots_spectrum + +f_{mm}(\omega) = \frac{0.9913}{1.01 - 0.2\cos\omega} - \frac{0.001570}{1.81 - 1.8\cos\omega} +``` + +Chow tabulates the values: + +| $\omega$ | $0$ | $\pi/8$ | $2\pi/8$ | $3\pi/8$ | $4\pi/8$ | $5\pi/8$ | $6\pi/8$ | $7\pi/8$ | $\pi$ | +|----------|-----|---------|----------|----------|----------|----------|----------|----------|-------| +| $f_{mm}(\omega)$ | 1.067 | 1.183 | 1.191 | 1.138 | 1.061 | 0.981 | 0.912 | 0.860 | 0.829 | + +The peak at $\omega$ slightly below $\pi/8$ (corresponding to periods around 11) is "quite pronounced." 
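+
+Before reproducing the table, we can locate the peak of {eq}`chow_real_roots_spectrum` analytically: for $0 < \omega < \pi$ the $\omega$-derivative vanishes where a simple function of $\cos\omega$ changes sign, which a scalar root-finder handles directly (this cross-check is ours, not a computation reported by Chow).
+
+```{code-cell} ipython3
+from scipy.optimize import brentq
+
+def dfdc(c):
+    """Derivative of f_mm with respect to cos ω.
+
+    The ω-derivative of f_mm equals -sin(ω) times this expression,
+    so an interior peak occurs where it crosses zero.
+    """
+    return (0.9913 * 0.2 / (1.01 - 0.2 * c)**2
+            - 0.001570 * 1.8 / (1.81 - 1.8 * c)**2)
+
+c_star = brentq(dfdc, 0.0, 0.999)
+ω_star = np.arccos(c_star)
+print(f"analytic peak: ω/π ≈ {ω_star/np.pi:.3f}, "
+      f"period ≈ {2*np.pi/ω_star:.1f}")
+```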
+ +In the following figure, we reproduce this table, but with Python, we can plot a finer grid to find the peak more accurately + +```{code-cell} ipython3 +λ1, λ2 = 0.1, 0.9 +w11, w22, w12 = 1.0, 1.0, 0.8 +bm1, bm2 = 1.0, -0.01 + +# Construct the system +A_chow_ex = np.diag([λ1, λ2]) + +# W is the canonical shock covariance; we need V = B W B^T +# For diagonal A with distinct eigenvalues, B = I, so V = W +V_chow_ex = np.array([[w11, w12], [w12, w22]]) +b_chow_ex = np.array([bm1, bm2]) + +# Chow's formula +def chow_spectrum_formula(ω): + term1 = 0.9913 / (1.01 - 0.2 * np.cos(ω)) + term2 = 0.001570 / (1.81 - 1.8 * np.cos(ω)) + return term1 - term2 + +# Compute via formula and via our general method +ω_table = np.array([0, np.pi/8, 2*np.pi/8, 3*np.pi/8, 4*np.pi/8, + 5*np.pi/8, 6*np.pi/8, 7*np.pi/8, np.pi]) +f_formula = np.array([chow_spectrum_formula(ω) for ω in ω_table]) + +# General method +ω_grid_fine = np.linspace(1e-4, np.pi, 1000) +F_chow_ex = spectral_density_var1(A_chow_ex, V_chow_ex, ω_grid_fine) +f_general = spectrum_of_linear_combination(F_chow_ex, b_chow_ex) + +# Normalize to match Chow's table scale +scale = f_formula[0] / spectrum_of_linear_combination( + spectral_density_var1( + A_chow_ex, V_chow_ex, np.array([0.0])), b_chow_ex)[0] + +print("Chow's Table (equation 67):") +print("ω/π: ", " ".join([f"{ω/np.pi:.3f}" for ω in ω_table])) +print("f_mm(ω): ", " ".join([f"{f:.3f}" for f in f_formula])) + +fig, ax = plt.subplots(figsize=(9, 4)) +ax.plot(ω_grid_fine / np.pi, f_general * scale, lw=2, + label='spectrum') +ax.scatter(ω_table / np.pi, f_formula, s=50, zorder=3, + label="Chow's table values") + +# Mark the peak +i_peak = np.argmax(f_general) +ω_peak = ω_grid_fine[i_peak] +ax.axvline(ω_peak / np.pi, ls='--', lw=1.0, color='gray', alpha=0.7) +ax.set_xlabel(r'frequency $\omega/\pi$') +ax.set_ylabel(r'$f_{mm}(\omega)$') +ax.legend(frameon=False) +plt.show() + +print(f"\nPeak at ω/π ≈ {ω_peak/np.pi:.3f}, period ≈ {2*np.pi/ω_peak:.1f}") +``` + +The peak appears at $\omega/\pi \approx 0.10$, which corresponds to a cycle length of approximately 20 periods, again much longer than the deterministic cycles implied by the eigenvalues. + +### The Slutsky connection + +Chow connects this result to Slutsky's {cite}`slutsky:1927` finding that moving averages of a random series have recurrent cycles. + +The VAR(1) model can be written as an infinite moving average: + +```{math} +:label: chow_ma_rep + +y_t = u_t + A u_{t-1} + A^2 u_{t-2} + \cdots +``` + +This amounts to taking an infinite moving average of the random vectors $u_t$ with "geometrically declining" weights $A^0, A^1, A^2, \ldots$ + +For a scalar process with $0 < \lambda < 1$, no distinct cycles can emerge. + +But for a matrix $A$ with real roots between 0 and 1, cycles *can* emerge in linear combinations of the variables. + +As Chow puts it: "When neither of two (canonical) variables has distinct cycles... a linear combination can have a peak in its spectral density." + +### The general lesson + +The examples above illustrate the following central points: + +1. In the *Hansen-Samuelson model specifically*, complex roots are necessary for a spectral peak +2. But in *general multivariate systems*, complex roots are neither necessary nor sufficient +3. 
The full spectral shape depends on: + - The eigenvalues of $A$ + - The shock covariance structure $V$ + - How the observable of interest loads on the eigenmodes (the vector $b$) + +## A calibrated model in the frequency domain + +{cite:t}`ChowLevitan1969` use the frequency-domain objects from {cite:t}`Chow1968` to study a calibrated annual macroeconometric model. + +They work with five annual aggregates: + +- $y_1 = C$ (consumption), +- $y_2 = I_1$ (equipment plus inventories), +- $y_3 = I_2$ (construction), +- $y_4 = R_a$ (long rate), +- $y_5 = Y_1 = C + I_1 + I_2$ (private-domestic GNP), + +and add $y_6 = y_{1,t-1}$ to rewrite the original system in first-order form. + +Throughout this section, frequency is measured in cycles per year, $f = \omega/2\pi \in [0, 1/2]$. + +Following the paper, we normalize each spectrum to have area 1 over $[0, 1/2]$ so plots compare shape rather than scale. + +Our goal is to reconstruct the transition matrix $A$ and then compute and interpret the model-implied spectra, gains/coherences, and phase differences. + +### The cycle subsystem + +The paper starts from a reduced form with exogenous inputs, + +```{math} +:label: chow_reduced_full + +y_t = A y_{t-1} + C x_t + u_t. +``` + +To study cycles, they remove the deterministic component attributable to $x_t$ and focus on the zero-mean subsystem + +```{math} +:label: chow_cycle_system + +y_t = A y_{t-1} + u_t. +``` + +For second moments, the only additional ingredient is the covariance matrix $V = \mathbb E[u_t u_t^\top]$. + +Chow and Levitan compute it from structural parameters via + +```{math} +:label: chow_v_from_structural + +V = M^{-1} \Sigma (M^{-1})^\top +``` + +where $\Sigma$ is the covariance of structural residuals and $M$ is the matrix of contemporaneous structural coefficients. + +Here we take $A$ and $V$ as given and ask what they imply for spectra and cross-spectra. + +The $6 \times 6$ reduced-form shock covariance matrix $V$ (scaled by $10^{-7}$) reported by Chow and Levitan is: + +```{math} +:label: chow_V_matrix + +V = \begin{bmatrix} +8.250 & 7.290 & 2.137 & 2.277 & 17.68 & 0 \\ +7.290 & 7.135 & 1.992 & 2.165 & 16.42 & 0 \\ +2.137 & 1.992 & 0.618 & 0.451 & 4.746 & 0 \\ +2.277 & 2.165 & 0.451 & 1.511 & 4.895 & 0 \\ +17.68 & 16.42 & 4.746 & 4.895 & 38.84 & 0 \\ +0 & 0 & 0 & 0 & 0 & 0 +\end{bmatrix}. +``` + +The sixth row and column are zeros because $y_6$ is an identity (lagged $y_1$). + +The transition matrix $A$ has six characteristic roots: + +```{math} +:label: chow_eigenvalues + +\begin{aligned} +\lambda_1 &= 0.9999725, \quad \lambda_2 = 0.9999064, \quad \lambda_3 = 0.4838, \\ +\lambda_4 &= 0.0761 + 0.1125i, \quad \lambda_5 = 0.0761 - 0.1125i, \quad \lambda_6 = -0.00004142. +\end{aligned} +``` + +Two roots are near unity because two structural equations are in first differences. + +One root ($\lambda_6$) is theoretically zero because of the identity $y_5 = y_1 + y_2 + y_3$. + +The complex conjugate pair $\lambda_{4,5}$ has modulus $|\lambda_4| = \sqrt{0.0761^2 + 0.1125^2} \approx 0.136$. 
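+
+As a quick check on these numbers (our own arithmetic, not a figure from the paper), the modulus and the deterministic cycle length implied by the complex pair are:
+
+```{code-cell} ipython3
+λ_pair = 0.0761 + 0.1125j
+r_pair, θ_pair = np.abs(λ_pair), np.angle(λ_pair)
+print(f"modulus |λ₄| = {r_pair:.3f}")
+print(f"angle θ = {np.degrees(θ_pair):.1f}°, "
+      f"implied period = {2 * np.pi / θ_pair:.1f} years")
+```
+
+The pair would generate damped cycles of roughly six and a half years on its own, but its small modulus means that contribution dies out quickly, a point we return to below.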
+ +The right eigenvector matrix $B$ (columns are eigenvectors corresponding to $\lambda_1, \ldots, \lambda_6$): + +```{math} +:label: chow_B_matrix + +B = \begin{bmatrix} +-0.008 & 1.143 & 0.320 & 0.283+0.581i & 0.283-0.581i & 0.000 \\ +-0.000 & 0.013 & -0.586 & -2.151+0.742i & -2.151-0.742i & 2.241 \\ +-0.001 & 0.078 & 0.889 & -0.215+0.135i & -0.215-0.135i & 0.270 \\ +1.024 & 0.271 & 0.069 & -0.231+0.163i & -0.231-0.163i & 0.307 \\ +-0.009 & 1.235 & 0.623 & -2.082+1.468i & -2.082-1.468i & 2.766 \\ +-0.008 & 1.143 & 0.662 & 4.772+0.714i & 4.772-0.714i & -4.399 +\end{bmatrix}. +``` + +Together, $V$, $\{\lambda_i\}$, and $B$ are sufficient to compute all spectral and cross-spectral densities. + +### Reconstructing $A$ and computing $F(\omega)$ + +The paper reports $(\lambda, B, V)$, which is enough to reconstruct +$A = B \, \mathrm{diag}(\lambda_1,\dots,\lambda_6)\, B^{-1}$ and then compute the model-implied spectral objects. + +```{code-cell} ipython3 +λ = np.array([ + 0.9999725, 0.9999064, 0.4838, + 0.0761 + 0.1125j, 0.0761 - 0.1125j, -0.00004142 +], dtype=complex) + +B = np.array([ + [-0.008, 1.143, 0.320, 0.283+0.581j, 0.283-0.581j, 0.000], + [-0.000, 0.013, -0.586, -2.151+0.742j, -2.151-0.742j, 2.241], + [-0.001, 0.078, 0.889, -0.215+0.135j, -0.215-0.135j, 0.270], + [1.024, 0.271, 0.069, -0.231+0.163j, -0.231-0.163j, 0.307], + [-0.009, 1.235, 0.623, -2.082+1.468j, -2.082-1.468j, 2.766], + [-0.008, 1.143, 0.662, 4.772+0.714j, 4.772-0.714j, -4.399] +], dtype=complex) + +V = np.array([ + [8.250, 7.290, 2.137, 2.277, 17.68, 0], + [7.290, 7.135, 1.992, 2.165, 16.42, 0], + [2.137, 1.992, 0.618, 0.451, 4.746, 0], + [2.277, 2.165, 0.451, 1.511, 4.895, 0], + [17.68, 16.42, 4.746, 4.895, 38.84, 0], + [0, 0, 0, 0, 0, 0] +]) * 1e-7 + +D_λ = np.diag(λ) +A_chow = B @ D_λ @ np.linalg.inv(B) +A_chow = np.real(A_chow) +print("eigenvalues of reconstructed A:") +print(np.linalg.eigvals(A_chow).round(6)) +``` + +### Canonical coordinates + +Chow and Levitan's canonical transformation uses $z_t = B^{-1} y_t$, giving dynamics $z_t = D_\lambda z_{t-1} + e_t$. + +Accordingly, the canonical shock covariance is + +```{math} +W = B^{-1} V (B^{-1})^\top. +``` + +```{code-cell} ipython3 +B_inv = np.linalg.inv(B) +W = B_inv @ V @ B_inv.T +print("diagonal of W:") +print(np.diag(W).round(10)) +``` + +Chow and Levitan derive the following closed-form formula for the spectral density matrix: + +```{math} +:label: chow_spectral_eigen + +F(\omega) += B \left[ \frac{w_{ij}}{(1 - \lambda_i e^{-i\omega})(1 - \lambda_j e^{i\omega})} \right] B^\top, +``` + +where $w_{ij}$ are elements of the canonical shock covariance $W$. 
+ +```{code-cell} ipython3 +def spectral_density_chow(λ, B, W, ω_grid): + """Spectral density via Chow's eigendecomposition formula.""" + p = len(λ) + F = np.zeros((len(ω_grid), p, p), dtype=complex) + for k, ω in enumerate(ω_grid): + F_star = np.zeros((p, p), dtype=complex) + for i in range(p): + for j in range(p): + denom = (1 - λ[i] * np.exp(-1j * ω)) \ + * (1 - λ[j] * np.exp(1j * ω)) + F_star[i, j] = W[i, j] / denom + F[k] = B @ F_star @ B.T + return F / (2 * np.pi) + +freq = np.linspace(1e-4, 0.5, 5000) # cycles/year in [0, 1/2] +ω_grid = 2 * np.pi * freq # radians in [0, π] +F_chow = spectral_density_chow(λ, B, W, ω_grid) +``` + +Let's plot the univariate spectra of consumption ($y_1$) and equipment plus inventories ($y_2$) + +```{code-cell} ipython3 +variable_names = ['$C$', '$I_1$', '$I_2$', '$R_a$', '$Y_1$'] +freq_ticks = [1/18, 1/9, 1/6, 1/4, 1/3, 1/2] +freq_labels = [r'$\frac{1}{18}$', r'$\frac{1}{9}$', r'$\frac{1}{6}$', + r'$\frac{1}{4}$', r'$\frac{1}{3}$', r'$\frac{1}{2}$'] + +def paper_frequency_axis(ax): + ax.set_xlim([0.0, 0.5]) + ax.set_xticks(freq_ticks) + ax.set_xticklabels(freq_labels) + ax.set_xlabel(r'frequency $\omega/2\pi$') + +# Normalized spectra (areas set to 1) +S = np.real(np.diagonal(F_chow, axis1=1, axis2=2))[:, :5] +df = np.diff(freq) +areas = np.sum(0.5 * (S[1:] + S[:-1]) * df[:, None], axis=0) +S_norm = S / areas +mask = freq >= 0.0 + +fig, axes = plt.subplots(1, 2, figsize=(10, 6)) + +# Figure I.1: consumption (log scale) +axes[0].plot(freq[mask], S_norm[mask, 0], lw=2) +axes[0].set_yscale('log') +paper_frequency_axis(axes[0]) +axes[0].set_ylabel(r'normalized $f_{11}(\omega)$') + +# Figure I.2: equipment + inventories (log scale) +axes[1].plot(freq[mask], S_norm[mask, 1], lw=2) +axes[1].set_yscale('log') +paper_frequency_axis(axes[1]) +axes[1].set_ylabel(r'normalized $f_{22}(\omega)$') + +plt.tight_layout() +plt.show() + +i_peak = np.argmax(S_norm[mask, 1]) +f_peak = freq[mask][i_peak] +``` + +The left panel corresponds to consumption and declines monotonically with frequency. + +It illustrates Granger's "typical spectral shape" for macroeconomic time series. + +The right panel corresponds to equipment plus inventories and shows the clearest (but still very flat) interior-frequency bump. + +Chow and Levitan associate the dominance of very low frequencies in both plots with strong persistence and long-run movements. + +Very large low-frequency power can arise from eigenvalues extremely close to one, which occurs mechanically when some equations are written in first differences. + +Local peaks are not automatic: complex roots may have small modulus, and multivariate interactions can generate peaks even when all roots are real. + +The interior bump in the right panel corresponds to cycles of roughly three years, with the spectrum nearly flat over cycles between about two and four years. + +(This discussion follows Section II of {cite}`ChowLevitan1969`.) + +### How variables move together across frequencies + +Beyond univariate spectra, we can ask how pairs of variables covary at each frequency. + +The **cross-spectrum** $f_{ij}(\omega) = c_{ij}(\omega) - i \cdot q_{ij}(\omega)$ decomposes into the cospectrum $c_{ij}$ and the quadrature spectrum $q_{ij}$. + +The **cross-amplitude** is $g_{ij}(\omega) = |f_{ij}(\omega)| = \sqrt{c_{ij}^2 + q_{ij}^2}$. + +The **squared coherence** measures linear association at frequency $\omega$: + +```{math} +:label: chow_coherence + +R^2_{ij}(\omega) = \frac{|f_{ij}(\omega)|^2}{f_{ii}(\omega) f_{jj}(\omega)} \in [0, 1]. 
+``` + +Coherence measures how much of the variance of $y_i$ at frequency $\omega$ can be "explained" by $y_j$ at the same frequency. + +High coherence means the two series move together tightly at that frequency. + +The **gain** is the frequency-response coefficient when regressing $y_i$ on $y_j$: + +```{math} +:label: chow_gain + +G_{ij}(\omega) = \frac{|f_{ij}(\omega)|}{f_{jj}(\omega)}. +``` + +It measures how much $y_i$ responds to a unit change in $y_j$ at frequency $\omega$. + +For instance, a gain of 0.9 at low frequencies means long-cycle movements in $y_j$ translate almost one-for-one to $y_i$, and a gain of 0.3 at high frequencies means short-cycle movements are dampened. + +The **phase** captures lead-lag relationships (in radians): + +```{math} +:label: chow_phase + +\Delta_{ij}(\omega) = \tan^{-1}\left( \frac{q_{ij}(\omega)}{c_{ij}(\omega)} \right). +``` + +```{code-cell} ipython3 +def cross_spectral_measures(F, i, j): + """Compute coherence, gain (y_i on y_j), and phase between variables i and j.""" + f_ij = F[:, i, j] + f_ii, f_jj = np.real(F[:, i, i]), np.real(F[:, j, j]) + g_ij = np.abs(f_ij) + coherence = (g_ij**2) / (f_ii * f_jj) + gain = g_ij / f_jj + phase = np.arctan2(-np.imag(f_ij), np.real(f_ij)) + return coherence, gain, phase +``` + +We now plot gain and coherence as in Figures II.1–II.4 of {cite}`ChowLevitan1969`. + +```{code-cell} ipython3 +gnp_idx = 4 + +fig, axes = plt.subplots(1, 2, figsize=(8, 6)) + +for idx, var_idx in enumerate([0, 1]): + coherence, gain, phase = cross_spectral_measures(F_chow, var_idx, gnp_idx) + ax = axes[idx] + + ax.plot(freq[mask], coherence[mask], + lw=2, label=rf'$R^2_{{{var_idx+1}5}}(\omega)$') + ax.plot(freq[mask], gain[mask], + lw=2, label=rf'$G_{{{var_idx+1}5}}(\omega)$') + paper_frequency_axis(ax) + ax.set_ylim([0, 1.0]) + ax.set_ylabel('gain, coherence') + ax.legend(frameon=False, loc='best') + +plt.tight_layout() +plt.show() +``` + +The gain and coherence patterns differ across components (Figures II.1–II.2 of {cite}`ChowLevitan1969`): + +- Consumption vs private-domestic GNP (left panel): + - Gain is about 0.9 at very low frequencies but falls below 0.4 for cycles shorter than four years. + - This is evidence that short-cycle income movements translate less into consumption than long-cycle movements, consistent with permanent-income interpretations. + - Coherence remains high throughout. +- Equipment plus inventories vs private-domestic GNP (right panel): + - Gain *rises* with frequency, exceeding 0.5 for short cycles. + - This is the frequency-domain signature of acceleration and volatile short-run inventory movements. + +```{code-cell} ipython3 +fig, axes = plt.subplots(1, 2, figsize=(8, 6)) + +for idx, var_idx in enumerate([2, 3]): + coherence, gain, phase = cross_spectral_measures(F_chow, var_idx, gnp_idx) + ax = axes[idx] + + ax.plot(freq[mask], coherence[mask], + lw=2, label=rf'$R^2_{{{var_idx+3}5}}(\omega)$') + ax.plot(freq[mask], gain[mask], + lw=2, label=rf'$G_{{{var_idx+3}5}}(\omega)$') + paper_frequency_axis(ax) + ax.set_ylim([0, 1.0]) + ax.set_ylabel('gain, coherence') + ax.legend(frameon=False, loc='best') + +plt.tight_layout() +plt.show() +``` + +- New construction vs private-domestic GNP (left panel): + - Gain peaks at medium cycle lengths (around 0.1 for short cycles). + - Coherence for both investment series stays fairly high across frequencies. +- Long-bond yield vs private-domestic GNP (right panel): + - Gain varies less across frequencies than real activity series. 
+ - Coherence with output is comparatively low at business-cycle frequencies, making it hard to explain interest-rate movements by inverting a money-demand equation. + + +### Lead-lag relationships + +The phase tells us which variable leads at each frequency. + +Positive phase means output leads the component; negative phase means the component leads output. + +```{code-cell} ipython3 +fig, ax = plt.subplots() + +labels = [r'$\psi_{15}(\omega)/2\pi$', r'$\psi_{25}(\omega)/2\pi$', + r'$\psi_{35}(\omega)/2\pi$', r'$\psi_{45}(\omega)/2\pi$'] + +for var_idx in range(4): + coherence, gain, phase = cross_spectral_measures(F_chow, var_idx, gnp_idx) + phase_cycles = phase / (2 * np.pi) + ax.plot(freq[mask], phase_cycles[mask], lw=2, label=labels[var_idx]) + +ax.axhline(0, lw=0.8) +paper_frequency_axis(ax) +ax.set_ylabel('phase difference in cycles') +ax.set_ylim([-0.25, 0.25]) +ax.set_yticks(np.arange(-0.25, 0.3, 0.05), minor=True) +ax.legend(frameon=False) +plt.tight_layout() +plt.show() +``` + +The phase relationships reveal that: + +- Output leads consumption by a small fraction of a cycle (about 0.06 cycles at a 6-year period, 0.04 cycles at a 3-year period). +- Equipment plus inventories tends to lead output (by about 0.07 cycles at a 6-year period, 0.03 cycles at a 3-year period). +- New construction leads at low frequencies and is close to coincident at higher frequencies. +- The bond yield lags output slightly, remaining close to coincident in timing. + +These implied leads and lags are broadly consistent with turning-point timing summaries reported elsewhere, and simulations of the same model deliver similar lead–lag ordering at turning points (Figure III of {cite}`ChowLevitan1969`). + +### Building blocks of spectral shape + +Each eigenvalue contributes a characteristic spectral shape through the *scalar kernel* + +```{math} +:label: chow_scalar_kernel + +g_i(\omega) = \frac{1 - |\lambda_i|^2}{|1 - \lambda_i e^{-i\omega}|^2} = \frac{1 - |\lambda_i|^2}{1 + |\lambda_i|^2 - 2 \text{Re}(\lambda_i) \cos\omega + 2 \text{Im}(\lambda_i) \sin\omega}. +``` + +For real $\lambda_i$, this simplifies to + +```{math} +g_i(\omega) = \frac{1 - \lambda_i^2}{1 + \lambda_i^2 - 2\lambda_i \cos\omega}. +``` + +Each observable spectral density is a linear combination of these kernels (plus cross-terms). + +Below, we plot the scalar kernels for each eigenvalue to see how they shape the overall spectra + +```{code-cell} ipython3 +def scalar_kernel(λ_i, ω_grid): + """scalar spectral kernel g_i(ω).""" + λ_i = complex(λ_i) + mod_sq = np.abs(λ_i)**2 + return np.array( + [(1 - mod_sq) / np.abs(1 - λ_i * np.exp(-1j * ω))**2 + for ω in ω_grid]) + +fig, ax = plt.subplots(figsize=(10, 5)) +for i, λ_i in enumerate(λ): + if np.abs(λ_i) > 0.01: + g_i = scalar_kernel(λ_i, ω_grid) + label = f'$\\lambda_{i+1}$ = {λ_i:.4f}' \ + if np.isreal(λ_i) else f'$\\lambda_{i+1}$ = {λ_i:.3f}' + ax.semilogy(freq, g_i, label=label, lw=2) + +ax.set_xlabel(r'frequency $\omega/2\pi$') +ax.set_ylabel('$g_i(\\omega)$') +ax.set_xlim([1/18, 0.5]) +ax.set_xticks(freq_ticks) +ax.set_xticklabels(freq_labels) +ax.legend(frameon=False) +plt.show() +``` + +The figure reveals how eigenvalue magnitude shapes spectral contributions: + +- *Near-unit eigenvalues* ($\lambda_1, \lambda_2 \approx 1$) produce kernels sharply peaked at low frequencies as these drive the strong low-frequency power seen in the spectra above. +- *The moderate eigenvalue* ($\lambda_3 \approx 0.48$) contributes a flatter component that spreads power more evenly across frequencies. 
+- *The complex pair* ($\lambda_{4,5}$) has such small modulus ($|\lambda_{4,5}| \approx 0.136$) that its kernel is nearly flat, which is too weak to generate a pronounced interior peak. + +This decomposition explains why the spectra look the way they do: the near-unit eigenvalues dominate, concentrating variance at very low frequencies. + +The complex pair, despite enabling oscillatory dynamics in principle, has insufficient modulus to produce a visible spectral peak. + +## Summary + +{cite:t}`Chow1968` draws several conclusions that remain relevant for understanding business cycles. + +The acceleration principle receives strong empirical support: the negative coefficient on lagged output in investment equations is a robust finding across datasets. + +The relationship between eigenvalues and spectral peaks is more subtle than it first appears: + +- Complex roots guarantee oscillatory autocovariances, but they are neither necessary nor sufficient for a pronounced spectral peak. + +- In the Hansen–Samuelson model specifically, complex roots *are* necessary for a peak. + +- But in general multivariate systems, even real roots can produce peaks through the interaction of shocks and eigenvector loadings. + +{cite:t}`ChowLevitan1969` demonstrate what these objects look like in a calibrated system: strong low-frequency power from near-unit eigenvalues, frequency-dependent gains and coherences, and lead–lag relations that vary with cycle length. + +Their results are consistent with Granger's "typical spectral shape" for economic time series. + +That is a monotonically decreasing function of frequency, driven by the near-unit eigenvalues that arise when some equations are specified in first differences. + +Understanding whether this shape reflects the true data-generating process requires analyzing the spectral densities implied by structural econometric models. + +## Exercises + +```{exercise} +:label: chow_cycles_ex1 + +Plot impulse responses and spectra side-by-side for several values of the accelerator $v$ in the Hansen-Samuelson model, showing how acceleration strength affects both the time-domain and frequency-domain signatures. + +Use the same $v$ values as in the main text: $v \in \{0.2, 0.4, 0.6, 0.8, 0.95\}$ with $c = 0.6$. 
+``` + +```{solution-start} chow_cycles_ex1 +:class: dropdown +``` + +Here is one solution: + +```{code-cell} ipython3 +v_grid_ex1 = [0.2, 0.4, 0.6, 0.8, 0.95] +c_ex1 = 0.6 +freq_ex1 = np.linspace(1e-4, 0.5, 2000) +ω_grid_ex1 = 2 * np.pi * freq_ex1 +V_ex1 = np.array([[1.0, 0.0], [0.0, 0.0]]) +T_irf_ex1 = 40 + +fig, axes = plt.subplots(1, 2, figsize=(12, 5)) + +for v in v_grid_ex1: + A = samuelson_transition(c_ex1, v) + + # impulse response (left panel) + s = np.array([1.0, 0.0]) + irf = np.empty(T_irf_ex1 + 1) + for t in range(T_irf_ex1 + 1): + irf[t] = s[0] + s = A @ s + axes[0].plot(range(T_irf_ex1 + 1), irf, lw=2, label=f'$v={v}$') + + # spectrum (right panel) + F = spectral_density_var1(A, V_ex1, ω_grid_ex1) + f11 = np.real(F[:, 0, 0]) + df = np.diff(freq_ex1) + area = np.sum(0.5 * (f11[1:] + f11[:-1]) * df) + f11_norm = f11 / area + axes[1].plot(freq_ex1, f11_norm, lw=2, label=f'$v={v}$') + +axes[0].axhline(0, lw=0.8, color='gray') +axes[0].set_xlabel('time') +axes[0].set_ylabel(r'$Y_t$') +axes[0].legend(frameon=False) + +axes[1].set_xlabel(r'frequency $\omega/2\pi$') +axes[1].set_ylabel('normalized spectrum') +axes[1].set_xlim([0, 0.5]) +axes[1].set_yscale('log') +axes[1].legend(frameon=False) + +plt.tight_layout() +plt.show() +``` + +As $v$ increases, eigenvalues approach the unit circle: oscillations become more persistent in the time domain (left), and the spectral peak becomes sharper in the frequency domain (right). + +Complex roots produce a pronounced peak at interior frequencies—the spectral signature of business cycles. + +```{solution-end} +``` + +```{exercise} +:label: chow_cycles_ex2 + +Verify spectral peak condition {eq}`chow_hs_peak_condition` numerically for the Hansen-Samuelson model. + +1. For a range of eigenvalue moduli $r \in [0.3, 0.99]$ with fixed $\theta = 60°$, compute: + - The theoretical peak frequency from formula: $\cos\omega = \frac{1+r^2}{2r}\cos\theta$ + - The actual peak frequency by numerically maximizing the spectral density +2. Plot both on the same graph and verify they match. +3. Identify the range of $r$ for which no valid peak exists (when the condition {eq}`chow_hs_necessary` is violated). 
+``` + +```{solution-start} chow_cycles_ex2 +:class: dropdown +``` + +Here is one solution: + +```{code-cell} ipython3 +θ_ex = np.pi / 3 # 60 degrees +r_grid = np.linspace(0.3, 0.99, 50) +ω_grid_ex = np.linspace(1e-3, np.pi - 1e-3, 1000) +V_hs_ex = np.array([[1.0, 0.0], [0.0, 0.0]]) + +ω_theory = [] +ω_numerical = [] + +for r in r_grid: + # Theoretical peak + factor = (1 + r**2) / (2 * r) + cos_ω = factor * np.cos(θ_ex) + if -1 < cos_ω < 1: + ω_theory.append(np.arccos(cos_ω)) + else: + ω_theory.append(np.nan) + + # Numerical peak from spectral density + # Construct Hansen-Samuelson with eigenvalues r*exp(+-iθ) + # This corresponds to c + v = 2r*cos(θ), v = r^2 + v = r**2 + c = 2 * r * np.cos(θ_ex) - v + A_ex = samuelson_transition(c, v) + F_ex = spectral_density_var1(A_ex, V_hs_ex, ω_grid_ex) + f11 = np.real(F_ex[:, 0, 0]) + i_max = np.argmax(f11) + + # Only count as a peak if it's not at the boundary + if 5 < i_max < len(ω_grid_ex) - 5: + ω_numerical.append(ω_grid_ex[i_max]) + else: + ω_numerical.append(np.nan) + +ω_theory = np.array(ω_theory) +ω_numerical = np.array(ω_numerical) + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + +# Plot peak frequencies +axes[0].plot(r_grid, ω_theory / np.pi, lw=2, label="Chow's formula") +axes[0].plot(r_grid, ω_numerical / np.pi, 'o', markersize=4, label='numerical') +axes[0].axhline(θ_ex / np.pi, ls='--', lw=1.0, color='gray', label=r'$\theta/\pi$') +axes[0].set_xlabel('eigenvalue modulus $r$') +axes[0].set_ylabel(r'peak frequency $\omega^*/\pi$') +axes[0].legend(frameon=False) + +# Plot the factor (1+r^2)/2r to show when peaks are valid +axes[1].plot(r_grid, (1 + r_grid**2) / (2 * r_grid), lw=2) +axes[1].axhline(1 / np.cos(θ_ex), ls='--', lw=1.0, color='red', + label=f'threshold = 1/cos({np.rad2deg(θ_ex):.0f}°) = {1/np.cos(θ_ex):.2f}') +axes[1].set_xlabel('eigenvalue modulus $r$') +axes[1].set_ylabel(r'$(1+r^2)/2r$') +axes[1].legend(frameon=False) + +plt.tight_layout() +plt.show() + +# Find threshold r below which no peak exists +valid_mask = ~np.isnan(ω_theory) +if valid_mask.any(): + r_threshold = r_grid[valid_mask][0] + print(f"Peak exists for r >= {r_threshold:.2f}") +``` + +The theoretical and numerical peak frequencies match closely. + +As $r \to 1$, the peak frequency converges to $\theta$. + +For smaller $r$, the factor $(1+r^2)/2r$ exceeds the threshold, and no valid peak exists. + +```{solution-end} +``` + +```{exercise} +:label: chow_cycles_ex3 + +In the "real roots but a peak" example, hold $A$ fixed and vary the shock correlation (the off-diagonal entry of $V$) between $0$ and $0.99$. + +When does the interior-frequency peak appear, and how does its location change? 
+``` + +```{solution-start} chow_cycles_ex3 +:class: dropdown +``` + +Here is one solution: + +```{code-cell} ipython3 +A_ex3 = np.diag([0.1, 0.9]) +b_ex3 = np.array([1.0, -0.01]) +corr_grid = np.linspace(0, 0.99, 50) +peak_periods = [] +for corr in corr_grid: + V_ex3 = np.array([[1.0, corr], [corr, 1.0]]) + F_ex3 = spectral_density_var1(A_ex3, V_ex3, ω_grid_ex) + f_x = spectrum_of_linear_combination(F_ex3, b_ex3) + i_max = np.argmax(f_x) + if 5 < i_max < len(ω_grid_ex) - 5: + peak_periods.append(2 * np.pi / ω_grid_ex[i_max]) + else: + peak_periods.append(np.nan) + +fig, ax = plt.subplots(figsize=(8, 4)) +ax.plot(corr_grid, peak_periods, marker='o', lw=2, markersize=4) +ax.set_xlabel('shock correlation') +ax.set_ylabel('peak period') +plt.show() + +threshold_idx = np.where(~np.isnan(peak_periods))[0] +if len(threshold_idx) > 0: + print( + f"interior peak when correlation >= {corr_grid[threshold_idx[0]]:.2f}") +``` + +The interior peak appears only when the shock correlation exceeds a threshold. + +This illustrates that spectral peaks depend on the full system structure, not just eigenvalues. + +```{solution-end} +``` + +```{exercise} +:label: chow_cycles_ex4 + +Using the calibrated Chow-Levitan parameters, compute the autocovariance matrices $\Gamma_0, \Gamma_1, \ldots, \Gamma_{10}$ using: + +1. The recursion $\Gamma_k = A \Gamma_{k-1}$ with $\Gamma_0$ from the Lyapunov equation. +2. Chow's eigendecomposition formula $\Gamma_k = B D_\lambda^k \Gamma_0^* B^\top$ where $\Gamma_0^*$ is the canonical covariance. + +Verify that both methods give the same result. +``` + +```{solution-start} chow_cycles_ex4 +:class: dropdown +``` + +Here is one solution: + +```{code-cell} ipython3 +from scipy.linalg import solve_discrete_lyapunov + +Γ_0_lyap = solve_discrete_lyapunov(A_chow, V) +Γ_recursion = [Γ_0_lyap] +for k in range(1, 11): + Γ_recursion.append(A_chow @ Γ_recursion[-1]) + +p = len(λ) +Γ_0_star = np.zeros((p, p), dtype=complex) +for i in range(p): + for j in range(p): + Γ_0_star[i, j] = W[i, j] / (1 - λ[i] * λ[j]) + +Γ_eigen = [] +for k in range(11): + D_k = np.diag(λ**k) + Γ_eigen.append(np.real(B @ D_k @ Γ_0_star @ B.T)) + +print("Comparison of Γ_5 (first 3x3 block):") +print("\nRecursion method:") +print(np.real(Γ_recursion[5][:3, :3]).round(10)) +print("\nEigendecomposition method:") +print(Γ_eigen[5][:3, :3].round(10)) +print("\nMax absolute difference:", + np.max(np.abs(np.real(Γ_recursion[5]) - Γ_eigen[5]))) +``` + +Both methods produce essentially identical results, up to numerical precision. + +```{solution-end} +``` + +```{exercise} +:label: chow_cycles_ex5 + +Modify the Chow-Levitan model by changing $\lambda_3$ from $0.4838$ to $0.95$. + +1. Recompute the spectral densities. +2. How does this change affect the spectral shape for each variable? +3. What economic interpretation might correspond to this parameter change? 
+``` + +```{solution-start} chow_cycles_ex5 +:class: dropdown +``` + +Here is one solution: + +```{code-cell} ipython3 +# Modify λ_3 and reconstruct the transition matrix +λ_modified = λ.copy() +λ_modified[2] = 0.95 +D_λ_mod = np.diag(λ_modified) +A_mod = np.real(B @ D_λ_mod @ np.linalg.inv(B)) + +# Compute spectra using the VAR(1) formula with original V +F_mod = spectral_density_var1(A_mod, V, ω_grid) +F_orig = spectral_density_var1(A_chow, V, ω_grid) + +# Plot ratio of spectra for output (Y_1) +f_orig = np.real(F_orig[:, 4, 4]) +f_mod = np.real(F_mod[:, 4, 4]) + +fig, ax = plt.subplots() +ax.plot(freq, f_mod / f_orig, lw=2) +ax.axhline(1.0, ls='--', lw=1, color='gray') +paper_frequency_axis(ax) +ax.set_ylabel(r"ratio: modified / original spectrum for $Y_1$") +plt.show() +``` + +The near-unit eigenvalues ($\lambda_1, \lambda_2 \approx 0.9999$) dominate the output spectrum so heavily that changing $\lambda_3$ from 0.48 to 0.95 produces only a small relative effect. + +The ratio plot reveals the change: the modified spectrum has slightly more power at low-to-medium frequencies and slightly less at high frequencies. + +Economically, increasing $\lambda_3$ adds persistence to the mode it governs. + +```{solution-end} +``` diff --git a/lectures/measurement_models.md b/lectures/measurement_models.md new file mode 100644 index 000000000..b2d6e42e0 --- /dev/null +++ b/lectures/measurement_models.md @@ -0,0 +1,1464 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.17.1 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(sargent_measurement_models)= +```{raw} jupyter +
+ + QuantEcon + +
+```
+
+# Two Models of Measurements and the Investment Accelerator
+
+```{contents} Contents
+:depth: 2
+```
+
+## Overview
+
+"Rational expectations econometrics" aims to interpret economic time
+series in terms of objects that are meaningful to economists, namely,
+parameters describing preferences, technologies, information sets,
+endowments, and equilibrium concepts.
+
+When fully worked out, rational expectations models typically deliver
+a well-defined mapping from these economically interpretable parameters
+to the moments of the time series determined by the model.
+
+If accurate observations on these time series are available, one can
+use that mapping to implement parameter estimation methods based
+either on the likelihood function or on the method of moments.
+
+```{note}
+This is why econometric estimation is often called an "inverse" problem, while
+simulating a model for given parameter values is called a "direct" problem.
+
+The direct problem refers to the mapping we have just described, while the
+inverse problem involves somehow applying an "inverse" of that mapping to a data
+set that is treated as if it were one draw from the joint probability
+distribution described by the mapping.
+```
+
+However, if only error-ridden data exist for the variables of interest,
+then more steps are needed to extract parameter estimates.
+
+In effect, we require a model of the data reporting agency, one that
+is workable enough that we can determine the mapping induced jointly
+by the dynamic economic model and the measurement process to the
+probability law for the measured data.
+
+The model chosen for the data collection agency is an aspect of an
+econometric specification that can make big differences in inferences
+about the economic structure.
+
+{cite:t}`Sargent1989` describes two alternative models of data generation
+in a {doc}`permanent income <perm_income>` economy in which the
+investment accelerator, the mechanism studied in these two quantecon
+lectures -- {doc}`samuelson` and {doc}`chow_business_cycles` -- shapes
+business cycle fluctuations.
+
+- In Model 1, the data collecting agency simply reports the
+  error-ridden data that it collects.
+- In Model 2, the data collecting agency first collects error-ridden data that satisfy
+  a classical errors-in-variables model, then filters the data, and reports the filtered objects.
+
+Although the two models have the same "deep parameters," they produce
+quite different sets of restrictions on the data.
+
+In this lecture we follow {cite:t}`Sargent1989` and study how these
+alternative measurement schemes affect empirical implications.
+
+We start with some imports and two helper functions, used throughout this lecture to display matrices and tables as LaTeX output
+
+```{code-cell} ipython3
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from scipy import linalg
+from IPython.display import Latex
+
+np.set_printoptions(precision=3, suppress=True)
+
+def df_to_latex_matrix(df, label=''):
+    """Convert DataFrame to LaTeX matrix."""
+    lines = [r'\begin{bmatrix}']
+
+    for idx, row in df.iterrows():
+        row_str = ' & '.join(
+            [f'{v:.4f}' if isinstance(v, (int, float))
+             else str(v) for v in row]) + r' \\'
+        lines.append(row_str)
+
+    lines.append(r'\end{bmatrix}')
+
+    if label:
+        return '$' + label + ' = ' + '\n'.join(lines) + '$'
+    else:
+        return '$' + '\n'.join(lines) + '$'
+
+def df_to_latex_array(df):
+    """Convert DataFrame to LaTeX array."""
+    n_rows, n_cols = df.shape
+
+    # Build column format (centered columns)
+    col_format = 'c' * (n_cols + 1)  # +1 for index
+
+    # Start array
+    lines = [r'\begin{array}{' + col_format + '}']
+
+    # Header row
+    header = ' & '.join([''] + [str(c) for c in df.columns]) + r' \\'
+    lines.append(header)
+    lines.append(r'\hline')
+
+    # Data rows
+    for idx, row in df.iterrows():
+        row_str = str(idx) + ' & ' + ' & '.join(
+            [f'{v:.3f}' if isinstance(v, (int, float)) else str(v)
+             for v in row]) + r' \\'
+        lines.append(row_str)
+
+    lines.append(r'\end{array}')
+
+    return '$' + '\n'.join(lines) + '$'
+```
+
+## The economic model
+
+The data are generated by a linear-quadratic version of a stochastic
+optimal growth model that is an instance of models described in this quantecon lecture: {doc}`perm_income`.
+
+A social planner chooses a stochastic process for $\{c_t, k_{t+1}\}_{t=0}^\infty$ that maximizes
+
+```{math}
+:label: planner_obj
+E \sum_{t=0}^{\infty} \beta^t \left( u_0 + u_1 c_t - \frac{u_2}{2} c_t^2 \right)
+```
+
+subject to the restrictions imposed by the technology
+
+```{math}
+:label: tech_constraint
+c_t + k_{t+1} = f k_t + \theta_t, \qquad \beta f^2 > 1.
+```
+
+Here $c_t$ is consumption, $k_t$ is the capital stock,
+$f$ is the gross rate of return on capital,
+and $\theta_t$ is an endowment or technology shock following
+
+```{math}
+:label: shock_process
+a(L)\,\theta_t = \varepsilon_t,
+```
+where $L$ is the backward shift (or 'lag') operator and
+$a(z) = 1 - a_1 z - a_2 z^2 - \cdots - a_r z^r$ is a polynomial with all its
+zeroes outside the unit circle.
+
+### Optimal decision rule
+
+The optimal decision rule for $c_t$ is
+
+```{math}
+:label: opt_decision
+c_t = \frac{-\alpha}{f-1}
+      + \left(1 - \frac{1}{\beta f^2}\right)
+        \left[ \frac{L - f^{-1} a(f^{-1})^{-1} a(L)}{L - f^{-1}}\,\theta_t
+        + f k_t \right],
+\qquad
+k_{t+1} = f k_t + \theta_t - c_t,
+```
+
+where $\alpha = u_1[1-(\beta f)^{-1}]/u_2$.
+
+Equations {eq}`shock_process` and {eq}`opt_decision` exhibit the
+cross-equation restrictions characteristic of rational expectations
+models.
+
+### Net income and the accelerator
+
+Define net output or national income as
+
+```{math}
+:label: net_income
+y_{nt} = (f-1)k_t + \theta_t.
+```
+
+Note that {eq}`tech_constraint` and {eq}`net_income` imply
+$(k_{t+1} - k_t) + c_t = y_{nt}$.
+
+To obtain both a version of {cite:t}`Friedman1956`'s geometric
+distributed lag consumption function and a distributed lag
+accelerator, we impose two assumptions:
+
+1. $a(L) = 1$, so that $\theta_t$ is white noise.
+2. $\beta f = 1$, so the rate of return on capital equals the rate
+   of time preference.
+
+Assumption 1 is crucial for the strict form of the accelerator.
+ +Relaxing it to allow serially correlated $\theta_t$ preserves an +accelerator in a broad sense but loses the sharp geometric-lag +form of {eq}`mm_accelerator`. + +Adding a second shock breaks the one-index structure entirely +and can generate nontrivial Granger causality even without +measurement error. + +Assumption 2 is less important, affecting only various constants. + +Under both assumptions, {eq}`opt_decision` simplifies to + +```{math} +:label: simple_crule +c_t = (1-f^{-1})\,\theta_t + (f-1)\,k_t. +``` + +When {eq}`simple_crule`, {eq}`net_income`, and +{eq}`tech_constraint` are combined, the optimal plan satisfies + +```{math} +:label: friedman_consumption +c_t = \left(\frac{1-\beta}{1-\beta L}\right) y_{nt}, +``` + +```{math} +:label: mm_accelerator +k_{t+1} - k_t = f^{-1} \left(\frac{1-L}{1-\beta L}\right) y_{nt}, +``` + +```{math} +:label: income_process +y_{nt} = \theta_t + (1-\beta)(\theta_{t-1} + \theta_{t-2} + \cdots). +``` + +Equation {eq}`friedman_consumption` is Friedman's consumption +model: consumption is a geometric distributed lag of income, +with the decay coefficient $\beta$ equal to the discount factor. + +Equation {eq}`mm_accelerator` is the distributed lag accelerator: +investment is a geometric distributed lag of the first difference +of income. + +This is the same mechanism that {cite:t}`Chow1968` documented +empirically (see {doc}`chow_business_cycles`). + +Equation {eq}`income_process` states that the first difference of disposable income is a +first-order moving average process with innovation equal to the innovation of the endowment shock $\theta_t$. + +As {cite:t}`Muth1960` showed, such a process is optimally forecast +via a geometric distributed lag or "adaptive expectations" scheme. + +### The accelerator puzzle + +When all variables are measured accurately and are driven by +the single shock $\theta_t$, the spectral density matrix of +$(c_t,\, k_{t+1}-k_t,\, y_{nt})$ has rank one at all frequencies. + +Each variable is an invertible one-sided distributed lag of the +same white noise, so no variable Granger-causes any other. + +Empirically, however, measures of output Granger-cause investment +but not vice versa. + +{cite:t}`Sargent1989` shows that measurement error can resolve +this puzzle. + +To illustrate, suppose first that output $y_{nt}$ is measured +perfectly while consumption and capital are each polluted by +serially correlated measurement errors $v_{ct}$ and $v_{kt}$ +orthogonal to $\theta_t$. + +Let $\bar c_t$ and $\bar k_{t+1} - \bar k_t$ denote the measured +series. Then + +```{math} +:label: meas_consumption +\bar c_t = \left(\frac{1-\beta}{1-\beta L}\right) y_{nt} + v_{ct}, +``` + +```{math} +:label: meas_investment +\bar k_{t+1} - \bar k_t + = \beta\left(\frac{1-L}{1-\beta L}\right) y_{nt} + + (v_{k,t+1} - v_{kt}), +``` + +```{math} +:label: income_process_ma +y_{nt} = \theta_t + (1-\beta)(\theta_{t-1} + \theta_{t-2} + \cdots). +``` + +In this case income Granger-causes consumption and investment +but is not Granger-caused by them. + +When each measured series is corrupted by measurement error, every +measured variable will in general Granger-cause every other. + +The strength of this Granger causality, as measured by decompositions +of $j$-step-ahead prediction error variances, depends on the relative +variances of the measurement errors. + +In this case, each observed series mixes the common signal $\theta_t$ +with idiosyncratic measurement noise. 
+
+A series with lower measurement
+error variance tracks $\theta_t$ more closely, so its innovations
+contain more information about future values of the other series.
+
+Accordingly, in a forecast-error-variance decomposition, shocks to
+better-measured series account for a larger share of other variables'
+$j$-step-ahead prediction errors.
+
+In a one-common-index model like this one ($\theta_t$ is the common
+index), better-measured variables extend more Granger causality to
+less well-measured series than vice versa.
+
+This asymmetry drives the numerical results that we will see below.
+
+### State-space formulation
+
+Let's map the economic model and the measurement process into
+a linear state-space framework.
+
+Set $f = 1.05$ and $\theta_t \sim \mathcal{N}(0, 1)$.
+
+Define the state and observation vectors
+
+```{math}
+x_t = \begin{bmatrix} k_t \\ \theta_t \end{bmatrix},
+\qquad
+z_t = \begin{bmatrix} y_{nt} \\ c_t \\ \Delta k_t \end{bmatrix},
+```
+
+so that the error-free data are described by the state-space system
+
+```{math}
+:label: true_ss
+\begin{aligned}
+x_{t+1} &= A x_t + \varepsilon_t, \\
+z_t &= C x_t,
+\end{aligned}
+```
+
+where $\varepsilon_t = \begin{bmatrix} 0 \\ \theta_{t+1} \end{bmatrix}$ has
+covariance $E \varepsilon_t \varepsilon_t^\top = Q$ and the matrices are
+
+```{math}
+A = \begin{bmatrix}
+1 & f^{-1} \\
+0 & 0
+\end{bmatrix},
+\qquad
+C = \begin{bmatrix}
+f-1 & 1 \\
+f-1 & 1-f^{-1} \\
+0 & f^{-1}
+\end{bmatrix},
+\qquad
+Q = \begin{bmatrix}
+0 & 0 \\
+0 & 1
+\end{bmatrix}.
+```
+
+$Q$ is singular because there is only one source of randomness
+$\theta_t$; the capital stock $k_t$ evolves deterministically
+given $\theta_t$.
+
+```{code-cell} ipython3
+# Baseline structural matrices for the true economy
+f = 1.05
+β = 1 / f
+
+A = np.array([
+    [1.0, 1.0 / f],
+    [0.0, 0.0]
+])
+
+C = np.array([
+    [f - 1.0, 1.0],
+    [f - 1.0, 1.0 - 1.0 / f],
+    [0.0, 1.0 / f]
+])
+
+Q = np.array([
+    [0.0, 0.0],
+    [0.0, 1.0]
+])
+```
+
+(true-impulse-responses)=
+### True impulse responses
+
+Before introducing measurement error, we compute the impulse response of
+the error-free variables to a unit shock $\theta_0 = 1$.
+
+This benchmark clarifies what changes when we later switch from
+error-free variables to variables reported by the statistical agency.
+
+The response shows the investment accelerator clearly: the full impact on
+net income $y_n$ occurs at lag 0, while consumption adjusts by only
+$1 - f^{-1} \approx 0.048$ and investment absorbs the remainder.
+
+From lag 1 onward the economy is in its new steady state
+
+```{code-cell} ipython3
+def table2_irf(A, C, n_lags=6):
+    x = np.array([0.0, 1.0])   # k_0 = 0, theta_0 = 1
+    rows = []
+    for j in range(n_lags):
+        y_n, c, d_k = C @ x
+        rows.append([y_n, c, d_k])
+        x = A @ x
+    return pd.DataFrame(rows, columns=[r'y_n', r'c', r'\Delta k'],
+                        index=pd.Index(range(n_lags), name='lag'))
+
+table2 = table2_irf(A, C, n_lags=6)
+display(Latex(df_to_latex_array(table2)))
+```
+
+## Measurement errors
+
+Let's add the measurement layer that generates reported data.
+
+The econometrician does not observe $z_t$ directly but instead
+sees $\bar z_t = z_t + v_t$, where $v_t$ is a vector of measurement
+errors.
+
+Measurement errors follow an AR(1) process
+
+```{math}
+:label: meas_error_ar1
+v_{t+1} = D v_t + \eta_t,
+```
+
+where $\eta_t$ is a vector white noise with
+$E \eta_t \eta_t^\top = \Sigma_\eta$ and
+$E \varepsilon_t v_s^\top = 0$ for all $t, s$.
+ +The parameters are + +```{math} +D = \operatorname{diag}(0.6, 0.7, 0.3), +\qquad +\sigma_\eta = (0.05, 0.035, 0.65), +``` + +so the unconditional covariance of $v_t$ is + +```{math} +R = \operatorname{diag}\!\left(\frac{\sigma_{\eta,i}^2}{1 - \rho_i^2}\right). +``` + +The innovation variances are smallest for consumption +($\sigma_\eta = 0.035$), next for income ($\sigma_\eta = 0.05$), +and largest for investment ($\sigma_\eta = 0.65$). + +As in {cite:t}`Sargent1989` and our discussion above, what matters for Granger-causality +asymmetries is the overall measurement quality in the full system: +output is relatively well measured while investment is relatively +poorly measured. + +```{code-cell} ipython3 +ρ = np.array([0.6, 0.7, 0.3]) +D = np.diag(ρ) + +# Innovation std. devs of η_t +σ_η = np.array([0.05, 0.035, 0.65]) +Σ_η = np.diag(σ_η**2) + +# Unconditional covariance of measurement errors v_t +R = np.diag((σ_η / np.sqrt(1.0 - ρ**2))**2) + +print(f"f = {f}, β = 1/f = {β:.6f}") +print() +display(Latex(df_to_latex_matrix(pd.DataFrame(A), 'A'))) +display(Latex(df_to_latex_matrix(pd.DataFrame(C), 'C'))) +display(Latex(df_to_latex_matrix(pd.DataFrame(D), 'D'))) +``` + +We will analyze the two reporting schemes separately, but first we need a solver for the steady-state Kalman gain and error covariances. + +The function below iterates on the Riccati equation until convergence, +returning the Kalman gain $K$, the state covariance $S$, and the +innovation covariance $V$ + +```{code-cell} ipython3 +def steady_state_kalman(A, C_obs, Q, R, W=None, tol=1e-13, max_iter=200_000): + """ + Solve steady-state Kalman equations for + x_{t+1} = A x_t + w_{t+1} + y_t = C_obs x_t + v_t + with cov(w)=Q, cov(v)=R, cov(w,v)=W. + """ + n = A.shape[0] + m = C_obs.shape[0] + if W is None: + W = np.zeros((n, m)) + + S = Q.copy() + for _ in range(max_iter): + V = C_obs @ S @ C_obs.T + R + K = (A @ S @ C_obs.T + W) @ np.linalg.inv(V) + S_new = Q + A @ S @ A.T - K @ V @ K.T + + if np.max(np.abs(S_new - S)) < tol: + S = S_new + break + S = S_new + + V = C_obs @ S @ C_obs.T + R + K = (A @ S @ C_obs.T + W) @ np.linalg.inv(V) + return K, S, V +``` + +With structural matrices and tools we need in place, we now follow +{cite:t}`Sargent1989`'s two reporting schemes in sequence. + +## A classical model of measurements initially collected by an agency + +A data collecting agency observes a noise-corrupted version of $z_t$, namely + +```{math} +:label: model1_obs +\bar z_t = C x_t + v_t. +``` + +We refer to this as *Model 1*: the agency collects noisy +data and reports them without filtering. + +To represent the second moments of the $\bar z_t$ process, it is +convenient to obtain its population vector autoregression. + +The error vector in the vector autoregression is the +innovation to $\bar z_t$ and can be taken to be the white noise in a Wold +moving average representation, which can be obtained by "inverting" +the autoregressive representation. + +The population vector autoregression, and how it depends on the +parameters of the state-space system and the measurement error process, +carries insights about how to interpret estimated vector +autoregressions for $\bar z_t$. + +Constructing the vector autoregression is also useful as an +intermediate step in computing the likelihood of a sample of +$\bar z_t$'s as a function of the free parameters +$\{A, C, D, Q, R\}$. 
+
+The particular method that will be used to construct the vector
+autoregressive representation also proves useful as an intermediate
+step in constructing a model of an optimal reporting agency.
+
+We use recursive (Kalman filtering) methods to obtain the
+vector autoregression for $\bar z_t$.
+
+### Quasi-differencing
+
+Because the measurement errors $v_t$ are serially correlated,
+the standard Kalman filter with white-noise measurement error
+cannot be applied directly to $\bar z_t = C x_t + v_t$.
+
+An alternative is to augment the state vector with the
+measurement-error AR components (see Appendix B of
+{cite:t}`Sargent1989`).
+
+Here we take the quasi-differencing route described in
+{cite:t}`Sargent1989`, which reduces the
+system to one with serially uncorrelated observation noise.
+
+Define
+
+```{math}
+:label: model1_qd
+\tilde z_t = \bar z_{t+1} - D \bar z_t, \qquad
+\bar\nu_t = C \varepsilon_t + \eta_t, \qquad
+\bar C = CA - DC.
+```
+
+Then the state-space system {eq}`true_ss`, the measurement error
+process {eq}`meas_error_ar1`, and the observation equation {eq}`model1_obs`
+imply the state-space system
+
+```{math}
+:label: model1_transformed
+\begin{aligned}
+x_{t+1} &= A x_t + \varepsilon_t, \\
+\tilde z_t &= \bar C\, x_t + \bar\nu_t,
+\end{aligned}
+```
+
+where $(\varepsilon_t, \bar\nu_t)$ is a white noise process with
+
+```{math}
+:label: model1_covs
+E \begin{bmatrix} \varepsilon_t \\ \bar\nu_t \end{bmatrix}
+\begin{bmatrix} \varepsilon_t^\top & \bar\nu_t^\top \end{bmatrix}
+= \begin{bmatrix} Q & W_1 \\ W_1^\top & R_1 \end{bmatrix},
+\qquad
+R_1 = C Q C^\top + R, \quad W_1 = Q C^\top.
+```
+
+System {eq}`model1_transformed` with covariances {eq}`model1_covs` is
+characterized by the five matrices
+$[A, \bar C, Q, R_1, W_1]$.
+
+### Innovations representation
+
+Associated with {eq}`model1_transformed` and {eq}`model1_covs` is the
+**innovations representation** for $\tilde z_t$,
+
+```{math}
+:label: model1_innov
+\begin{aligned}
+\hat x_{t+1} &= A \hat x_t + K_1 u_t, \\
+\tilde z_t &= \bar C \hat x_t + u_t,
+\end{aligned}
+```
+
+where
+
+```{math}
+:label: model1_innov_defs
+\begin{aligned}
+\hat x_t &= E[x_t \mid \tilde z_{t-1}, \tilde z_{t-2}, \ldots, \hat x_0]
+    = E[x_t \mid \bar z_t, \bar z_{t-1}, \ldots], \\
+u_t &= \tilde z_t - E[\tilde z_t \mid \tilde z_{t-1}, \tilde z_{t-2}, \ldots]
+    = \bar z_{t+1} - E[\bar z_{t+1} \mid \bar z_t, \bar z_{t-1}, \ldots],
+\end{aligned}
+```
+
+$[K_1, S_1]$ are computed from the steady-state Kalman filter applied to
+$[A, \bar C, Q, R_1, W_1]$, and
+
+```{math}
+:label: model1_S1
+S_1 = E[(x_t - \hat x_t)(x_t - \hat x_t)^\top].
+```
+
+From {eq}`model1_innov_defs`, $u_t$ is the innovation process for the
+$\bar z_t$ process.
+
+### Wold representation
+
+System {eq}`model1_innov` and definition {eq}`model1_qd` can be used to
+obtain a Wold vector moving average representation for the $\bar z_t$ process:
+
+```{math}
+:label: model1_wold
+\bar z_{t+1} = (I - DL)^{-1}\bigl[\bar C(I - AL)^{-1}K_1 L + I\bigr] u_t,
+```
+
+where $L$ is the lag operator.
+
+From {eq}`model1_transformed` and {eq}`model1_innov` the innovation
+covariance is
+
+```{math}
+:label: model1_V1
+V_1 = E\, u_t u_t^\top = \bar C\, S_1\, \bar C^\top + R_1.
+``` + +Below we compute $K_1$, $S_1$, and $V_1$ numerically + +```{code-cell} ipython3 +C_bar = C @ A - D @ C +R1 = C @ Q @ C.T + R +W1 = Q @ C.T + +K1, S1, V1 = steady_state_kalman(A, C_bar, Q, R1, W1) +``` + + +### Computing coefficients in a Wold moving average representation + +To compute the moving average coefficients in {eq}`model1_wold` numerically, +define the augmented state + +```{math} +r_t = \begin{bmatrix} \hat x_{t-1} \\ \bar z_{t-1} \end{bmatrix}, +``` + +with dynamics + +```{math} +r_{t+1} = F_1 r_t + G_1 u_t, +\qquad +\bar z_t = H_1 r_t + u_t, +``` + +where + +```{math} +F_1 = +\begin{bmatrix} +A & 0 \\ +\bar C & D +\end{bmatrix}, +\quad +G_1 = +\begin{bmatrix} +K_1 \\ +I +\end{bmatrix}, +\quad +H_1 = [\bar C \;\; D]. +``` + +The moving average coefficients are then $\psi_0 = I$ and +$\psi_j = H_1 F_1^{j-1} G_1$ for $j \geq 1$. + +```{code-cell} ipython3 +F1 = np.block([ + [A, np.zeros((2, 3))], + [C_bar, D] +]) +G1 = np.vstack([K1, np.eye(3)]) +H1 = np.hstack([C_bar, D]) + + +def measured_wold_coeffs(F, G, H, n_terms=25): + psi = [np.eye(3)] + Fpow = np.eye(F.shape[0]) + for _ in range(1, n_terms): + psi.append(H @ Fpow @ G) + Fpow = Fpow @ F + return psi + + +def fev_contributions(psi, V, n_horizons=20): + """ + Returns contrib[var, shock, h-1] = contribution at horizon h. + """ + P = linalg.cholesky(V, lower=True) + out = np.zeros((3, 3, n_horizons)) + for h in range(1, n_horizons + 1): + acc = np.zeros((3, 3)) + for j in range(h): + T = psi[j] @ P + acc += T**2 + out[:, :, h - 1] = acc + return out + + +psi1 = measured_wold_coeffs(F1, G1, H1, n_terms=40) +resp1 = np.array( + [psi1[j] @ linalg.cholesky(V1, lower=True) for j in range(14)]) +decomp1 = fev_contributions(psi1, V1, n_horizons=20) +``` + +### Gaussian likelihood + +The Gaussian log-likelihood function for a sample +$\{\bar z_t,\, t=0,\ldots,T\}$, conditioned on an initial state estimate +$\hat x_0$, can be represented as + +```{math} +:label: model1_loglik +\mathcal{L}^* = -T\ln 2\pi - \tfrac{1}{2}T\ln|V_1| + - \tfrac{1}{2}\sum_{t=0}^{T-1} u_t^\top V_1^{-1} u_t, +``` + +where $u_t$ is a function of $\{\bar z_t\}$ defined by +{eq}`model1_recursion` below. + +To use {eq}`model1_innov` to compute $\{u_t\}$, it is useful to +represent it as + +```{math} +:label: model1_recursion +\begin{aligned} +\hat x_{t+1} &= (A - K_1 \bar C)\,\hat x_t + K_1 \tilde z_t, \\ +u_t &= -\bar C\,\hat x_t + \tilde z_t, +\end{aligned} +``` + +where $\tilde z_t = \bar z_{t+1} - D\bar z_t$ is the quasi-differenced +observation. + +Given $\hat x_0$, equation {eq}`model1_recursion` can be used recursively +to compute a $\{u_t\}$ process. + +Equations {eq}`model1_loglik` and {eq}`model1_recursion` give the +likelihood function of a sample of error-corrupted data +$\{\bar z_t\}$. + +### Forecast-error-variance decomposition + +To measure the relative importance of each innovation, we decompose +the $j$-step-ahead forecast-error variance of each measured variable. + +Write $\bar z_{t+j} - E_t \bar z_{t+j} = \sum_{i=0}^{j-1} \psi_i u_{t+j-i}$. + +Let $P$ be the lower-triangular Cholesky factor of $V_1$ so that the +orthogonalized innovations are $e_t = P^{-1} u_t$. + +Then the contribution of orthogonalized innovation $k$ to the +$j$-step-ahead variance of variable $m$ is +$\sum_{i=0}^{j-1} (\psi_i P)_{mk}^2$. + +The table below shows the cumulative contribution of each orthogonalized +innovation to the forecast-error variance of $y_n$, $c$, and $\Delta k$ +at horizons 1 through 20. 
+ +Each panel fixes one orthogonalized innovation and reports its +cumulative contribution to each variable's forecast-error variance. + +Rows are forecast horizons and columns are forecasted variables. + +```{code-cell} ipython3 +horizons = np.arange(1, 21) +labels = [r'y_n', r'c', r'\Delta k'] + +def fev_table(decomp, shock_idx, horizons): + return pd.DataFrame( + np.round(decomp[:, shock_idx, :].T, 4), + columns=labels, + index=pd.Index(horizons, name='j') + ) +``` + +```{code-cell} ipython3 +shock_titles = [r'\text{A. Innovation in } y_n', + r'\text{B. Innovation in } c', + r'\text{C. Innovation in } \Delta k'] + +parts = [] +for i, title in enumerate(shock_titles): + arr = df_to_latex_array(fev_table(decomp1, i, horizons)).strip('$') + parts.append( + r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') + +display(Latex('$' + r' \quad '.join(parts) + '$')) +``` + +The income innovation accounts for substantial proportions of +forecast-error variance in all three variables, while the consumption and +investment innovations contribute mainly to their own variances. + +This is a **Granger causality** pattern: income appears to +Granger-cause consumption and investment, but not vice versa. + +This matches the paper's message that, in a one-common-index model, +the relatively best measured series has the strongest predictive content. + +Let's look at the covariance matrix of the innovations + +```{code-cell} ipython3 +print('Covariance matrix of innovations:') +df_v1 = pd.DataFrame(np.round(V1, 4), index=labels, columns=labels) +display(Latex(df_to_latex_matrix(df_v1))) +``` + +The covariance matrix of the innovations is not diagonal, but the +eigenvalues are well separated as shown below + + +```{code-cell} ipython3 +print('Eigenvalues of covariance matrix:') +print(np.sort(np.linalg.eigvalsh(V1))[::-1].round(4)) +``` + +The first eigenvalue is much larger than the others, consistent with +the presence of a dominant common shock $\theta_t$ + +### Wold impulse responses + +Impulse responses in the Wold representation are reported using orthogonalized +innovations (Cholesky factorization of $V_1$ with ordering +$y_n$, $c$, $\Delta k$). + +Under this method, lag-0 responses reflect both +contemporaneous covariance and the Cholesky ordering. + +We first define a helper function to format the response coefficients as a LaTeX array + +```{code-cell} ipython3 +lags = np.arange(14) + +def wold_response_table(resp, shock_idx, lags): + return pd.DataFrame( + np.round(resp[:, :, shock_idx], 4), + columns=labels, + index=pd.Index(lags, name='j') + ) +``` + +Now we report the impulse responses to each orthogonalized innovation in a single table with three panels + +```{code-cell} ipython3 +wold_titles = [r'\text{A. Response to } y_n \text{ innovation}', + r'\text{B. Response to } c \text{ innovation}', + r'\text{C. Response to } \Delta k \text{ innovation}'] + +parts = [] +for i, title in enumerate(wold_titles): + arr = df_to_latex_array(wold_response_table(resp1, i, lags)).strip('$') + parts.append( + r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') + +display(Latex('$' + r' \quad '.join(parts) + '$')) +``` + +At impact, the first orthogonalized innovation +loads on all three measured variables. + +At subsequent lags the income innovation generates persistent +responses in all three variables because, being the best-measured +series, its innovation is dominated by the true permanent shock +$\theta_t$. 
+ +The consumption and investment innovations produce responses that +decay according to the AR(1) structure of their respective +measurement errors ($\rho_c = 0.7$, $\rho_{\Delta k} = 0.3$), +with little spillover to other variables. + +## A model of optimal estimates reported by an agency + +Suppose that instead of reporting the error-corrupted data $\bar z_t$, +the data collecting agency reports linear least-squares projections of +the true data on a history of the error-corrupted data. + +This model provides a possible way of interpreting two features of +the data-reporting process. + +- *seasonal adjustment*: if the components of $v_t$ have +strong seasonals, the optimal filter will assume a shape that can be +interpreted partly in terms of a seasonal adjustment filter, one that +is one-sided in current and past $\bar z_t$'s. + +- *data revisions*: if $z_t$ contains current and lagged +values of some variable of interest, then the model simultaneously +determines "preliminary," "revised," and "final" estimates as +successive conditional expectations based on progressively longer +histories of error-ridden observations. + +To make this operational, we impute to the reporting agency a model of +the joint process generating the true data and the measurement errors. + +We assume that the reporting agency has "rational expectations": it +knows the economic and measurement structure leading to +{eq}`model1_transformed`--{eq}`model1_covs`. + +To prepare its estimates, the reporting agency itself computes the +Kalman filter to obtain the innovations representation {eq}`model1_innov`. + +Rather than reporting the error-corrupted data $\bar z_t$, the agency +reports $\tilde z_t = G \hat x_t$, where $G$ is a "selection matrix," +possibly equal to $C$, for the data reported by the agency. + +The data $G \hat x_t = E[G x_t \mid \bar z_t, \bar z_{t-1}, \ldots, \hat x_0]$. + +The state-space representation for the reported data is then + +```{math} +:label: model2_state +\begin{aligned} +\hat x_{t+1} &= A \hat x_t + K_1 u_t, \\ +\tilde z_t &= G \hat x_t, +\end{aligned} +``` + +where the first line of {eq}`model2_state` is from the innovations +representation {eq}`model1_innov`. + +Note that $u_t$ is the innovation to $\bar z_{t+1}$ and is *not* the +innovation to $\tilde z_t$. + +To obtain a Wold representation for $\tilde z_t$ and the likelihood +function for a sample of $\tilde z_t$ requires that we obtain an +innovations representation for {eq}`model2_state`. + +### Innovations representation for filtered data + +To add a little generality to {eq}`model2_state` we amend it to the system + +```{math} +:label: model2_obs +\begin{aligned} +\hat x_{t+1} &= A \hat x_t + K_1 u_t, \\ +\tilde z_t &= G \hat x_t + \eta_t, +\end{aligned} +``` + +where $\eta_t$ is a type 2 white-noise measurement error process +("typos") with presumably very small covariance matrix $R_2$. + +The covariance matrix of the joint noise is + +```{math} +:label: model2_Q +E \begin{bmatrix} K_1 u_t \\ \eta_t \end{bmatrix} + \begin{bmatrix} K_1 u_t \\ \eta_t \end{bmatrix}^\top += \begin{bmatrix} Q_2 & 0 \\ 0 & R_2 \end{bmatrix}, +``` + +where $Q_2 = K_1 V_1 K_1^\top$. + +If $R_2$ is singular, it is necessary to adjust the Kalman filtering +formulas by using transformations that induce a "reduced order observer." + +In practice, we approximate a zero $R_2$ matrix with the matrix +$\epsilon I$ for a small $\epsilon > 0$ to keep the Kalman filter +numerically well-conditioned. 
+ +For system {eq}`model2_obs` and {eq}`model2_Q`, an innovations +representation is + +```{math} +:label: model2_innov +\begin{aligned} +\check{x}_{t+1} &= A \check{x}_t + K_2 a_t, \\ +\tilde z_t &= G \check{x}_t + a_t, +\end{aligned} +``` + +where + +```{math} +:label: model2_innov_defs +\begin{aligned} +a_t &= \tilde z_t - E[\tilde z_t \mid \tilde z_{t-1}, \tilde z_{t-2}, \ldots], \\ +\check{x}_t &= E[\hat x_t \mid \tilde z_{t-1}, \tilde z_{t-2}, \ldots, \check{x}_0], \\ +S_2 &= E[(\hat x_t - \check{x}_t)(\hat x_t - \check{x}_t)^\top], \\ +[K_2, S_2] &= \text{kalmanfilter}(A, G, Q_2, R_2, 0). +\end{aligned} +``` + +Thus $\{a_t\}$ is the innovation process for the reported data +$\tilde z_t$, with innovation covariance + +```{math} +:label: model2_V2 +V_2 = E\, a_t a_t^\top = G\, S_2\, G^\top + R_2. +``` + +### Wold representation + +A Wold moving average representation for $\tilde z_t$ is found from +{eq}`model2_innov` to be + +```{math} +:label: model2_wold +\tilde z_t = \bigl[G(I - AL)^{-1} K_2 L + I\bigr] a_t, +``` + +with coefficients $\psi_0 = I$ and $\psi_j = G A^{j-1} K_2$ for +$j \geq 1$. + +Note that this is simpler than the Model 1 Wold +representation {eq}`model1_wold` because there is no quasi-differencing +to undo. + +### Gaussian likelihood + +When a method analogous to Model 1 is used, a Gaussian log-likelihood +for $\tilde z_t$ can be computed by first computing an $\{a_t\}$ sequence +from observations on $\tilde z_t$ by using + +```{math} +:label: model2_recursion +\begin{aligned} +\check{x}_{t+1} &= (A - K_2 G)\,\check{x}_t + K_2 \tilde z_t, \\ +a_t &= -G\,\check{x}_t + \tilde z_t. +\end{aligned} +``` + +The likelihood function for a sample of $T$ observations +$\{\tilde z_t\}$ is then + +```{math} +:label: model2_loglik +\mathcal{L}^{**} = -T\ln 2\pi - \tfrac{1}{2}T\ln|V_2| + - \tfrac{1}{2}\sum_{t=0}^{T-1} a_t^\top V_2^{-1} a_t. +``` + +Note that relative to computing the likelihood function +{eq}`model1_loglik` for the error-corrupted data, computing the +likelihood function for the optimally filtered data requires more +calculations. + +Both likelihood functions require that the Kalman filter +{eq}`model1_innov_defs` be computed, while the likelihood function for +the filtered data requires that the Kalman filter +{eq}`model2_innov_defs` also be computed. + +In effect, in order to interpret and use the filtered data reported by +the agency, it is necessary to retrace the steps that the agency used +to synthesize those data. + +The Kalman filter {eq}`model1_innov_defs` is supposed to be formed by +the agency. + +The agency need not use Kalman filter {eq}`model2_innov_defs` because +it does not need the Wold representation for the filtered data. + +In our parameterization $G = C$. + +```{code-cell} ipython3 +Q2 = K1 @ V1 @ K1.T +ε = 1e-6 + +K2, S2, V2 = steady_state_kalman(A, C, Q2, ε * np.eye(3)) + + +def filtered_wold_coeffs(A, C, K, n_terms=25): + psi = [np.eye(3)] + Apow = np.eye(2) + for _ in range(1, n_terms): + psi.append(C @ Apow @ K) + Apow = Apow @ A + return psi + + +psi2 = filtered_wold_coeffs(A, C, K2, n_terms=40) +resp2 = np.array( + [psi2[j] @ linalg.cholesky(V2, lower=True) for j in range(14)]) +decomp2 = fev_contributions(psi2, V2, n_horizons=20) +``` + +### Forecast-error-variance decomposition + +Because the filtered data are nearly noiseless, the innovation +covariance $V_2$ is close to singular with one dominant eigenvalue. 
+ +This means the filtered economy is driven by essentially one shock, +just like the true economy + +```{code-cell} ipython3 +parts = [] +for i, title in enumerate(shock_titles): + arr = df_to_latex_array(fev_table(decomp2, i, horizons)).strip('$') + parts.append( + r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') + +display(Latex('$' + r' \quad '.join(parts) + '$')) +``` + +In Model 2, the first innovation accounts for virtually all forecast-error +variance, just as in the true economy where the single structural shock +$\theta_t$ drives everything. + +The second and third innovations contribute negligibly. + +This confirms that filtering strips away the measurement noise that created +the appearance of multiple independent sources of variation in Model 1. + +The covariance matrix and eigenvalues of the Model 2 innovations are + +```{code-cell} ipython3 +print('Covariance matrix of innovations:') +df_v2 = pd.DataFrame(np.round(V2, 4), index=labels, columns=labels) +display(Latex(df_to_latex_matrix(df_v2))) +``` + +```{code-cell} ipython3 +print('Eigenvalues of covariance matrix:') +print(np.sort(np.linalg.eigvalsh(V2))[::-1].round(4)) +``` + +As {cite:t}`Sargent1989` emphasizes, the two models of measurement +produce quite different inferences about the economy's dynamics despite +sharing identical underlying parameters. + +### Wold impulse responses + +We again use orthogonalized Wold representation impulse responses with a Cholesky +decomposition of $V_2$ ordered as $y_n$, $c$, $\Delta k$. + +```{code-cell} ipython3 +parts = [] +for i, title in enumerate(wold_titles): + arr = df_to_latex_array( + wold_response_table(resp2, i, lags)).strip('$') + parts.append( + r'\begin{array}{c} ' + title + r' \\ ' + arr + r' \end{array}') + +display(Latex('$' + r' \quad '.join(parts) + '$')) +``` + +The income innovation in Model 2 produces responses that closely +approximate the true impulse response function from the structural +shock $\theta_t$. + +Readers can compare the left table with the table in the +{ref}`true-impulse-responses` section above. + +The numbers are essentially the same. + +The consumption and investment innovations produce responses +that are orders of magnitude smaller, confirming that the filtered +data are driven by essentially one shock. + +Unlike Model 1, the filtered data from Model 2 +*cannot* reproduce the apparent Granger causality pattern that the +accelerator literature has documented empirically. + +Hence, at the population level, the two measurement models imply different +empirical stories even though they share the same structural economy. + +- In Model 1 (raw data), measurement noise creates multiple innovations + and an apparent Granger-causality pattern. +- In Model 2 (filtered data), innovations collapse back to essentially + one dominant shock, mirroring the true one-index economy. + +Let's verify these implications in a finite sample simulation. + +## Simulation + +The tables above characterize population moments of the two models. + +Let's simulate 80 periods of true, measured, and filtered data +to compare population implications with finite-sample behavior. + +First, we define a function to simulate the true economy, generate measured data with AR(1) measurement errors, and apply the Model 1 Kalman filter to produce filtered estimates + +```{code-cell} ipython3 +def simulate_series(seed=7909, T=80, k0=10.0): + """ + Simulate true, measured, and filtered series. 
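+
+    Returns a dict of length-T arrays: the true series
+    ("y_true", "c_true", "dk_true", "k_true"), the measured series
+    ("y_meas", "c_meas", "dk_meas"), and the filtered series
+    ("y_filt", "c_filt", "dk_filt", "k_filt").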
+ """ + rng = np.random.default_rng(seed) + + # True state/observables + θ = rng.normal(0.0, 1.0, size=T) + k = np.empty(T + 1) + k[0] = k0 + + y = np.empty(T) + c = np.empty(T) + dk = np.empty(T) + + for t in range(T): + x_t = np.array([k[t], θ[t]]) + y[t], c[t], dk[t] = C @ x_t + k[t + 1] = k[t] + (1.0 / f) * θ[t] + + # Measured data with AR(1) errors + v_prev = np.zeros(3) + v = np.empty((T, 3)) + for t in range(T): + η_t = rng.multivariate_normal(np.zeros(3), Σ_η) + v_prev = D @ v_prev + η_t + v[t] = v_prev + + z_meas = np.column_stack([y, c, dk]) + v + + # Filtered data via Model 1 transformed filter + xhat_prev = np.array([k0, 0.0]) + z_prev = np.zeros(3) + z_filt = np.empty((T, 3)) + k_filt = np.empty(T) + + for t in range(T): + z_bar_t = z_meas[t] - D @ z_prev + u_t = z_bar_t - C_bar @ xhat_prev + xhat_t = A @ xhat_prev + K1 @ u_t + + z_filt[t] = C @ xhat_t + k_filt[t] = xhat_t[0] + + xhat_prev = xhat_t + z_prev = z_meas[t] + + out = { + "y_true": y, "c_true": c, "dk_true": dk, "k_true": k[:-1], + "y_meas": z_meas[:, 0], "c_meas": z_meas[:, 1], + "dk_meas": z_meas[:, 2], + "y_filt": z_filt[:, 0], "c_filt": z_filt[:, 1], + "dk_filt": z_filt[:, 2], "k_filt": k_filt + } + return out + + +sim = simulate_series(seed=7909, T=80, k0=10.0) +``` + +We use the following helper function to plot the true series against either the measured or filtered series + +```{code-cell} ipython3 +def plot_true_vs_other(t, true_series, other_series, + other_label, ylabel=""): + fig, ax = plt.subplots(figsize=(8, 4)) + ax.plot(t, true_series, lw=2, label="true") + ax.plot(t, other_series, lw=2, ls="--", label=other_label) + ax.set_xlabel("time") + ax.set_ylabel(ylabel) + ax.legend() + plt.tight_layout() + plt.show() + + +t = np.arange(1, 81) +``` + +Let's first compare the true series with the measured series to see how measurement errors distort the data + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and measured consumption + name: fig-true-measured-consumption + image: + alt: True and measured consumption plotted over 80 time periods +--- +plot_true_vs_other(t, sim["c_true"], sim["c_meas"], + "measured", ylabel="consumption") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and measured investment + name: fig-true-measured-investment + image: + alt: True and measured investment plotted over 80 time periods +--- +plot_true_vs_other(t, sim["dk_true"], sim["dk_meas"], + "measured", ylabel="investment") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and measured income + name: fig-true-measured-income + image: + alt: True and measured income plotted over 80 time periods +--- +plot_true_vs_other(t, sim["y_true"], sim["y_meas"], + "measured", ylabel="income") +``` + +Investment is distorted the most because its measurement error +has the largest innovation variance ($\sigma_\eta = 0.65$), +while income is distorted the least ($\sigma_\eta = 0.05$). 
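+
+As a quick numerical check on this ranking (an extra diagnostic rather than
+one of {cite:t}`Sargent1989`'s tables), the cell below computes the sample
+root-mean-squared gap between each measured series and its true counterpart
+
+```{code-cell} ipython3
+# RMSE of measured minus true series: larger values mean noisier measurement
+for name, true_key, meas_key in [("income", "y_true", "y_meas"),
+                                 ("consumption", "c_true", "c_meas"),
+                                 ("investment", "dk_true", "dk_meas")]:
+    gap = sim[meas_key] - sim[true_key]
+    print(f"{name:12s} RMSE = {np.sqrt(np.mean(gap**2)):.3f}")
+```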
+ +For the filtered series, we expect the Kalman filter to recover the true series more closely by stripping away measurement noise + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and filtered consumption + name: fig-true-filtered-consumption + image: + alt: True and filtered consumption plotted over 80 time periods +--- +plot_true_vs_other(t, sim["c_true"], sim["c_filt"], + "filtered", ylabel="consumption") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and filtered investment + name: fig-true-filtered-investment + image: + alt: True and filtered investment plotted over 80 time periods +--- +plot_true_vs_other(t, sim["dk_true"], sim["dk_filt"], + "filtered", ylabel="investment") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and filtered income + name: fig-true-filtered-income + image: + alt: True and filtered income plotted over 80 time periods +--- +plot_true_vs_other(t, sim["y_true"], sim["y_filt"], + "filtered", ylabel="income") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: True and filtered capital stock + name: fig-true-filtered-capital + image: + alt: True and filtered capital stock plotted over 80 time periods +--- +plot_true_vs_other(t, sim["k_true"], sim["k_filt"], + "filtered", ylabel="capital stock") +``` + +Indeed, Kalman-filtered estimates from Model 1 remove much of the +measurement noise and track the truth closely. + +In the true model the national income identity +$c_t + \Delta k_t = y_{n,t}$ holds exactly. + +Independent measurement errors break this accounting identity +in the measured data. + +The Kalman filter approximately restores it. + +The following figure confirms this by showing the residual $c_t + \Delta k_t - y_{n,t}$ for +both measured and filtered data + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: National income identity residual + name: fig-identity-residual +--- +fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4)) + +ax1.plot(t, sim["c_meas"] + sim["dk_meas"] - sim["y_meas"], lw=2) +ax1.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) +ax1.set_xlabel("time") +ax1.set_ylabel("measured residual") + +ax2.plot(t, sim["c_filt"] + sim["dk_filt"] - sim["y_filt"], lw=2) +ax2.axhline(0, color='black', lw=0.8, ls='--', alpha=0.5) +ax2.set_xlabel("time") +ax2.set_ylabel("filtered residual") + +plt.tight_layout() +plt.show() +``` + +As we have predicted, the residual for the measured data is large and volatile, while the residual for the filtered data is numerically 0. + +## Summary + +{cite:t}`Sargent1989` shows how measurement error alters an +econometrician's view of a permanent income economy driven by +the investment accelerator. + +The Wold representations and variance decompositions of Model 1 +(raw measurements) and Model 2 (filtered measurements) differ +substantially, even though the underlying economy is the same. + +Measurement error can reshape inferences about which shocks +drive which variables. + +Model 1 reproduces the **Granger causality** pattern documented in +the empirical accelerator literature: income appears to Granger-cause +consumption and investment, a result {cite:t}`Sargent1989` attributes +to measurement error and signal extraction in raw reported data. + +Model 2, working with filtered data, attributes nearly all variance +to the single structural shock $\theta_t$ and *cannot* reproduce +the Granger causality pattern. 
+
+The {doc}`Kalman filter <kalman>` effectively strips measurement
+noise from the data, so the filtered series track the truth closely.
+
+Raw measurement error breaks the national income accounting identity,
+but the near-zero residual shows that the filter approximately
+restores it.
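+
+Indeed, for the filtered series the identity holds exactly by construction:
+the rows of $C$ are linearly dependent, with the $y_n$ row equal to the sum
+of the $c$ and $\Delta k$ rows, so any reported series of the form
+$C \hat x_t$ satisfies the identity; the one-line check below confirms this
+
+```{code-cell} ipython3
+# The y_n row of C equals the c row plus the Δk row, so
+# c_filt + dk_filt - y_filt = (C[1] + C[2] - C[0]) @ x_hat = 0
+print(np.allclose(C[0], C[1] + C[2]))
+```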