%                       Omitted variables for SEM course. Based on a slide show from 2101f16, which continues with instrumental variables, going into measurement error too. I added the matrix version of instrumental variables in 2023.

% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements

\hypersetup{colorlinks,linkcolor=,urlcolor=red}

\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
% \usetheme{Frankfurt}  % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usetheme{AnnArbor}  % CambridgeUS
% I'm using this one (yellow) just to be different from Dehan.

\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{comment}
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number] 

\mode<presentation>
% \mode<handout>{\setbeamercolor{background canvas}{bg=black!5}}

% Comment this out for handout
\title{Omitted Variables\footnote{See last slide for copyright information.}}
\subtitle{STA431 Spring 2023}
\date{} % To suppress date

\begin{document}

\begin{frame}
  \titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

\section{Omitted Variables}

\begin{frame}
\frametitle{A Practical Data Analysis Problem}
When more explanatory variables are added to a regression model and these additional explanatory variables are correlated with explanatory variables already in the model  (as they usually are in an observational study), \pause
\begin{itemize}
    \item Statistical significance can appear when it was not present originally. 
    \item Statistical significance that was originally present can disappear. \pause
    \item Even the signs of the $\widehat{\beta}$s can change, reversing the interpretation of how their variables are related to the response variable. 
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{An extreme, artificial example}
\framesubtitle{To make a point} 
Suppose that in a certain population, the correlation between age and strength is $r=-0.93$. \pause
\begin{center}
\includegraphics[width=2.7in]{Chimps}
\end{center}
% R code is at the end of the document.
\end{frame}

\begin{frame}
\frametitle{The fixed $x$ regression model}

\begin{eqnarray*}
    Y_i &=& \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_k x_{i,p-1} + \epsilon_i \\ 
        &=& \mathbf{x}_i^\top \boldsymbol{\beta} + \epsilon_i,
\end{eqnarray*}
%\vspace{0.5mm}

with  $\epsilon_1 \ldots, \epsilon_n \stackrel{i.i.d.}{\sim} N(0,\sigma^2)$. \pause 
\vspace{5mm}

  \begin{itemize}
    \item If viewed as conditional on $\mathcal{X}_i = \mathbf{x}_i$, this model implies independence of $\epsilon_i$ and $\mathcal{X}_i$,   
because the conditional distribution of $\epsilon_i$ given $\mathcal{X}_i = \mathbf{x}_i$ does not depend on $\mathbf{x}_i$.  \pause 
    \item What is $\epsilon_i$? \emph{Everything else} that affects $Y_i$.  \pause 
    \item So the usual model says that if the explanatory varables are random, they have \emph{zero covariance} with all other variables that are related to $Y_i$, but are not included in the model.   
    \item For observational data (no random assignment), this assumption is almost always violated.  \pause 
    \item Does it matter?
  \end{itemize}
\end{frame}

\begin{frame}
\frametitle{Example: $Y_i = \beta_0 + \beta_1 X_{i,1} + \beta_2 X_{i,2} + \beta_3 X_{i,3} + \epsilon_i$}
\framesubtitle{As usual, the explanatory variables are random.} \pause
Suppose that the variables $X_2$ and $X_3$ affect $Y$ and are correlated with $X_1$, but they are not part of the data set.  \pause

% Say X1 is alcohol consumption, X2 is age and X3 is income

\begin{center}
\includegraphics[width=3in]{OmittedPath1}
\end{center} 

\end{frame} 

\begin{frame}
\frametitle{Statement of the model}
\framesubtitle{The explanatory variables $X_2$ and $X_3$ influence $Y$ and are correlated with $X_1$, but they are not part of the data set.}    

The values of the response variable are generated as follows:
\begin{displaymath}
    Y_i = \beta_0 + \beta_1 X_{i,1} + \beta_2 X_{i,2} + \beta_3 X_{i,3} + \epsilon_i,
\end{displaymath}
independently for $i= 1, \ldots, n$, where $\epsilon_i \sim N(0,\sigma^2)$.  
The explanatory variables are random, with expected value and variance-covariance matrix 
\begin{displaymath}
    E\left( \begin{array}{c} X_{i,1} \\ X_{i,2} \\ X_{i,3} \end{array} \right) =
     \left( \begin{array}{c} \mu_1 \\ \mu_2 \\ \mu_3 \end{array} \right)
\mbox{ ~and~ } 
cov\left( \begin{array}{c} X_{i,1} \\ X_{i,2} \\ X_{i,3} \end{array} \right) =
 \left( \begin{array}{rrr}
\phi_{11} & \phi_{12} & \phi_{13} \\ 
          & \phi_{22} & \phi_{23} \\
          &           & \phi_{33}
\end{array} \right), 
\end{displaymath} 
where $\epsilon_i$ is independent of $X_{i,1}$, $X_{i,2}$ and $X_{i,3}$.  Values of the variables $X_{i,2}$ and $X_{i,3}$ are latent, and are not included in the data set.
\end{frame}

\begin{frame}
\frametitle{Absorb $X_2$ and $X_3$}
Since $X_2$ and $X_3$ are not observed, they are absorbed by the intercept and error term. 
\begin{columns}   % Use Beamer's columns to make narrower margins!
\column{1.1\textwidth}
  
{\small
\begin{eqnarray*}
    Y_i &=& \beta_0 + \beta_1 X_{i,1} + \beta_2 X_{i,2} + \beta_3 X_{i,3} 
    + \epsilon_i \\ \pause 
        &=& (\beta_0 + \beta_2\mu_2 + \beta_3\mu_3) + \beta_1 X_{i,1} + (\beta_2 X_{i,2} + \beta_3 X_{i,3} - \beta_2\mu_2 - \beta_3\mu_3 + \epsilon_i) \\ \pause 
        &=& \beta^\prime_0 + \beta_1 X_{i,1} + \epsilon^\prime_i.
\end{eqnarray*}
} % End size
\end{columns}
 \pause 

And,
\begin{eqnarray*}
    Cov(X_{i,1},{\color{blue}\epsilon^\prime_i}) 
&=& Cov(X_{i,1}, \, {\color{blue}\beta_2 X_{i,2} + \beta_3X_{i,3}  - \beta_2\mu_2 - \beta_3\mu_3 + \epsilon_i}) \\ \pause
&=& \beta_2Cov(X_{i,1},X_{i,2}) +\beta_3 Cov(X_{i,1},X_{i,3}) + Cov(X_{i,1},\epsilon_i) \\ \pause
&=& \beta_2\phi_{12} + \beta_3\phi_{13} \neq 0.
\end{eqnarray*} 
\end{frame}

\begin{frame}
\frametitle{The ``True" Regression Model}
\framesubtitle{Almost always closer to the truth than the usual model, for observational data}  \pause 
{\LARGE
\begin{displaymath}
     Y_i = \beta_0 + \beta_1 X_i + \epsilon_i,
\end{displaymath} 
 } % End Size
 
\vspace{5mm}

where $E(X_i)=\mu_x$, $Var(X_i)=\sigma^2_x$,  $E(\epsilon_i)=0$, $Var(\epsilon_i)=\sigma^2_\epsilon$, and 
{\color{red}$Cov(X_i,\epsilon_i)=c$}.

\vspace{5mm} \pause 

Under this model,
\begin{displaymath}
     \sigma_{xy} = Cov(X_i,Y_i) = Cov(X_i,\beta_0 + \beta_1 X_i + \epsilon_i) 
             = \beta_1 \sigma^2_x + c
\end{displaymath} 
\end{frame}

\begin{frame}
\frametitle{Estimate $\beta_1$ as usual with least squares}
\framesubtitle{Recall $Cov(X_i,Y_i) = \sigma_{xy} = \beta_1 \sigma^2_x + c$} \pause 

\begin{eqnarray*}
\widehat{\beta}_1 &=& \frac{\sum_{i=1}^n(X_i-\overline{X})(Y_i-\overline{Y})}
                           {\sum_{i=1}^n(X_i-\overline{X})^2}  \\  \pause 
                  &=& \frac{\frac{1}{n}\sum_{i=1}^n(X_i-\overline{X})(Y_i-\overline{Y})}
                           {\frac{1}{n}\sum_{i=1}^n(X_i-\overline{X})^2} \\ \pause 
                  &=& \frac{\widehat{\sigma}_{xy}}{\widehat{\sigma}^2_x} \pause
                      \stackrel{p}{\rightarrow}  \frac{\sigma_{xy}}{\sigma^2_x}\\ \pause 
                  &=& \frac{\beta_1 \sigma^2_x + c}{\sigma^2_x} \\ \pause 
                  &=& \beta_1 + \frac{c}{\sigma^2_x}
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{$\widehat{\beta}_1 \stackrel{p}{\rightarrow} \beta_1 + \frac{c}{\sigma^2_x}$} 
\framesubtitle{It converges to the wrong thing.} \pause 
\begin{itemize}
    \item $\widehat{\beta}_1$ is inconsistent. 
    \item For large samples it could be almost anything, depending on the value of $c$, the covariance between $X_i$ and $\epsilon_i$. \pause 
    \item Small sample estimates could be accurate, but only by chance.  \pause
    \item The only time $\widehat{\beta}_1$ behaves properly is when $c=0$. \pause 
    \item Test $H_0: \beta_1=0$: Probability of making a Type I error goes to one as $n \rightarrow \infty$. % \pause 
    % \item What if $\beta_1 < 0$ but $\beta_1 + \frac{c}{\sigma^2_x} > 0$,  \pause 
    %     and you test $H_0: \beta_1=0$?
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{All this applies to multiple regression}
\framesubtitle{Of course} 

\emph{When a regression model fails to include all the explanatory variables that contribute to the response variable, and those omitted explanatory variables have non-zero covariance with variables that are in the model, the regression coefficients are inconsistent.  \pause

\vspace{4mm}
Estimation and inference are almost guaranteed to be misleading, especially for large samples.}
\end{frame}


\begin{frame}
\frametitle{Correlation-Causation}
\begin{itemize}
    \item The problem of omitted variables is a technical aspect of the correlation-causation issue.  
    \item The omitted variables are ``confounding" variables. \pause 
    \item With random assignment and good procedure, $x$ and $\epsilon$ have zero covariance. \pause 
    \item But random assignment is not always possible.  \pause 
    \item Most applications of regression to observational data provide very poor information about the regression coefficients.  
    \item Is bad information better than no information at all? 
\end{itemize}
\end{frame}



\begin{frame}
\frametitle{How about another estimation method?}
\framesubtitle{Other than ordinary least squares}  
\begin{itemize}
    \item Can \emph{any} other method be successful? \pause
    \item This is a very practical question, because almost all regressions with observational data have the disease. 
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{For simplicity, assume normality}
\framesubtitle{$Y_i = \beta_0 + \beta_1 X_i + \epsilon_i$}   
\begin{itemize}
    \item Assume $(X_i,\epsilon_i)$ are bivariate normal.  \pause
    \item This makes $(X_i,Y_i)$ bivariate normal.  
    \item $(X_1,Y_1), \ldots, (X_n,Y_n) \stackrel{i.i.d.}{\sim} N_2(\boldsymbol{\mu},\boldsymbol{\Sigma})$,   \pause where
\end{itemize}
\begin{displaymath}
\boldsymbol{\mu} = \left( \begin{array}{c} \mu_1 \\ \mu_2 \end{array} \right) 
           = \left( \begin{array}{c} \mu_x \\ \beta_0+\beta_1\mu_x  \end{array} \right) 
\end{displaymath}  \pause
and
\begin{displaymath}
    \boldsymbol{\Sigma} = \left( \begin{array}{c c}
        \sigma_{11} & \sigma_{12}  \\
               & \sigma_{22}
        \end{array} \right)
      = \left( \begin{array}{c c}
        \sigma^2_x & \beta_1\sigma^2_x+c  \\
                  & \beta_1^2\sigma^2_x + 2 \beta_1c + \sigma^2_\epsilon
        \end{array} \right).
\end{displaymath}  \pause                    % Homework
\begin{itemize}
    \item All you can ever learn from the data are the approximate values of $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$. \pause
    \item Even if you knew $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$ exactly, could you know $\beta_1$?
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Five equations in six unknowns}
The parameter is
$\theta = (\mu_x, \sigma^2_x, \sigma^2_\epsilon, c, \beta_0, \beta_1)$. \pause
The distribution of the data is determined by 
\vspace{2mm}

{\footnotesize
\begin{displaymath}
             \left( \begin{array}{c} \mu_1 \\ \mu_2 \end{array} \right) 
           = \left( \begin{array}{c} \mu_x \\ \beta_0+\beta_1\mu_x  \end{array} \right) 
           ~~\mbox{ and }~~ 
        \left( \begin{array}{c c}
        \sigma_{11} & \sigma_{12}  \\
               & \sigma_{22}
        \end{array} \right)
      = \left( \begin{array}{c c}
        \sigma^2_x & \beta_1\sigma^2_x+c  \\
                  & \beta_1^2\sigma^2_x + 2 \beta_1c + \sigma^2_\epsilon
        \end{array} \right)
\end{displaymath}  \pause
} % End size
\begin{itemize}
    \item $\mu_x=\mu_1$ and $\sigma^2_x=\sigma_{11}$. \pause
    \item The remaining 3 equations in 4 unknowns have infinitely many solutions. \pause
    \item So infinitely many sets of parameter values yield the \emph{same distribution of the sample data}. \pause
    \item This is serious trouble -- lack of parameter identifiability. \pause
    \item \emph{Definition}: If a parameter is a function of the distribution of the observable data, it is said to be \emph{identifiable}.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Showing identifiability}
%\framesubtitle{} 
 \emph{Definition}: If a parameter is a function of the distribution of the observable data, it is said to be identifiable.
\begin{itemize}
    \item How could a parameter be a function of a distribution? \pause
    \item $d \sim F_\theta$ and $\theta = g(F_\theta)$ \pause
    \item Usually $g$ is defined in terms of moments. \pause
    \item Example: $F_\theta(x) = 1-e^{-\theta x}$  and $f_\theta(x) = \theta e^{-\theta x}$ for $x>0$. \pause 
\end{itemize}
\begin{eqnarray*}
    f_\theta(x) & = & \frac{d}{dx} \, F_\theta(x) \\ \pause
    E(X)        & = & \int_0^\infty x  f_\theta(x) \, dx  \pause = \frac{1}{\theta} \\  \pause
    \theta      & = & \frac{1}{E(X)} \pause
\end{eqnarray*} 
Sometimes people use moment-generating functions or characteristic functions instead of just moments.
\end{frame}

\begin{frame}
\frametitle{Showing identifiability is like Method of Moments Estimation} \pause
%\framesubtitle{} 
\begin{itemize}
    \item The distribution of the data is always a function of the parameters. \pause
    \item The moments are always a function of the distribution of the data. \pause
    \item If the parameters can be expressed as a function of the moments, \pause
        \begin{itemize}
            \item Put hats on to obtain MOM estimates, \pause
            \item Or observe that the parameter is a function of the distribution, and so is identifiable.
        \end{itemize}
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Back to the five equations in six unknowns}
\framesubtitle{$Y_i = \beta_0 + \beta_1 X_i + \epsilon_i$}   \pause
$\mathbf{d}_i =  \left( \begin{array}{c} X_i \\ Y_i \end{array} \right) \sim N_2(\boldsymbol{\mu},\boldsymbol{\Sigma})$, where
\pause
\vspace{2mm}
\begin{displaymath}
\begin{array}{ccccl}
\boldsymbol{\mu} & = &  \left( \begin{array}{c} \mu_1 \\ \mu_2 \end{array} \right)
           & = &  \left( \begin{array}{c} \mu_x \\ \beta_0+\beta_1\mu_x  \end{array} \right) \\  \pause
           &&&& \\
\boldsymbol{\Sigma} & = &  \left( \begin{array}{c c}
                          \sigma_{11} & \sigma_{12}  \\
                          \cdot  & \sigma_{22}
                          \end{array} \right)   
           & = & \left( \begin{array}{c c}
                        \sigma^2_x & \beta_1\sigma^2_x+c  \\
                          \cdot    & \beta_1^2\sigma^2_x + 2 \beta_1c + \sigma^2_\epsilon
                        \end{array} \right) 
\end{array}  \pause
\end{displaymath}

\vspace{5mm}
We have expressed the moments in terms of the parameters, \pause but we can't solve for $\theta = (\mu_x, \sigma^2_x, \sigma^2_\epsilon, c, \beta_0, \beta_1)$.
\end{frame}

\begin{frame}
\frametitle{Skipping the High School algebra}
\framesubtitle{$\theta = (\mu_x, \sigma^2_x, \sigma^2_\epsilon, c, \beta_0, \beta_1)$}  \pause
\begin{itemize}
     \item For \emph{any} given $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$, all the points in a one-dimensional subset of the 6-dimensional parameter space yield $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$, and hence the same distribution of the sample data.  \pause
     \item In that subset, values of $\beta_1$ range from $-\infty$ to $-\infty$, \pause so $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$ could have been produced by \emph{any} value of $\beta_1$. \pause
     \item There is no way to distinguish between the possible values of $\beta_1$ based on sample data. \pause
    \item The problem is fatal,  if all you can observe is $X$ and $Y$.   % There was originally a slide entitled "Details for the record" but it was needlessly complicated and actually wrong in one place. It is cleaner to parameterize in terms of c.
\end{itemize}
\end{frame}


\section{Instrumental Variables}

\begin{frame}
\frametitle{Instrumental Variables (Wright, 1928)}
\framesubtitle{A partial solution}  \pause

{\footnotesize
\begin{itemize}
    \item An instrumental variable is a variable that is correlated with an explanatory variable, but is not correlated with any error terms and has no direct connection to the response variable. 
\begin{center}
\includegraphics[width=1.5in]{InstruVar}
\end{center} 
    \item An instrumental variable is often not the main focus of attention; it's just a tool. \pause
    \item The usual definition is that conditionally on the $x$ variables, the instrumental variables are independent of all the other variables in the model. \pause
    \item In Econometrics, the instrumental variable usually \emph{influences} the explanatory variable. 
\end{itemize}
} % End size
\end{frame}

\begin{frame}
\frametitle{ Model One: A Simple Example}
What is the contribution of income to credit card debt?  \pause
\begin{displaymath}
     Y_i = \beta_0 + \beta_1 X_i + \epsilon_i,
\end{displaymath} 
where $E(X_i)=\mu_x$, $Var(X_i)=\sigma^2_x$,  $E(\epsilon_i)=0$, $Var(\epsilon_i)=\sigma^2_\epsilon$, and $Cov(X_i,\epsilon_i)=c$. 

\end{frame}

\begin{frame}
\frametitle{A path diagram of Model One} 
$Y_i = \alpha + \beta X_i + \epsilon_i$, where $E(X_i)=\mu$, $Var(X_i)=\sigma^2_x$,  $E(\epsilon_i)=0$, $Var(\epsilon_i)=\sigma^2_\epsilon$, and $Cov(X_i,\epsilon_i)=c$. 
\begin{center}
\includegraphics[width=3in]{OmittedPath2}
\end{center} 
The least squares estimate of $\beta$ is inconsistent, and so is every other possible estimate. \pause (This is strictly true if the data are normal.)
\end{frame}

\begin{frame}
\frametitle{Model Two: Add an instrumental variable} 
An instrumental variable for an explanatory variable is another random variable that has non-zero covariance with the explanatory variable, and \emph{no direct connection with any other variable in the model.}  \pause

\vspace{5mm}

% The standard notation for instrumental variables is Z. I'm fixing this in the text, and should do it here too. Use graphics files from the text.

Focus the study on real estate agents in many cities. Include median price of resale home.  

\begin{itemize}
    \item $X$ is income.
    \item $Y$ is credit card debt.
    \item $Z$ is median price of resale home. 
\end{itemize}\pause

\begin{eqnarray*}
    X_i & = & \alpha_1 + \beta_1Z_i +\epsilon_{i1} \\ 
    Y_i & = & \alpha_2 + \beta_2X_i +\epsilon_{i2}   
\end{eqnarray*} 
\end{frame}

\begin{frame}
\frametitle{Picture of Model Two}
\framesubtitle{$Z_i$ is median price of resale home, $X_i$ is income, $Y_i$ is credit card debt.} 
\begin{eqnarray*}
    X_i & = & \alpha_1 + \beta_1Z_i +\epsilon_{i1} \\ 
    Y_i & = & \alpha_2 + \beta_2X_i +\epsilon_{i2}   
\end{eqnarray*} 

\begin{center}
\includegraphics[width=4in]{InstruVar1}
\end{center} 
Main interest is in $\beta_2$.
\end{frame}


\begin{frame}
\frametitle{Statement of Model Two}
\framesubtitle{$Z_i$ is median price of resale home, $X_i$ is income, $Y_i$ is credit card debt.} 
\begin{eqnarray*}
    X_i & = & \alpha_1 + \beta_1Z_i + \epsilon_{i1} \\ 
    Y_i & = & \alpha_2 + \beta_2X_i + \epsilon_{i2},  
\end{eqnarray*} 
where
\begin{itemize}
    \item  $E(Z_i) = \mu_z$, $Var(Z_i) = \sigma^2_z$.
    \item $E(\epsilon_{i1}) = 0$, $Var(\epsilon_{i1}) = \sigma^2_1$.
    \item $E(\epsilon_{i2}) = 0$, $Var(\epsilon_{i2}) = \sigma^2_2$.
    \item $Cov(\epsilon_{i1},\epsilon_{i2}) = c$.
    \item $Z_i$ is independent of $\epsilon_{i1}$ and $\epsilon_{i2}$.
\end{itemize}
\end{frame}



\begin{frame}
\frametitle{Calculate the covariance matrix of the observable data for Model Two. Call it $\boldsymbol{\Sigma} = [\sigma_{ij}]$}    % Homework
%\framesubtitle{}

From $X_i = \alpha_1 + \beta_1Z_i +\epsilon_{i1}$ and $Y_i = \alpha_2 + \beta_2X_i +\epsilon_{i2}$, get the symmetric matrix
\vspace{5mm}

{\LARGE $\boldsymbol{\Sigma} =$}                      
\renewcommand{\arraystretch}{1.5}
\begin{tabular}{|c|ccc|}  \hline
    &   $X$             &             $Y$                  &       $Z$     \\ \hline
$X$ & $\beta_1^2\sigma^2_z+\sigma^2_1$ & $\beta_2(\beta_1^2\sigma^2_z+\sigma^2_1)+c$ & $\beta_1\sigma^2_z$ \\ 
$Y$ & $\cdot$ & $\beta_1^2\beta_2^2\sigma^2_z + \beta_2^2\sigma^2_1 + 2\beta_2c + \sigma^2_2$ & $\beta_1\beta_2\sigma^2_z$ \\  
$Z$ & $\cdot$ & $\cdot$ & $\sigma^2_z$ \\  \hline
\end{tabular} \pause 
\renewcommand{\arraystretch}{1.0}

\vspace{5mm}

\begin{displaymath}
    \beta_2 = \frac{\sigma_{23}}{\sigma_{13}}
\end{displaymath} 
\end{frame}


% STA2101f16 continues from here, starting with
% "But of course there is measurement error."

\begin{frame}
\frametitle{Parameter Estimation for Model Two}
\framesubtitle{$X_i = \alpha_1 + \beta_1Z_i +\epsilon_{i1}$ and $Y_i = \alpha_2 + \beta_2X_i +\epsilon_{i2}$}
 $\boldsymbol{\Sigma} =$
{\footnotesize
\renewcommand{\arraystretch}{1.5}
\begin{tabular}{|c|ccc|}  \hline
    &   $Z$             &             $X$                  &       $Y$     \\ \hline
$Z$ & $\sigma^2_w$      & $\beta_1\sigma^2_w$              & $\beta_1\beta_2\sigma^2_w$  \\
$X$ & $\cdot$           & $\beta_1^2\sigma^2_w+\sigma^2_1$ & $\beta_2(\beta_1^2\sigma^2_w+\sigma^2_1)+c$ \\ 
$Y$ & $\cdot$ & $\cdot$ & $\beta_1^2\beta_2^2\sigma^2_w + \beta_2^2\sigma^2_1 + 2\beta_2c + \sigma^2_2$ \\ 
\hline
\end{tabular} 
\renewcommand{\arraystretch}{1.0}
} % End size
\pause 

% \vspace{2mm}
\begin{itemize}
    \item $ \widehat{\beta}_2 = \frac{\widehat{\sigma}_{23}}{\widehat{\sigma}_{13}}$ 
    \item All the other parameters are identifiable too.   % Homework
    \item The instrumental variable saved us.  \pause
    \item There are 9 model parameters, and 9 moments in $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$. 
    \item The invariance principle yields explicit formulas for the MLEs. 
    \item If the data are normal, MLEs equal the Method of Moments estimates because they are both 1-1 with the moments. 
    % Homework
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Model Three: Matrix Version of Instrumental Variables} 
\framesubtitle{The usual rule is at least one instrumental variable for each explanatory variable.} 
\begin{center}
\includegraphics[width=3in]{MatrixInstruVar}
\end{center}
\end{frame}


\begin{frame}
\frametitle{Model Three is a Multivariate Regression Model} 
\framesubtitle{$\mathbf{y}_i = \boldsymbol{\beta}_0 + \boldsymbol{\beta}_1 \mathbf{x}_i +   \boldsymbol{\epsilon}_i$} 
With these additional stipulations
\begin{itemize}
    \item $cov(\mathbf{x}_i,\boldsymbol{\epsilon}_i) = \pmb{\c{C}}$, a $p \times q$ matrix of covariances.
    \item There are at least $p$ instrumental variables. Put the best $p$ in the random vector $\mathbf{z}_i$.
    \item $cov(\mathbf{x}_i,\mathbf{z}_i)=$ ${\LARGE\boldsymbol{\kappa}}$, $p \times p$ matrix of covariances. Assume ${\LARGE\boldsymbol{\kappa}}$ has an inverse.
    \item  $cov(\mathbf{z}_i)=\boldsymbol{\Phi}_z$.
\end{itemize} % $E(\mathbf{z}_i)=\boldsymbol{\mu}_z$ and
\vspace{-5mm}
\begin{center}
\includegraphics[width=2in]{MatrixInstruVar}
\end{center}
\end{frame}


\begin{frame}
\frametitle{Moments for Model Three}
\framesubtitle{$\mathbf{y}_i = \boldsymbol{\beta}_0 + \boldsymbol{\beta}_1 \mathbf{x}_i +   \boldsymbol{\epsilon}_i$, ~ $cov(\mathbf{x}_i,\boldsymbol{\epsilon}_i) = \pmb{\c{C}}$, ~
$cov(\mathbf{x}_i,\mathbf{z}_i)=$ ${\Large\boldsymbol{\kappa}}$, ~
$cov(\mathbf{z}_i)=\boldsymbol{\Phi}_z$} 
{\small
\begin{displaymath}
    \boldsymbol{\mu} = 
E\left( \begin{array}{c} 
    \mathbf{x}_i \\ \hline  
    \mathbf{y}_i \\ \hline  
    \mathbf{z}_i   \end{array} \right) =
\left( \begin{array}{c} 
    \boldsymbol{\mu}_x \\ \hline  
    \boldsymbol{\beta}_0 + \boldsymbol{\beta}_1 \boldsymbol{\mu}_x \\ \hline  
    \boldsymbol{\mu}_z   \end{array} \right)
\end{displaymath} \pause


\begin{displaymath}
\renewcommand{\arraystretch}{1.5}
    \boldsymbol{\Sigma} = 
    cov\left( \begin{array}{c} 
    \mathbf{x}_i \\ \hline  
    \mathbf{y}_i \\ \hline  
    \mathbf{z}_i   \end{array} \right) = 
    \left( \begin{array}{c|c|c}
        \boldsymbol{\Phi}_x & \boldsymbol{\Phi}_x\boldsymbol{\beta}_1^\top + \pmb{\c{C}}& 
        {\Large\boldsymbol{\kappa}}  \\ \hline
        \boldsymbol{\beta}_1\boldsymbol{\Phi}_x + \pmb{\c{C}}^\top\ & \boldsymbol{\beta}_1\boldsymbol{\Phi}_x\boldsymbol{\beta}_1^\top +
                \boldsymbol{\beta}_1\pmb{\c{C}} + 
                \pmb{\c{C}}^\top\boldsymbol{\beta}_1^\top + \boldsymbol{\Psi} &
        \boldsymbol{\beta}_1 {\large\boldsymbol{\kappa}}  \\ \hline
        {\large\boldsymbol{\kappa}}^\top & 
{\large\boldsymbol{\kappa}}^\top \boldsymbol{\beta}_1^\top & \boldsymbol{\Phi}_z  
    \end{array} \right)
\renewcommand{\arraystretch}{1.0}
\end{displaymath} 
} % End size
\end{frame} 


\begin{frame}
\frametitle{Solve for the parameters from the moments} 
\framesubtitle{For Model Three} 
\begin{itemize}
    \item Proving identifiability, so consistent estimation is possible.
    \item Obtain method of moments estimators.
%    \item Especially for $\boldsymbol{\beta}_1$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Start with the covariance matrix $\boldsymbol{\Sigma}$}
\framesubtitle{Parameters in the covariance matrix are
$\boldsymbol{\Phi}_x$, $\boldsymbol{\beta}_1$, 
${\large\boldsymbol{\kappa}}$, $\pmb{\c{C}}$, 
$\boldsymbol{\Psi}$, $\boldsymbol{\Phi}_z$} 
\begin{displaymath}
\renewcommand{\arraystretch}{1.5}
    \left( \begin{array}{c|c|c}
    \boldsymbol{\Sigma}_{11} & \boldsymbol{\Sigma}_{12} &     
    \boldsymbol{\Sigma}_{13}  \\ \hline
    \cdot & \boldsymbol{\Sigma}_{21} & \boldsymbol{\Sigma}_{22} \\ \hline
    \cdot & \cdot & \boldsymbol{\Sigma}_{33}
    \end{array} \right) 
=   \left( \begin{array}{c|c|c}
        \boldsymbol{\Phi}_x & \boldsymbol{\Phi}_x\boldsymbol{\beta}_1^\top + \pmb{\c{C}}& 
        {\Large\boldsymbol{\kappa}}  \\ \hline
        \cdot & \boldsymbol{\beta}_1\boldsymbol{\Phi}_x\boldsymbol{\beta}_1^\top +
                \boldsymbol{\beta}_1\pmb{\c{C}} + 
                \pmb{\c{C}}^\top\boldsymbol{\beta}_1^\top + \boldsymbol{\Psi} &
        \boldsymbol{\beta}_1 {\large\boldsymbol{\kappa}}  \\ \hline
        \cdot & \cdot & \boldsymbol{\Phi}_z  
    \end{array} \right)
\renewcommand{\arraystretch}{1.0}
\end{displaymath} \vspace{7mm}

Six matrix equations in six unknowns.
\end{frame} 


\begin{frame}
\frametitle{Solutions}
%\framesubtitle{} 
Equations
{\footnotesize
\begin{displaymath}
\renewcommand{\arraystretch}{1.5}
    \left( \begin{array}{c|c|c}
    \boldsymbol{\Sigma}_{11} & \boldsymbol{\Sigma}_{12} &     
    \boldsymbol{\Sigma}_{13}  \\ \hline
    \boldsymbol{\Sigma}_{21} & \boldsymbol{\Sigma}_{22} & \boldsymbol{\Sigma}_{23} \\ \hline
    \boldsymbol{\Sigma}_{31} & \boldsymbol{\Sigma}_{32} & \boldsymbol{\Sigma}_{33}
    \end{array} \right) 
=       \left( \begin{array}{c|c|c}
        \boldsymbol{\Phi}_x & \boldsymbol{\Phi}_x\boldsymbol{\beta}_1^\top + \pmb{\c{C}} & 
        {\Large\boldsymbol{\kappa}}  \\ \hline
        \boldsymbol{\beta}_1\boldsymbol{\Phi}_x + \pmb{\c{C}}^\top & \boldsymbol{\beta}_1\boldsymbol{\Phi}_x\boldsymbol{\beta}_1^\top +
                \boldsymbol{\beta}_1\pmb{\c{C}} + 
                \pmb{\c{C}}^\top\boldsymbol{\beta}_1^\top + \boldsymbol{\Psi} &
        \boldsymbol{\beta}_1 {\large\boldsymbol{\kappa}}  \\ \hline
        {\large\boldsymbol{\kappa}}^\top & 
{\large\boldsymbol{\kappa}}^\top \boldsymbol{\beta}_1^\top & \boldsymbol{\Phi}_z  
    \end{array} \right)
\renewcommand{\arraystretch}{1.0}
\end{displaymath} 
} % End size
Solutions
\begin{eqnarray*}
    \boldsymbol{\Phi}_x & = & \boldsymbol{\Sigma}_{11} \\
    {\Large\boldsymbol{\kappa}}_{~} & = & \boldsymbol{\Sigma}_{13} \\
    \boldsymbol{\Phi}_z  & = & \boldsymbol{\Sigma}_{33} \\ \pause
    \boldsymbol{\beta}_1  & = &  \boldsymbol{\Sigma}_{23} \boldsymbol{\Sigma}_{13}^{-1} \\ 
    \pmb{\c{C}}_{~}  & = & \boldsymbol{\Sigma}_{12} - \boldsymbol{\Sigma}_{11} \boldsymbol{\Sigma}_{31}^{-1} \boldsymbol{\Sigma}_{32}     \\ 
    \boldsymbol{\Psi}_{~}  & = & \boldsymbol{\Sigma}_{22} - \boldsymbol{\Sigma}_{23} \boldsymbol{\Sigma}_{13}^{-1} \boldsymbol{\Sigma}_{12} - \boldsymbol{\Sigma}_{21} \boldsymbol{\Sigma}_{31}^{-1} \boldsymbol{\Sigma}_{32} 
    + \boldsymbol{\Sigma}_{23} \boldsymbol{\Sigma}_{13}^{-1} \boldsymbol{\Sigma}_{11} \boldsymbol{\Sigma}_{31}^{-1} \boldsymbol{\Sigma}_{32} 
\end{eqnarray*} 

\end{frame} 

\begin{frame}
\frametitle{Solve for intercepts and expected values}
%\framesubtitle{} 
Using
\begin{displaymath}
E\left( \begin{array}{c} 
    \mathbf{x}_i \\ \hline  
    \mathbf{y}_i \\ \hline  
    \mathbf{z}_i   \end{array} \right) =
\left( \begin{array}{c} 
    \boldsymbol{\mu}_1 \\ \hline  
    \boldsymbol{\mu}_2 \\ \hline  
    \boldsymbol{\mu}_3  \end{array} \right) = 
\left( \begin{array}{c} 
    \boldsymbol{\mu}_x \\ \hline  
    \boldsymbol{\beta}_0 + \boldsymbol{\beta}_1 \boldsymbol{\mu}_x \\ \hline  
    \boldsymbol{\mu}_z   \end{array} \right)
\end{displaymath} \pause

{\LARGE
\begin{eqnarray*}
    \boldsymbol{\mu}_x & = & \boldsymbol{\mu}_1 \\
    \boldsymbol{\mu}_z & = & \boldsymbol{\mu}_3 \\ \pause 
    \boldsymbol{\beta}_0 & = & \boldsymbol{\mu}_2 - \boldsymbol{\Sigma}_{23} 
                              \boldsymbol{\Sigma}_{13}^{-1} \boldsymbol{\mu}_1   
\end{eqnarray*} 
} % End size
\end{frame}






\begin{frame}
\frametitle{Full Solution for Model Three}
\framesubtitle{For the record} 

\begin{eqnarray*}
    \boldsymbol{\Phi}_x & = & \boldsymbol{\Sigma}_{11} \\
    {\Large\boldsymbol{\kappa}}_{~} & = & \boldsymbol{\Sigma}_{13} \\
    \boldsymbol{\Phi}_z  & = & \boldsymbol{\Sigma}_{33} \\ 
    \boldsymbol{\beta}_1  & = &  \boldsymbol{\Sigma}_{23} \boldsymbol{\Sigma}_{13}^{-1} \\ 
    \pmb{\c{C}}_{~}  & = & \boldsymbol{\Sigma}_{12} - \boldsymbol{\Sigma}_{11} \boldsymbol{\Sigma}_{31}^{-1} \boldsymbol{\Sigma}_{32}     \\ 
    \boldsymbol{\Psi}_{~}  & = & \boldsymbol{\Sigma}_{22} - \boldsymbol{\Sigma}_{23} \boldsymbol{\Sigma}_{13}^{-1} \boldsymbol{\Sigma}_{12} - \boldsymbol{\Sigma}_{21} \boldsymbol{\Sigma}_{31}^{-1} \boldsymbol{\Sigma}_{32} 
    + \boldsymbol{\Sigma}_{23} \boldsymbol{\Sigma}_{13}^{-1} \boldsymbol{\Sigma}_{11} \boldsymbol{\Sigma}_{31}^{-1} \boldsymbol{\Sigma}_{32} \\
    \boldsymbol{\mu}_x & = & \boldsymbol{\mu}_1 \\
    \boldsymbol{\mu}_z & = & \boldsymbol{\mu}_3 \\ 
    \boldsymbol{\beta}_0 & = & \boldsymbol{\mu}_2 - \boldsymbol{\Sigma}_{23} 
                              \boldsymbol{\Sigma}_{13}^{-1} \boldsymbol{\mu}_1
\end{eqnarray*} 
\end{frame} 



\begin{frame}
\frametitle{Method of Moments Estimators for Model Three}
\framesubtitle{Just put hats on, and estimate population means with sample means} 

\begin{eqnarray*}
    \widehat{\boldsymbol{\Phi}}_x & = & \widehat{\boldsymbol{\Sigma}}_{11} \\
    \widehat{{\Large\boldsymbol{\kappa}}}_{~} & = & \widehat{\boldsymbol{\Sigma}}_{13} \\
    \widehat{\boldsymbol{\Phi}}_z  & = & \widehat{\boldsymbol{\Sigma}}_{33} \\ 
    \widehat{\boldsymbol{\beta}}_1  & = &  \widehat{\boldsymbol{\Sigma}}_{23} \widehat{\boldsymbol{\Sigma}}_{13}^{-1} \\ 
    \widehat{\pmb{\c{C}}}_{~}  & = & \widehat{\boldsymbol{\Sigma}}_{12} - \widehat{\boldsymbol{\Sigma}}_{11} \widehat{\boldsymbol{\Sigma}}_{31}^{-1} \widehat{\boldsymbol{\Sigma}}_{32}     \\ 
    \widehat{\boldsymbol{\Psi}}_{~}  & = & \widehat{\boldsymbol{\Sigma}}_{22} - \widehat{\boldsymbol{\Sigma}}_{23} \widehat{\boldsymbol{\Sigma}}_{13}^{-1} \widehat{\boldsymbol{\Sigma}}_{12} - \widehat{\boldsymbol{\Sigma}}_{21} \widehat{\boldsymbol{\Sigma}}_{31}^{-1} \widehat{\boldsymbol{\Sigma}}_{32} 
    + \widehat{\boldsymbol{\Sigma}}_{23} \widehat{\boldsymbol{\Sigma}}_{13}^{-1} \widehat{\boldsymbol{\Sigma}}_{11} \widehat{\boldsymbol{\Sigma}}_{31}^{-1} \widehat{\boldsymbol{\Sigma}}_{32} \\
    \widehat{\boldsymbol{\mu}}_x & = & \overline{\mathbf{x}} \\
    \widehat{\boldsymbol{\mu}}_z & = & \overline{\mathbf{z}} \\ 
    \widehat{\boldsymbol{\beta}}_0 & = & \overline{\mathbf{y}} - \widehat{\boldsymbol{\Sigma}}_{23} 
                              \widehat{\boldsymbol{\Sigma}}_{13}^{-1} \overline{\mathbf{x}}
\end{eqnarray*} 
\end{frame}


\begin{frame}
\frametitle{Count the Parameters in Model Three} 
\framesubtitle{$\boldsymbol{\theta} = ( \boldsymbol{\mu}_x, \boldsymbol{\mu}_z, \boldsymbol{\beta}_0, \boldsymbol{\beta}_1, \boldsymbol{\Phi}_x, \boldsymbol{\Psi}, \boldsymbol{\Phi}_z, \pmb{\c{C}}, {\large\boldsymbol{\kappa}} )$} 
{\small
\begin{itemize}
    \item $\boldsymbol{\mu}_x$ is a $p \times 1$ vector of expected values. That's $p$ parameters.
    \item $\boldsymbol{\mu}_z$ is a $p \times 1$ vector of expected values. That's $p$ more parameters. 
    \item $\boldsymbol{\beta}_0$ is a $q \times 1$ vector of intercepts. That's $q$ parameters.
    \item $\boldsymbol{\beta}_1$ is a $p \times q$ matrix of regression coefficients. That's $pq$ parameters.
    \item $cov(\mathbf{x}_i)=\boldsymbol{\Phi}_x$ is a $p \times p$ covariance matrix, with $p(p+1)/2$ unique elements.
    \item $cov(\boldsymbol{\epsilon}_i)=\boldsymbol{\Psi}$ is a $q \times q$ covariance matrix, with $q(q+1)/2$ unique elements.
    \item $cov(\mathbf{z}_i)=\boldsymbol{\Phi}_z$ is a $p \times p$ covariance matrix, with $p(p+1)/2$ unique elements.
    \item $cov(\mathbf{x}_i,\boldsymbol{\epsilon}_i) = \pmb{\c{C}}$ is a $p \times q$ matrix of covariances. That's $pq$ more parameters.
    \item  $cov(\mathbf{x}_i,\mathbf{z}_i)=$ ${\LARGE\boldsymbol{\kappa}}$ is $p \times p$ matrix of covariances. That's $p^2$ more parameters. 
\end{itemize}
} % End size
\end{frame}

\begin{frame}
\frametitle{Counting} \pause
%\framesubtitle{} 
\begin{itemize}
    \item  Parameters
\begin{eqnarray*}
    && 2p + q + pq + \frac{p(p+1)}{2} + \frac{q(q+1)}{2} + \frac{p(p+1)}{2} + pq + p^2 \\
    &=&  {\color{red}3p + q + 2 p^{2} + 2 p q + \frac{q^{2}}{2} + \frac{q}{2} } 
\end{eqnarray*}    \pause

    \item  Moments
        \begin{itemize}
            \item There are $2p+q$ expected values in $\boldsymbol{\mu} = (\boldsymbol{\mu}_1 | \boldsymbol{\mu}_2 | \boldsymbol{\mu}_3)^\top$. \pause
            \item $\boldsymbol{\Sigma}$ has $2p+q$ rows and $2p+q$ columns, for $(2p+q)(2p+q+1)/2$ unique elements. \pause
            \item Total number of moments is ${\color{red}3p + q + 2 p^{2} + 2 p q + \frac{q^{2}}{2} + \frac{q}{2} }$.
        \end{itemize}
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Invariance} \pause
\framesubtitle{The MLE of a 1-1 function is that function of the MLE} 
\begin{itemize}
    \item Pretend the data $\mathbf{d}_i$ are multivariate normal.
    \item Unrestricted multivariate normal MLE of $(\boldsymbol{\mu},\boldsymbol{\Sigma})$ is $(\overline{\mathbf{d}}, \widehat{\boldsymbol{\Sigma}})$. \pause
    \item The moments $(\boldsymbol{\mu},\boldsymbol{\Sigma})$ are a function of the model parameters in $\boldsymbol{\theta}$. \pause
    \item By solving equations, we have shown that the models parameters are also a function of the moments and there are the same number of moments and model parameters.
    \item The function is one-to-one (injective). \pause
    \item By invariance, $(\widehat{\boldsymbol{\mu}},\widehat{\boldsymbol{\Sigma}}) \leftrightarrow \widehat{\boldsymbol{\theta}}$. 
    \item And the MOM estimates are also the MLEs.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Explicit formulas for the Maximum Likelihood Estimators: Model Three}
% \framesubtitle{} 

\begin{eqnarray*}
    \widehat{\boldsymbol{\beta}}_0 & = & \overline{\mathbf{y}} - \widehat{\boldsymbol{\Sigma}}_{23} \widehat{\boldsymbol{\Sigma}}_{13}^{-1} \overline{\mathbf{x}} \\
    \widehat{\boldsymbol{\beta}}_1  & = &  \widehat{\boldsymbol{\Sigma}}_{23} \widehat{\boldsymbol{\Sigma}}_{13}^{-1} \\
    \widehat{\boldsymbol{\Psi}}_{~}  & = & \widehat{\boldsymbol{\Sigma}}_{22} - \widehat{\boldsymbol{\Sigma}}_{23} \widehat{\boldsymbol{\Sigma}}_{13}^{-1} \widehat{\boldsymbol{\Sigma}}_{12} - \widehat{\boldsymbol{\Sigma}}_{21} \widehat{\boldsymbol{\Sigma}}_{31}^{-1} \widehat{\boldsymbol{\Sigma}}_{32} 
    + \widehat{\boldsymbol{\Sigma}}_{23} \widehat{\boldsymbol{\Sigma}}_{13}^{-1} \widehat{\boldsymbol{\Sigma}}_{11} \widehat{\boldsymbol{\Sigma}}_{31}^{-1} \widehat{\boldsymbol{\Sigma}}_{32} \\
    \widehat{\pmb{\c{C}}}_{~}  & = & \widehat{\boldsymbol{\Sigma}}_{12} - \widehat{\boldsymbol{\Sigma}}_{11} \widehat{\boldsymbol{\Sigma}}_{31}^{-1} \widehat{\boldsymbol{\Sigma}}_{32}     \\ 
&&\\
    \widehat{\boldsymbol{\Phi}}_x & = & \widehat{\boldsymbol{\Sigma}}_{11} \\
    \widehat{{\Large\boldsymbol{\kappa}}}_{~} & = & \widehat{\boldsymbol{\Sigma}}_{13} \\
    \widehat{\boldsymbol{\Phi}}_z  & = & \widehat{\boldsymbol{\Sigma}}_{33} \\ 
    \widehat{\boldsymbol{\mu}}_x & = & \overline{\mathbf{x}} \\
    \widehat{\boldsymbol{\mu}}_z & = & \overline{\mathbf{z}} \\ 
\end{eqnarray*} 
\end{frame}

\begin{frame}
\frametitle{Multivariate Normal Likelihood}
\framesubtitle{$\mathbf{d}_1, \ldots, \mathbf{d}_n \stackrel{iid}{\sim} N(\boldsymbol{\mu},\boldsymbol{\Sigma})$} 
%{\LARGE
\begin{eqnarray*}
    L(\boldsymbol{\mu},\boldsymbol{\Sigma}) 
    & = & \prod_{i=1}^n\frac{1}{|\boldsymbol{\Sigma}|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}} 
                \exp\left\{ -\frac{1}{2} (\mathbf{d}_i-\boldsymbol{\mu})^\top
                 \boldsymbol{\Sigma}^{-1}(\mathbf{d}_i-\boldsymbol{\mu})\right\} \\
    &&\\
    & = & |\boldsymbol{\Sigma}|^{-\frac{n}{2}} (2\pi)^{-\frac{np}{2}} \times \\
    && 
    \exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) +
    (\overline{\mathbf{d}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} 
    (\overline{\mathbf{d}}-\boldsymbol{\mu}) \right\},
\end{eqnarray*} 
where $\boldsymbol{\widehat{\Sigma}} = 
    \frac{1}{n}\sum_{i=1}^n (\mathbf{d}_i-\overline{\mathbf{d}}) 
                        (\mathbf{d}_i-\overline{\mathbf{d}})^\top $
%} % End size      
\end{frame}

% herehere p. 130           \pause

\begin{comment}

\end{comment}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ${\LARGE\boldsymbol{\kappa}}$

\begin{frame}
\frametitle{Comments}
%\framesubtitle{} 
\begin{itemize}
    \item Instrumental variables are a great technical solution to the problem of omitted variables
    \item But good instrumental variables are not easy to find.  
    \item They will not just happen to be in the data set, except by a miracle. \pause
    \item They really have to come from another universe, but still have a strong and clear connection to the explanatory variable. 
    \item Data collection has to be \emph{planned}. \pause
    \item Wright's original example was tax policy for cooking oil. 
%    \item Time series applications are common in Econometrics, but not in this course. 
\end{itemize}
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by  \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistics, University of Toronto. It is licensed under a 
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
     {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/brunner/oldclass/431s23} {\footnotesize \texttt{http://www.utstat.toronto.edu/brunner/oldclass/431s23}}

\end{frame}


\end{document}

Omitted variables are a serious problem. Instrumental variables are a great solution, if you can find them. The usual rule is at least one instrumental variable for each explanatory variable in the model. Instrumental variables are standard in econometrics, and they are becoming more common in epidemiology.

There is a simpler solution to the problem of omitted variables, and it has a clear place in the discourse of science. Yes, science is like a conversation, or a set of conversations at a big party. The discussions travel down well-worn paths. Consider conversations about hockey, or marriage. 

Somebody publishes a study or give a talk, and someone in the audience objects. ``Did you control for age?" If the answer is no, then age was an omitted variable, and maybe there is reason to believe that it's responsible for the apparent connection between some explanatory variable and the response variable. In this case, the person giving the talk is embarrassed, and the next study will include age. It's an incremental process, but you can see it as grinding slowly toward the truth. 

However, what if including the omitted variable in the data set is not enough?

This would go well in the book.




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Example}

\end{frame}

\begin{frame}
\frametitle{}
% \framesubtitle{}
\begin{itemize}
    \item 
    \item 
    \item 
\end{itemize}
\end{frame}

{\LARGE
\begin{displaymath}
    
\end{displaymath}
 } % End Size
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Chimps


source("https://www.utstat.toronto.edu/~brunner/openSEM/fun/rmvn.txt")
set.seed(9999)
n = 100 # Each
S0 = rbind(c(1.00,9.60),
           c(9.60,144.0))
chimps = rmvn(n,c(5.5,300),S0)
humans = rmvn(n,c(17.5,100),S0)
Age = c(chimps[,1],humans[,1])
Strength = c(chimps[,2],humans[,2])
plot(Age,Strength,pch=' ')
title("Age and Strength")
points(chimps,pch='*'); points(humans,pch='o')
# summary(lm(Strength~Age))
x = c(2,20); y = 383.8795 - 16.1797 * x
lines(x,y)
text(9,300,"Chimps",font=2); text(14,100,"Humans",font=2)





sem = 'http://www.utstat.toronto.edu/~brunner/openSEM/sage/sem.sage'
load(sem)
# In EsqVar, eta = beta eta + gamma xi,   with cov(xi) = Phi
######## First IV model, with no latent variables except errors ######## 
# xi' = (W,epsilon1,epsilon2) and eta' = (X,Y) 
B = ZeroMatrix(2,2) # beta
B[1,0] = var('beta2') ; B
P = ZeroMatrix(3,3) # Phi
P[0,0] = var('sigma00'); P[1,1] = var('sigma11'); P[2,2] = var('sigma22')
P[1,2] = var('c'); P[2,1] = var('c'); P
G = ZeroMatrix(2,3) # gamma
G[0,0] = var('beta1'); G[0,1] = 1; G[1,2] = 1; G
pickout = 3,1,2 # Indices of observable variables, order eta, xi
V = EqsVar(B,G,P,pickout); V
print(latex(V)) # Agrees with hand calculations


######## Second IV model, with true income ######## 
sem = 'http://www.utstat.toronto.edu/~brunner/openSEM/sage/sem.sage'
load(sem)
# In EsqVar, eta = beta eta + gamma xi,   with cov(xi) = Phi

# eta' = (T,X,Y) and xi' = (W,epsilon1,epsilon2,epsilon3)  
B = ZeroMatrix(3,3) # beta
B[1,0] = var('beta2') ; B[2,0] = var('beta3') ; B
P = ZeroMatrix(4,4) # Phi
P[0,0] = var('sigma00'); P[1,1] = var('sigma11')
P[2,2] = var('sigma22'); P[3,3] = var('sigma33');
P[2,3] = var('c'); P[3,2] = var('c'); P
G = ZeroMatrix(3,4) # gamma
G[0,0] = var('beta1'); G[0,1] = 1; G[1,2] = 1; G[2,3] = 1; G
pickout = 4,2,3 # Indices of observable variables, order eta, xi
V = EqsVar(B,G,P,pickout); V
print(latex(V)) # Search and replace notation

# Play with nasty
P = SymmetricMatrix(4,'sigma'); P[0,0]=var('sigma00')
P[0,1]=0; P[0,2]=0; P[0,3]=0
P[1,0]=0; P[2,0]=0; P[3,0]=0; P
V = EqsVar(B,G,P,pickout); V
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\left(\begin{array}{rrr}
\sigma_{00} & \beta_{1} \beta_{2} \sigma_{00} & \beta_{1}
\beta_{3} \sigma_{00} \\
\beta_{1} \beta_{2} \sigma_{00} & \beta_{1}^{2} \beta_{2}^{2}
\sigma_{00} + \beta_{2}^{2} \sigma_{11} + \sigma_{22} &
\beta_{1}^{2} \beta_{2} \beta_{3} \sigma_{00} + \beta_{2} \beta_{3}
\sigma_{11} + c \\
\beta_{1} \beta_{3} \sigma_{00} & \beta_{1}^{2} \beta_{2} \beta_{3}
\sigma_{00} + \beta_{2} \beta_{3} \sigma_{11} + c & \beta_{1}^{2}
\beta_{3}^{2} \sigma_{00} + \beta_{3}^{2} \sigma_{11} + \sigma_{33}
\end{array}\right)

######## Third IV model, with true income and debt, correlated errors ######## 
sem = 'http://www.utstat.toronto.edu/~brunner/openSEM/sage/sem.sage'
load(sem)
# In EsqVar, eta = beta eta + gamma xi,   with cov(xi) = Phi

# eta' = (Tx,Ty,X,Y) and xi' = (epsilon1,epsilon2,epsilon3,epsilon4,W)  
B = ZeroMatrix(4,4) # beta
B[1,0] = var('beta3') ; B[2,0] = var('beta2') ; B[3,1] = var('beta4'); B

P = SymmetricMatrix(5,'sigma'); P[4,4]=var('sigma00')
# No correlations between W and the errors
for j in interval(0,3):
    P[j,4] = 0
    P[4,j] = 0
P

G = ZeroMatrix(4,5) # gamma
G[0,0] = 1; G[0,4] = var('beta1'); G[1,1] = 1; G[2,3] = 1; G[3,2] = 1; G

pickout = 9,3,4 # Indices of observable variables, order eta, xi
V = EqsVar(B,G,P,pickout); V

print(latex(V)) # Search and replace sigma00

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\left(\begin{array}{ccc}
\sigma^2_w & \beta_{1} \beta_{2} \sigma^2_w & \beta_{1}
\beta_{3} \beta_{4} \sigma^2_w \\
\beta_{1} \beta_{2} \sigma^2_w & \beta_{1}^{2} \beta_{2}^{2}
\sigma^2_w + \beta_{2}^{2} \sigma_{11} + 2 \, \beta_{2} \sigma_{14} +
\sigma_{44} & \beta_{1}^{2} \beta_{2} \beta_{3} \beta_{4}
\sigma^2_w + \beta_{2} \beta_{3} \beta_{4} \sigma_{11} + \beta_{2}
\beta_{4} \sigma_{12} + \beta_{3} \beta_{4} \sigma_{14} + \beta_{2}
\sigma_{13} + \beta_{4} \sigma_{24} + \sigma_{34} \\
\beta_{1} \beta_{3} \beta_{4} \sigma^2_w & \beta_{1}^{2} \beta_{2}
\beta_{3} \beta_{4} \sigma^2_w + \beta_{2} \beta_{3} \beta_{4}
\sigma_{11} + \beta_{2} \beta_{4} \sigma_{12} + \beta_{3} \beta_{4}
\sigma_{14} + \beta_{2} \sigma_{13} + \beta_{4} \sigma_{24} +
\sigma_{34} & \beta_{1}^{2} \beta_{3}^{2} \beta_{4}^{2} \sigma^2_w
+ \beta_{3}^{2} \beta_{4}^{2} \sigma_{11} + 2 \, \beta_{3} \beta_{4}^{2}
\sigma_{12} + 2 \, \beta_{3} \beta_{4} \sigma_{13} + \beta_{4}^{2}
\sigma_{22} + 2 \, \beta_{4} \sigma_{23} + \sigma_{33}
\end{array}\right)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

sigma_{00}



OLD2
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


######## Second IV model, with true income ######## 
sem = 'http://www.utstat.toronto.edu/~brunner/openSEM/sage/sem.sage'
load(sem)
# In EsqVar, eta = beta eta + gamma xi,   with cov(xi) = Phi

# eta' = (T,X,Y) and xi' = (W,epsilon1,epsilon2,epsilon3)  
B = ZeroMatrix(3,3) # beta
B[1,0] = var('beta2') ; B[2,0] = var('beta3') ; B
P = ZeroMatrix(4,4) # Phi
P[0,0] = var('sigma00'); P[1,1] = var('sigma11')
P[2,2] = var('sigma22'); P[3,3] = var('sigma33');
P[2,3] = var('c'); P[3,2] = var('c'); P
G = ZeroMatrix(3,4) # gamma
G[0,0] = var('beta1'); G[0,1] = 1; G[1,2] = 1; G[2,3] = 1; G
pickout = 4,2,3 # Indices of observable variables, order eta, xi
V = EqsVar(B,G,P,pickout); V
print(latex(V)) # Search and replace notation

# Play with nasty
P = SymmetricMatrix(4,'sigma'); P[0,0]=var('sigma00')
P[0,1]=0; P[0,2]=0; P[0,3]=0
P[1,0]=0; P[2,0]=0; P[3,0]=0; P
V = EqsVar(B,G,P,pickout); V


\begin{eqnarray*}
    m_2    & = & \beta_0+\beta_1 m_1 \\
    v_{12} & = & \beta_1 v_{11} + c \\ 
    v_{22} & = & \beta_1^2\ v_{11} + 2 \beta_1c + \sigma^2_\epsilon
\end{eqnarray*}