% Omitted variables for STA302
% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>
% \mode<handout>{\setbeamercolor{background canvas}{bg=black!5}} % Comment this out for handout

\title{Omitted Variables\footnote{See last slide for copyright information.}}
\subtitle{STA302 Fall 2016}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{The fixed $x$ regression model}

$y_i = \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_k x_{i,k} + \epsilon_i, \mbox{ with } \epsilon_i \sim N(0,\sigma^2)$

\pause
\vspace{15mm}

Think of the model as \emph{conditional} given $\mathbf{X}_i = \mathbf{x}_i$.

\end{frame}

\begin{frame}
\frametitle{Independence of $\epsilon_i$ and $\mathbf{X}_i$}
\pause

\begin{itemize}
\item The statement $\epsilon_i \sim N(0,\sigma^2)$ is a statement about the \emph{conditional} distribution of $\epsilon_i$ given $\mathbf{X}_i$. \pause
\item It says the density of $\epsilon_i$ given $\mathbf{X}_i$ does not depend on $\mathbf{X}_i$. \pause
\item For convenience, assume $\mathbf{X}_i$ has a density. \pause
\end{itemize}
% \vspace{5mm}

\begin{eqnarray*}
& & f_{\epsilon|\mathbf{x}}(\epsilon|\mathbf{x}) = f_{\epsilon}(\epsilon) \\ \pause
& \Rightarrow & \frac{f_{\epsilon,\mathbf{x}}(\epsilon,\mathbf{x})}{f_{\mathbf{x}}(\mathbf{x})} = f_{\epsilon}(\epsilon) \\ \pause
& \Rightarrow & f_{\epsilon,\mathbf{x}}(\epsilon,\mathbf{x}) = f_{\mathbf{x}}(\mathbf{x}) \, f_{\epsilon}(\epsilon)
\end{eqnarray*}
\pause

Independence!

\end{frame}

\begin{frame}
\frametitle{The fixed $x$ regression model}

\begin{displaymath}
y_i = \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_k x_{i,k} + \epsilon_i, \mbox{ with } \epsilon_i \sim N(0,\sigma^2)
\end{displaymath}
\pause
\vspace{5mm}

\begin{itemize}
\item If viewed as conditional on $\mathbf{x}_i$, this model implies independence of $\epsilon_i$ and $\mathbf{x}_i$, because the conditional distribution of $\epsilon_i$ given $\mathbf{x}_i$ does not depend on $\mathbf{x}_i$. \pause
\item What is $\epsilon_i$? \emph{Everything else} that affects $y_i$. \pause
\item So the usual model says that if the independent variables are random, they have \emph{zero covariance} with all other variables that are related to $y_i$ but are not included in the model. \pause
\item For observational data, this assumption is almost always violated. \pause
\item Does it matter?
\end{itemize}

\end{frame}
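% Supplementary slide (not from the original deck): a short sketch spelling out
% why independence of $\epsilon_i$ and $\mathbf{X}_i$, together with
% $E(\epsilon_i)=0$, gives the zero covariance claimed on the previous slide.
% It assumes the relevant expected values exist.
\begin{frame}
\frametitle{Independence implies zero covariance}
\framesubtitle{A sketch of the step used on the previous slide}

If $\epsilon_i$ is independent of $\mathbf{X}_i$ and $E(\epsilon_i)=0$, then for each explanatory variable $X_{i,j}$,
\begin{eqnarray*}
Cov(X_{i,j},\epsilon_i) &=& E(X_{i,j}\,\epsilon_i) - E(X_{i,j})E(\epsilon_i) \\
&=& E(X_{i,j})E(\epsilon_i) - E(X_{i,j})E(\epsilon_i) \\
&=& 0.
\end{eqnarray*}
So under the usual model, everything absorbed into $\epsilon_i$ has zero covariance with the $x$ variables that are in the model.

\end{frame}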
\begin{frame}
\frametitle{Example}

Suppose that the variables $X_2$ and $X_3$ have an impact on $Y$ and are correlated with $X_1$, but they are not part of the data set. \pause The values of the dependent variable are generated as follows: \pause
\begin{displaymath}
y_i = \beta_0 + \beta_1 X_{i,1} + \beta_2 X_{i,2} + \beta_3 X_{i,3} + \epsilon_i,
\end{displaymath}
independently for $i= 1, \ldots, n$, where $\epsilon_i \sim N(0,\sigma^2)$. \pause The independent variables are random, with expected value and variance-covariance matrix \pause
\begin{displaymath}
E\left( \begin{array}{c} X_{i,1} \\ X_{i,2} \\ X_{i,3} \end{array} \right) =
\left( \begin{array}{c} \mu_1 \\ \mu_2 \\ \mu_3 \end{array} \right)
\mbox{ ~and~ }
V\left( \begin{array}{c} X_{i,1} \\ X_{i,2} \\ X_{i,3} \end{array} \right) =
\left( \begin{array}{rrr}
\phi_{11} & \phi_{12} & \phi_{13} \\
          & \phi_{22} & \phi_{23} \\
          &           & \phi_{33}
\end{array} \right),
\end{displaymath}
\pause
where $\epsilon_i$ is statistically independent of $X_{i,1}$, $X_{i,2}$ and $X_{i,3}$.

\end{frame}

\begin{frame}
\frametitle{Absorb $X_2$ and $X_3$}

\begin{columns} % Use Beamer's columns to make narrower margins!
\column{1.1\textwidth}

Since $X_2$ and $X_3$ are not observed, they are absorbed by the intercept and error term. \pause
{\small
\begin{eqnarray*}
y_i &=& \beta_0 + \beta_1 X_{i,1} + \beta_2 X_{i,2} + \beta_3 X_{i,3} + \epsilon_i \\ \pause
    &=& (\beta_0 + \beta_2\mu_2 + \beta_3\mu_3) + \beta_1 X_{i,1} + (\beta_2 X_{i,2} + \beta_3 X_{i,3} - \beta_2\mu_2 - \beta_3\mu_3 + \epsilon_i) \\ \pause
    &=& \beta^*_0 + \beta_1 X_{i,1} + \epsilon^*_i.
\end{eqnarray*}
} % End size
\pause
And,
\begin{displaymath}
Cov(X_{i,1},\epsilon^*_i) = \beta_2\phi_{12} + \beta_3\phi_{13} \neq 0
\end{displaymath}

\end{columns}
\end{frame}

\begin{frame}
\frametitle{The ``True'' Model}
\framesubtitle{Almost always closer to the truth than the usual model, for observational data}
\pause

{\LARGE
\begin{displaymath}
y_i = \beta_0 + \beta_1 x_i + \epsilon_i,
\end{displaymath}
} % End Size
\vspace{5mm}

where $E(x_i)=\mu_x$, $Var(x_i)=\sigma^2_x$, $E(\epsilon_i)=0$, $Var(\epsilon_i)=\sigma^2_\epsilon$, and $Cov(x_i,\epsilon_i)=c$.

\vspace{5mm} \pause

Under this model,
\begin{displaymath}
\sigma_{xy} = Cov(x_i,y_i) = Cov(x_i,\beta_0 + \beta_1 x_i + \epsilon_i) = \beta_1 \sigma^2_x + c
\end{displaymath}

\end{frame}

\begin{frame}
\frametitle{Estimate $\beta_1$ as usual}

\begin{eqnarray*}
\widehat{\beta}_1 &=& \frac{\sum_{i=1}^n(x_i-\overline{X})(y_i-\overline{Y})}
                           {\sum_{i=1}^n(x_i-\overline{X})^2} \\ \pause
&=& \frac{\frac{1}{n}\sum_{i=1}^n(x_i-\overline{X})(y_i-\overline{Y})}
         {\frac{1}{n}\sum_{i=1}^n(x_i-\overline{X})^2} \\ \pause
&=& \frac{\widehat{\sigma}_{xy}}{\widehat{\sigma}^2_x} \\ \pause
&\stackrel{a.s.}{\rightarrow}& \frac{\sigma_{xy}}{\sigma^2_x} \\ \pause
&=& \frac{\beta_1 \sigma^2_x + c}{\sigma^2_x} \\ \pause
&=& \beta_1 + \frac{c}{\sigma^2_x}
\end{eqnarray*}

\end{frame}

\begin{frame}
\frametitle{$\widehat{\beta}_1 \stackrel{a.s.}{\rightarrow} \beta_1 + \frac{c}{\sigma^2_x}$}
\pause

\begin{itemize}
\item $\widehat{\beta}_1$ is biased (Exercise). \pause
\item It's inconsistent. \pause
\item It could be almost anything, depending on the value of $c$, the covariance between $x_i$ and $\epsilon_i$. \pause
\item The only time $\widehat{\beta}_1$ behaves properly is when $c=0$. \pause
\item Test $H_0: \beta_1=0$: The probability of a Type I error goes almost surely to one. \pause
\item What if $\beta_1 < 0$ but $\beta_1 + \frac{c}{\sigma^2_x} > 0$, \pause and you test $H_0: \beta_1=0$?
\end{itemize}

\end{frame}

\begin{frame}
\frametitle{All this applies to multiple regression}
\framesubtitle{Of course}

\emph{When a regression model fails to include all the independent variables that contribute to the dependent variable, and those omitted independent variables have non-zero covariance with variables that are in the model, the regression coefficients are biased and inconsistent}.

\end{frame}
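% Supplementary simulation slide (not from the original deck): a minimal Python
% sketch illustrating $\widehat{\beta}_1$ landing near $\beta_1 + c/\sigma^2_x$
% rather than $\beta_1$, including the sign flip raised two slides back. All
% numerical values are arbitrary choices; the same check could be done in R.
\begin{frame}[fragile]
\frametitle{A small simulation of the bias}
\framesubtitle{One large sample with $\beta_1=-1$, $\sigma^2_x=1$ and $c=2$, so $\beta_1 + c/\sigma^2_x = 1$}

{\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(302)       # arbitrary seed
n = 100000                             # large n, to see the limit
beta0, beta1 = 1.0, -1.0               # true intercept and slope
sigma2x, c = 1.0, 2.0                  # Var(x) and Cov(x, epsilon)

# Draw (x, epsilon) jointly normal with covariance c and Var(epsilon) = 9.
cov = np.array([[sigma2x, c], [c, 9.0]])
x, eps = rng.multivariate_normal([0.0, 0.0], cov, size=n).T
y = beta0 + beta1 * x + eps

# Least squares slope, as on the "Estimate beta_1 as usual" slide.
b1hat = np.sum((x - x.mean()) * (y - y.mean())) / np.sum((x - x.mean())**2)
print(b1hat)                           # close to +1, not -1: wrong sign
\end{verbatim}
} % End size

\end{frame}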
\begin{frame}
\frametitle{Correlation-Causation}

\begin{itemize}
\item The problem of omitted variables is the technical version of the correlation-causation issue. \pause
\item The omitted variables are ``confounding'' variables. \pause
\item With random assignment and good procedure, $x$ and $\epsilon$ have zero covariance. \pause
\item But random assignment is not always possible. \pause
\item Most applications of regression to observational data provide very poor information about the regression coefficients. \pause
\item Is bad information better than no information at all? \pause
\item Ultimately the solution is better data -- \emph{different} data.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f16}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f16}}

\end{frame}

\end{document}