% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
% Uncomment next 2 lines instead of the first for article-style handout:
% \documentclass[12pt]{article}
% \usepackage{beamerarticle}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols at bottom
% \usetheme{Berlin} % Displays sections on top
% \usetheme{Warsaw} % Displays sections on top
\usetheme{Frankfurt} % Displays sections on top: Fairly thin but swallows some material at bottom of crowded slides
\usepackage[english]{babel}
\usepackage{tikz} % for tikzpicture
\usepackage{comment}
\setbeamertemplate{footline}[frame number]
\mode<presentation>
% \mode<presentation>{\setbeamercolor{background canvas}{bg=black!5}}
\title{Random Explanatory Variables\footnote{See last slide for copyright information.}}
\subtitle{STA302 Fall 2020}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Preparation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Preparation: Change of Variables Formula}
\framesubtitle{$Y = g(X)$}
\pause
Two ways of writing the same thing:
\vspace{10mm}
\pause
{\LARGE
\begin{eqnarray*}
E(Y) & = & \int y f_{_Y}(y) \, dy \\
E(g(X)) & = & \int g(x) f_{_X}(x) \, dx
\end{eqnarray*}
} % End size
\end{frame}

\begin{comment}
\begin{displaymath}
E(g(X)) = \int g(x) f_{_X}(x) \, dx
\end{displaymath}
\end{comment}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Preparation: Indicator functions}
\framesubtitle{Conditional expectation and the Law of Total Probability}
\pause
$I_A(x)$ is the
\emph{indicator function} for the set $A$. It is defined by
\begin{displaymath}
I_A(x) = \left\{ \begin{array}{ll} % ll means left left
    1 & \mbox{for } x \in A \\
    0 & \mbox{for } x \notin A
    \end{array} \right. % Need that crazy invisible right period!
\end{displaymath}
\pause
Also sometimes written $I(x \in A)$
\pause
\begin{eqnarray*}
E(I_A(X)) &=& \sum_x I_A(x) p(x) \pause = \sum_{x \in A} p(x) \pause \mbox{, or}\\ \pause
          & & \int_{-\infty}^\infty I_A(x) f(x) \, dx \pause = \int_A f(x) \, dx\\ \pause
          &&\\
          &=& Pr\{ X \in A \}
\end{eqnarray*}
\pause
So the expected value of an indicator is a probability.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Applies to conditional probabilities too}
\pause
%\framesubtitle{}
{\LARGE
\begin{eqnarray*}
E(I_A(X)|Y) &=& \sum_x I_A(x) p(x|Y) \mbox{, or}\\ \pause
            & & \int_{-\infty}^\infty I_A(x) f(x|Y) \, dx \\ \pause
            &&\\
            &=& Pr\{ X \in A|Y\}
\end{eqnarray*}
} % End size
\pause
So the conditional expected value of an indicator is a \emph{conditional} probability.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Double expectation}
%\framesubtitle{}
{\LARGE
\begin{displaymath}
E\left( X \right) = E\left( E[X|Y]\right) \pause = E(g(Y))
\end{displaymath}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Showing $E\left( X \right) = E\left( E[X|Y]\right)$}
\framesubtitle{Again note $E\left( E[X|Y]\right)$ is an example of $E(g(Y))$}
\pause
\begin{eqnarray*}
E\left( E[X|Y]\right) & = & \int E[X|Y=y] f_y(y) \, dy \\ \pause
& = & \int \left(\int x f_{x|y}(x|y) \, dx \right) f_y(y) \, dy \\ \pause
& = & \int \left(\int x \frac{f_{x,y}(x,y)}{f_y(y)} \, dx \right) f_y(y) \, dy \\ \pause
& = & \int \int x \, f_{x,y}(x,y) \, dx \, dy \\ \pause
& = & E(h(X,Y)) \\ \pause
& = & E(X)
\end{eqnarray*}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Double expectation: $E\left( g(X) \right) = E\left( E[g(X)|Y]\right)$}
%\framesubtitle{}
\pause
\begin{displaymath}
E\left(E[I_A(X)|Y]\right) \pause = E[I_A(X)] \pause = Pr\{ X \in A\} \mbox{, so}
\end{displaymath}
\pause
\begin{eqnarray*}
Pr\{ X \in A\} &=& E\left(E[I_A(X)|Y]\right) \pause \\
&=& E\left(Pr\{ X \in A|Y\}\right) \pause \\
&=& \int_{-\infty}^\infty Pr\{ X \in A|Y=y\} f_y(y) \, dy \mbox{, or} \pause \\
& & \sum_y Pr\{ X \in A|Y=y\} p_y(y)
\end{eqnarray*}
\pause
This is known as the \emph{Law of Total Probability}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Random Explanatory Variables}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Random Explanatory Variables}
\pause
%\framesubtitle{}
Don't you think it's strange?
\pause
\begin{itemize}
\item In the general linear regression model, the $\mathbf{X}$ matrix is supposed to be full of fixed constants. \pause
\item This is convenient mathematically. \pause Think of $E(\widehat{\boldsymbol{\beta}})$. \pause
\item But in any non-experimental study, if you selected another sample you'd get different $\mathbf{X}$ values\pause, because of random sampling. \pause
\item So $\mathbf{X}$ should be at least partly random variables, not fixed. \pause
\item View the usual model as \emph{conditional} on $\mathcal{X}=\mathbf{X}$. \pause
\item All the probabilities and expected values so far in this course are \emph{conditional} probabilities and \emph{conditional} expected values. \pause
\item Conditional on $\mathcal{X}=\mathbf{X}$. \pause
\item We don't want to stop there.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$\widehat{\boldsymbol{\beta}}$ is (conditionally) unbiased}
%\framesubtitle{}
{\Large
\begin{displaymath}
E(\widehat{\boldsymbol{\beta}}|\mathcal{X}=\mathbf{X}) = \boldsymbol{\beta} \pause
\end{displaymath}
} % End size
For \emph{any} fixed $\mathbf{X}$ \pause with linearly independent columns.
\vspace{5mm}
\pause
It's \emph{unconditionally} unbiased too.
\pause
\vspace{5mm}
{\Large
\begin{displaymath}
E\{\widehat{\boldsymbol{\beta}}\} \pause
= E\{E\{\widehat{\boldsymbol{\beta}}|\mathcal{X}\}\} \pause
= E\{\boldsymbol{\beta}\} \pause
= \boldsymbol{\beta}
\end{displaymath}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Perhaps Clearer}
%\framesubtitle{}
\begin{eqnarray*}
E\{\widehat{\boldsymbol{\beta}}\} \pause
&=& E\{E\{\widehat{\boldsymbol{\beta}}|\mathcal{X}\}\} \\ \pause
&=& \int \cdots \int E\{\widehat{\boldsymbol{\beta}}|\mathcal{X}=\mathbf{X}\} \, f(\mathbf{X}) \, d\mathbf{X} \\ \pause
&=& \int \cdots \int \boldsymbol{\beta} \, f(\mathbf{X}) \, d\mathbf{X} \\ \pause
&=& \boldsymbol{\beta} \int \cdots \int f(\mathbf{X})\, d\mathbf{X} \\ \pause
&=& \boldsymbol{\beta} \cdot 1 = \boldsymbol{\beta}.
\end{eqnarray*}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Conditional size $\alpha$ test, Critical value $f_\alpha$}
\pause
%\framesubtitle{}
{\LARGE
\begin{displaymath}
Pr\{F > f_\alpha | \mathcal{X}=\mathbf{X} \} = \alpha
\end{displaymath}
\pause
} % End size
% \vspace{3mm}
\begin{eqnarray*}
Pr\{F > f_\alpha \} &=& \int \cdots \int Pr\{F > f_\alpha | \mathcal{X}=\mathbf{X} \} f(\mathbf{X})\, d\mathbf{X} \\ \pause
&=& \int \cdots \int \alpha f(\mathbf{X})\, d\mathbf{X} \\ \pause
&=& \alpha \int \cdots \int f(\mathbf{X})\, d\mathbf{X} \\ \pause
&=& \alpha
\end{eqnarray*}
\pause
A similar calculation applies to confidence intervals and prediction intervals.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{The moral of the story}
\pause
%\framesubtitle{}
\begin{itemize}
\item Don't worry. \pause
\item Even though the independent variables are often random, we can apply the usual fixed $\mathbf{X}$ model without fear. \pause
\item Estimators are still unbiased.
\pause
\item Tests have the right Type I error probability. \pause
\item Confidence intervals and prediction intervals are still correct. \pause
\item And it's all distribution-free with respect to $\mathbf{X}$.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Copyright Information}
This slide show was prepared by
\href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistics, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f20}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f20}}
\end{frame}

\end{document}