% \documentclass[serif]{beamer} % Serif for Computer Modern math font. \documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements \hypersetup{colorlinks,linkcolor=,urlcolor=red} \usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice! \setbeamertemplate{navigation symbols}{} % Suppress navigation symbols at bottom \usetheme{AnnArbor} % CambridgeUS % I'm using this one (yellow) just to be different from Dehan. % \usetheme{Berlin} % Displays sections on top % \usetheme{Warsaw} % Displays sections on top % \usetheme{Frankfurt} % Displays sections on top: Fairly thin but swallows some material at bottom of crowded slides \usepackage[english]{babel} \usepackage{tikz} % for tikzpicture \usepackage{comment} \usepackage{alltt} \setbeamertemplate{footline}[frame number] \mode<presentation> % \mode requires a mode spec such as <presentation> % \mode{\setbeamercolor{background canvas}{bg=black!5}} \title{Random Explanatory Variables\footnote{See last slide for copyright information.}} \subtitle{STA 431 Spring 2023} \date{} % To suppress date \begin{document} \begin{frame} \titlepage \end{frame} \begin{frame} \frametitle{Overview} \tableofcontents \end{frame} \section{Preparation} \begin{frame} \frametitle{Change of Variables} \framesubtitle{A big theorem} \begin{eqnarray*} E(g(X)) &=& \sum_x g(x) \, p_{_X}(x) \\ \pause E(g(\mathbf{x})) &=& \sum_{x_1} \cdots \sum_{x_p} g(x_1, \ldots, x_p) \, p_{\mathbf{x}}(x_1, \ldots, x_p) \\ \pause E(g(X)) &=& \int_{-\infty}^\infty g(x) \, f_{_X}(x) \, dx \\ \pause E(g(\mathbf{x})) &=& \int_{-\infty}^\infty \cdots \int_{-\infty}^\infty g(x_1, \ldots, x_p) \, f_{\mathbf{x}}(x_1, \ldots, x_p) \, dx_1 \ldots dx_p \end{eqnarray*} \end{frame} \begin{frame}{Indicator functions} \framesubtitle{Conditional expectation and the Law of Total Probability} $I_A(x)$ is the \emph{indicator function} for the set $A$. It is defined by \begin{displaymath} I_A(x) = \left\{ \begin{array}{ll} % ll means left left 1 & \mbox{for } x \in A \\ 0 & \mbox{for } x \notin A \end{array} \right. 
% Need that crazy invisible right period! \end{displaymath} Also sometimes written $I(x \in A)$ \pause \begin{eqnarray*} E(g(X)) &=& E(I_A(X)) \\ \pause &=& \sum_x I_A(x) p(x) \mbox{, or}\\ & & \int_{-\infty}^\infty I_A(x) f(x) \, dx \\ \pause &&\\ &=& P\{ X \in A \} \end{eqnarray*} So the expected value of an indicator is a probability. \end{frame} \begin{frame} \frametitle{Applies to conditional probabilities too} \framesubtitle{$Y$ given $X$, like regression} {\LARGE \begin{eqnarray*} E(I_A(Y)|X=x) &=& \sum_y I_A(y) p(y|x) \mbox{, or}\\ & & \int_{-\infty}^\infty I_A(y) f(y|x) \, dy \\ &&\\ &=& Pr\{ Y \in A|X=x\} \end{eqnarray*} } % End size So the conditional expected value of an indicator is a \emph{conditional} probability. \end{frame} \begin{frame} \frametitle{Double expectation} %\framesubtitle{} {\LARGE \begin{eqnarray*} E\left( Y \right) &=& E\left\{ E(Y|X)\right\} \\ \pause &=& E_{\color{red}x}\left\{ E_y(Y|{\color{red}X})\right\} \\ &=& E_{\color{red}x}\left\{ g({\color{red}X})\right\} \end{eqnarray*} } % End size \end{frame} \begin{frame} \frametitle{Showing $E\left( Y \right) = E\left\{ E(Y|X)\right\}$} \framesubtitle{Again note $E\left\{ E(Y|X)\right\}$ is an example of $E(g(X))$} \begin{eqnarray*} E\left\{ E(Y|X)\right\} & = & \int E[Y|X=x] f_x(x) \, dx \\ & = & \int \left(\int y f_{y|x}(y|x) \, dy \right) f_x(x) \, dx \\ & = & \int \left(\int y \frac{f_{x,y}(x,y)}{f_x(x)} \, dy \right) f_x(x) \, dx \\ & = & \int \int y \, f_{x,y}(x,y) \, dy \, dx \\ & = & E(Y) \end{eqnarray*} \end{frame} \begin{frame} \frametitle{Double expectation: $E\left( Y \right) = E\left\{ E(Y|X)\right\}$} %\framesubtitle{} \begin{eqnarray*} Pr\{ Y \in A\} &=& E\left(E[I_A(Y)|X]\right) \\ &=& E\left(Pr\{ Y \in A|X\}\right) \\ \pause &&\\ &=& \int_{-\infty}^\infty Pr\{ Y \in A|X=x\} f_x(x) \, dx \mbox{, or} \\ & & \sum_x Pr\{ Y \in A|X=x\} p_x(x) \end{eqnarray*} \vspace{4mm} This is known as the \emph{Law of Total Probability} \end{frame} \section{Random Explanatory Variables} 
\begin{frame} % \frametitle{Little example} %\framesubtitle{} \begin{center} {\LARGE Random Explanatory Variables \\ \vspace{2mm} in Regression} \end{center} \end{frame} \begin{frame} \frametitle{Example: Multivariate Regression} \framesubtitle{These are all vectors and matrices.} Independently for $i=1, \ldots, n$, \begin{displaymath} \mathbf{y}_i = \boldsymbol{\beta}_0 + \boldsymbol{\beta}_1 \mathbf{x}_i + \boldsymbol{\epsilon}_i, \mbox{ where} \end{displaymath} {\footnotesize \begin{itemize} \item $\mathbf{y}_i$ is a $q \times 1$ random vector of observable response variables, so the regression is multivariate; there are $q$ response variables. \item $\mathbf{x}_i$ is a $p \times 1$ observable random vector; there are $p$ explanatory variables. $E(\mathbf{x}_i) = \boldsymbol{\mu}_x$ and $cov(\mathbf{x}_i) = \boldsymbol{\Phi}_{p \times p}$. The vector $\boldsymbol{\mu}_x$ and the matrix $\boldsymbol{\Phi}$ are unknown parameters. \item $\boldsymbol{\beta}_0$ is a $q \times 1$ vector of unknown constants. \item $\boldsymbol{\beta}_1$ is a $q \times p$ matrix of unknown constants. These are the regression coefficients, with one row for each response variable and one column for each explanatory variable. \item $\boldsymbol{\epsilon}_i$ is a $q \times 1$ unobservable random vector with expected value zero and unknown variance-covariance matrix $cov(\boldsymbol{\epsilon}_i) = \boldsymbol{\Psi}_{q \times q}$. \item $\boldsymbol{\epsilon}_i$ is independent of $\mathbf{x}_i$. 
\end{itemize} } % End size \end{frame} \begin{frame} \frametitle{Three explanatory variables and two response variables} % \framesubtitle{} \begin{center} \includegraphics[width=4in]{RegressionPath2} \end{center} \end{frame} \begin{frame} \frametitle{Regression Equations} % \framesubtitle{That's matrix form} {\footnotesize In scalar form, \begin{eqnarray*} y_{i,1} & = & \beta_{1,0} + \beta_{1,1}x_{i,1} + \beta_{1,2}x_{i,2} + \beta_{1,3}x_{i,3} + \epsilon_{i,1} \\ y_{i,2} & = & \beta_{2,0} + \beta_{2,1}x_{i,1} + \beta_{2,2}x_{i,2} + \beta_{2,3}x_{i,3} + \epsilon_{i,2} \\ \end{eqnarray*} In matrix form, \begin{displaymath} \begin{array}{cccccccc} \mathbf{y}_i &=& \boldsymbol{\beta}_0 &+& \boldsymbol{\beta}_1 & \mathbf{x}_i &+& \boldsymbol{\epsilon}_i \\ & & & & & & & \\ \left( \begin{array}{c} y_{i,1} \\ y_{i,2} \end{array} \right) &=& \left( \begin{array}{c} \beta_{1,0} \\ \beta_{2,0} \end{array} \right) &+& \left( \begin{array}{ccc} \beta_{1,1} & \beta_{1,2} & \beta_{1,3} \\ \beta_{2,1} & \beta_{2,2} & \beta_{2,3} \\ \end{array} \right) & \left( \begin{array}{c} x_{i,1} \\ x_{i,2} \\ x_{i,3} \end{array} \right) &+& \left( \begin{array}{c} \epsilon_{i,1} \\ \epsilon_{i,2} \end{array} \right) \\ \end{array} \end{displaymath} } % End size \end{frame} \begin{frame}[fragile] \frametitle{Simulate Multivariate Regression} \pause %\framesubtitle{} {\footnotesize % or scriptsize % The alltt environment requires \usepackage{alltt} \begin{alltt} {\color{blue}> # Set parameter values > # Regression coefficients > beta10 = 1; beta11 = 1; beta12 = 0; beta13 = 3 > beta20 = 2; beta21 = 0; beta22 = 4; beta23 = 0 > # Expected values of x variables > mux = c(10,20,10) > # Variance-covariance matrix of x variables > Phi = rbind(c(25, 25, 15), + c(25, 100, 35), + c(15, 35, 25)) > # Variance-covariance matrix of error terms > Psi = rbind(c(500, 750), + c(750, 2000)) > source("https://www.utstat.toronto.edu/~brunner/openSEM/fun/rmvn.txt") } \end{alltt} } % End size \end{frame} 
\begin{frame}[fragile] \frametitle{First an experiment} %\framesubtitle{} {\footnotesize % or scriptsize % The alltt environment requires \usepackage{alltt} \begin{alltt} {\color{blue}> x = rmvn(nn=100000, mu=mux, sigma=Phi) > dim(x) } [1] 100000 3 {\color{blue}> head(x) } [,1] [,2] [,3] [1,] 8.956959 7.537267 8.256174 [2,] 21.678814 21.764190 20.103837 [3,] 10.340543 28.986937 17.104511 [4,] 3.760735 10.528940 7.981938 [5,] 9.916082 24.939210 10.681681 [6,] 7.001012 21.927595 16.729394 \end{alltt} } % End size \end{frame} \begin{frame}[fragile] \frametitle{Estimation should be very good with $n=100,000$ } %\framesubtitle{} {\footnotesize % or scriptsize % The alltt environment requires \usepackage{alltt} \begin{alltt} {\color{blue}> apply(x,MARGIN=2,FUN=mean) # Column sample means } [1] 10.00862 19.96847 10.00400 {\color{blue}> mux # Population means, for comparison } [1] 10 20 10 {\color{blue}> var(x) # Sample variance-covariance matrix with n-1 } [,1] [,2] [,3] [1,] 24.95666 25.07949 14.95961 [2,] 25.07949 100.16989 35.05957 [3,] 14.95961 35.05957 24.95647 {\color{blue}> Phi # Population variance-covariance matrix, for comparison } [,1] [,2] [,3] [1,] 25 25 15 [2,] 25 100 35 [3,] 15 35 25 \end{alltt} } % End size \end{frame} \begin{frame}[fragile] \frametitle{Simulate data from the model} %\framesubtitle{} {\footnotesize % or scriptsize % The alltt environment requires \usepackage{alltt} \begin{alltt} {\color{blue}> n = 500 > x = rmvn(nn=n, mu=mux, sigma=Phi) > epsilon = rmvn(nn=n, mu=c(0,0), sigma=Psi) > # Extract variables (for clarity) > x1 = x[,1]; x2 = x[,2]; x3 = x[,3] > epsilon1 = epsilon[,1]; epsilon2 = epsilon[,2] > # Generate y > y1 = beta10 + beta11*x1 + beta12*x2 + beta13*x3 + epsilon1 > y2 = beta20 + beta21*x1 + beta22*x2 + beta23*x3 + epsilon2 > length(y1) } [1] 500 \end{alltt} } % End size \end{frame} \begin{frame}[fragile] \frametitle{Calculate MOM estimate of $\boldsymbol{\beta}_1$ (the slopes)} %\framesubtitle{} {\Large \begin{displaymath} 
\widehat{\boldsymbol{\beta}}_1 = \widehat{\boldsymbol{\Sigma}}_{yx} \widehat{\boldsymbol{\Sigma}}_x^{-1} \end{displaymath} \pause } % End size {\footnotesize % or scriptsize % The alltt environment requires \usepackage{alltt} \begin{alltt} {\color{blue}> # Calculate MOM estimate of beta1 (the slopes) > y = cbind(y1,y2) > Sigmahat_x = var(x) * (n-1)/n > Sigmahat_xy = var(x,y) * (n-1)/n > beta1hat = t(Sigmahat_xy) %*% solve(Sigmahat_x) > round(beta1hat,3) } [,1] [,2] [,3] y1 0.707 0.130 2.967 y2 -0.300 4.194 0.228 \end{alltt} } % End size \end{frame} \begin{frame}[fragile] \frametitle{True $\boldsymbol{\beta}_1$ for Comparison} %\framesubtitle{} {\footnotesize % or scriptsize % The alltt environment requires \usepackage{alltt} \begin{alltt} {\color{blue}> # True beta1 > beta1 = rbind(c(beta11, beta12, beta13), + c(beta21, beta22, beta23)) > beta1 } [,1] [,2] [,3] [1,] 1 0 3 [2,] 0 4 0 {\color{blue}> # Estimated beta1 > round(beta1hat,3) } [,1] [,2] [,3] y1 0.707 0.130 2.967 y2 -0.300 4.194 0.228 \end{alltt} } % End size \end{frame} \begin{frame}[fragile] \frametitle{MOM = Least Squares} %\framesubtitle{} {\footnotesize % or scriptsize % The alltt environment requires \usepackage{alltt} \begin{alltt} {\color{blue}> # MOM estimate of slopes > round(beta1hat,3) } [,1] [,2] [,3] y1 0.707 0.130 2.967 y2 -0.300 4.194 0.228 {\color{blue}> # Least squares estimate > LSbetahat = lsfit(x,y)$coefficients #$ > t(round( LSbetahat ,3)) } Intercept X1 X2 X3 Y1 0.658 0.707 0.130 2.967 Y2 -2.440 -0.300 4.194 0.228 \end{alltt} } % End size \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{comment} \end{comment} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % See page 29 \begin{frame} \frametitle{But this is not how the standard theory goes} %\framesubtitle{} Don't you think it's strange? 
\begin{itemize} \item In the general linear regression model, the $\mathbf{X}$ matrix is supposed to be full of fixed constants. \pause \item This is convenient mathematically. Think of $E(\widehat{\boldsymbol{\beta}})$. \pause \item But in any non-experimental study, \ldots \pause % if you selected another sample you'd get different $X$ values, because of random sampling. % \item So $\mathbf{X}$ should be at least partly random variables, not fixed. \pause \item View the usual model as \emph{conditional} on $\mathcal{X}=\mathbf{X}$. \item All the probabilities and expected values in the typical regression course are \emph{conditional} probabilities and \emph{conditional} expected values. \pause \item Does this make sense? \end{itemize} \end{frame} \begin{frame} \frametitle{$\widehat{\boldsymbol{\beta}}$ is (conditionally) unbiased} %\framesubtitle{} {\Large \begin{displaymath} E(\widehat{\boldsymbol{\beta}}|\mathcal{X}=\mathbf{X}) = \boldsymbol{\beta} \mbox{ for \emph{any} fixed } \mathbf{X} \end{displaymath} } % End size \vspace{5mm} \pause It's \emph{unconditionally} unbiased too. \vspace{5mm} {\Large \begin{displaymath} E\{\widehat{\boldsymbol{\beta}}\} = E\{E\{\widehat{\boldsymbol{\beta}}|\mathcal{X}\}\} = E\{\boldsymbol{\beta}\} \pause = \boldsymbol{\beta} \end{displaymath} } % End size \end{frame} \begin{frame} \frametitle{Conditional size $\alpha$ test, Critical value $f_\alpha$} %\framesubtitle{} {\LARGE \begin{displaymath} Pr\{F > f_\alpha | \mathcal{X}=\mathbf{X} \} = \alpha \end{displaymath} \pause } % End size % \vspace{3mm} \begin{eqnarray*} Pr\{F > f_\alpha \} &=& \int \cdots \int Pr\{F > f_\alpha | \mathcal{X}=\mathbf{X} \} f(\mathbf{X})\, d\mathbf{X} \\ \pause &=& \int \cdots \int \alpha f(\mathbf{X})\, d\mathbf{X} \\ \pause &=& \alpha \int \cdots \int f(\mathbf{X})\, d\mathbf{X} \\ \pause &=& \alpha \end{eqnarray*} \end{frame} \begin{frame} \frametitle{The moral of the story} %\framesubtitle{} \begin{itemize} \item Don't worry. 
\pause \item Even though the explanatory variables are often random, we can apply the usual fixed $\mathbf{X}$ model without fear. \item Estimators are still unbiased. \item Tests have the right Type I error probability. \item Similar arguments apply to confidence intervals and prediction intervals. \pause \item And it's all distribution-free with respect to $\mathcal{X}$. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Copyright Information} This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/brunner/oldclass/431s23} {\small\texttt{http://www.utstat.toronto.edu/brunner/oldclass/431s23}} \end{frame} \end{document}