% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
\usetheme{Berlin} % Displays sections on top
\usepackage[english]{babel}
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation> % NOTE(review): original had a bare \mode with no mode spec; <presentation> assumed -- confirm
% \mode{\setbeamercolor{background canvas}{bg=black!5}}
\title{Review of Normal Linear Regression\footnote{See last slide for copyright information.}}
\subtitle{STA312 Spring 2019}
\date{} % To suppress date
\begin{document}
\begin{frame}
\titlepage
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Multiple Linear Regression with normal errors}
%\framesubtitle{}
{\LARGE
\begin{displaymath}
y_i = \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1} + \epsilon_i,
\end{displaymath}
\pause
} % End size
where
\begin{itemize}
\item[] $\beta_0, \ldots, \beta_{p-1}$ are unknown constants.
\item[] $x_{i,j}$ are known constants.
\item[] $\epsilon_1, \ldots, \epsilon_n$ are independent $N(0,\sigma^2)$ random variables.
\item[] $\sigma^2$ is an unknown constant.
\item[] $y_1, \ldots, y_n$ are observable random variables.
\end{itemize}
\pause
\vspace{3mm}
This implies $y_i$ are independent $N(\mu_i,\sigma^2)$\pause, with
$\mu_i = \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1}$.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Another way to think about it} \pause
%\framesubtitle{}
\begin{itemize}
\item $y_1, \ldots, y_n$ are independent $N(\mu_i,\sigma^2)$ \pause
\item $\mu_i = \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1}$ \pause
\item We have substituted a regression function for the location parameter. \pause
\item Anything that makes the regression function larger or smaller shifts the distribution to the right or left. \pause
\item In normal regression we are always talking about $\mu_i$ as the expected value (which it is), but more generally we mean the location.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Log Likelihood}
%\framesubtitle{}
% Minus sign restored in the exponent of the normal density; it is required for
% the expansion -\frac{1}{2\sigma^2}\sum(y_i-\mu_i)^2 on the second line.
\begin{eqnarray*}
\ell(\boldsymbol{\beta},\sigma^2) & = & \log \prod_{i=1}^n
\frac{1}{\sigma\sqrt{2\pi}}e^{-\frac{(y_i-\mu_i)^2}{2\sigma^2}}\\ \pause
& = & -\frac{n}{2}\log\sigma^2 - \frac{n}{2}\log(2\pi)
- \frac{1}{2\sigma^2} \sum_{i=1}^n (y_i-\mu_i)^2 \pause
\end{eqnarray*}
To make this quantity as \emph{large} as possible over all $\beta_0, \ldots, \beta_{p-1}$\pause,
\begin{itemize}
\item Make $\sum_{i=1}^n (y_i-\mu_i)^2$ as \emph{small} as possible. \pause
\item That is, minimize $\sum_{i=1}^n (y_i - \beta_0 - \beta_1 x_{i,1} - \cdots - \beta_{p-1} x_{i,p-1})^2$. \pause
\item This is a familiar problem -- least squares. \pause
\item So the least-squares estimates for multiple regression are the same as the MLEs.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Vocabulary}
%\framesubtitle{}
\begin{itemize}
\item Explanatory variables are $x$ \pause
\item Response variable is $y$.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{``Control" means hold constant}
%\framesubtitle{}
\begin{itemize}
\item Regression model with four explanatory variables. \pause
\item Hold $x_1$, $x_2$ and $x_4$ constant at some fixed values. \pause
\begin{eqnarray*}
E(Y|\boldsymbol{X}=\boldsymbol{x}) \pause & = & \beta_0 + \beta_1x_1 + \beta_2x_2 +\beta_3x_3 + \beta_4x_4 \\ \pause
& = & (\beta_0 + \beta_1x_1 + \beta_2x_2 + \beta_4x_4) + \beta_3x_3 \\ \pause
\end{eqnarray*}
\item The equation of a straight line with slope $\beta_3$. \pause
\item Values of $x_1$, $x_2$ and $x_4$ affect only the intercept. \pause
\item So $\beta_3$ is the rate at which $E(Y|\mathbf{x})$ changes as a function of $x_3$ with all other variables held constant at fixed levels. \pause
\item \emph{According to the model}.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{More vocabulary}
\framesubtitle{$E(Y|\boldsymbol{X}=\boldsymbol{x}) = (\beta_0 + \beta_1x_1 + \beta_2x_2 + \beta_4x_4) + \beta_3x_3$}
\pause
\begin{itemize}
\item If $\beta_3>0$, describe the relationship between $x_3$ and (expected) $y$ as ``positive," \pause controlling for the other variables. \pause If $\beta_3<0$, negative. \pause
\item Useful ways of saying ``controlling for" or ``holding constant" include \pause
\begin{itemize}
\item Allowing for
\item Correcting for
\item Taking into account
\end{itemize}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Categorical Explanatory Variables} \pause
\framesubtitle{Unordered categories}
\begin{itemize}
\item $X=1$ means Drug, $X=0$ means Placebo. \pause
\item Population mean is $E(Y|X=x) = \beta_0 + \beta_1 x$.
\pause
\item For patients getting the drug, mean response is \pause $E(Y|X=1) = \beta_0 + \beta_1$ \pause
\item For patients getting the placebo, mean response is \pause $E(Y|X=0) = \beta_0$ \pause
\item And $\beta_1$ is the difference between means, the average treatment effect.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{More than Two Categories} \pause
Suppose a study has 3 treatment conditions. For example Group 1 gets Drug 1, Group 2 gets Drug 2, and Group 3 gets a placebo, so that the Explanatory Variable is Group (taking values 1,2,3) and there is some Response Variable $Y$ (maybe response to drug again). \pause
\vspace{10mm}
Why is $E[Y|X=x] = \beta_0 + \beta_1x$ (with $x$ = Group) a silly model?
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Indicator Dummy Variables}
\framesubtitle{With intercept} \pause
\begin{itemize}
\item $x_1 = 1$ if Drug A, zero otherwise
\item $x_2 = 1$ if Drug B, zero otherwise \pause
\item $E[Y|\boldsymbol{X}=\boldsymbol{x}] = \beta_0 + \beta_1x_1 + \beta_2 x_2$. \pause
\item Fill in the table. \pause
\end{itemize}
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug & $x_1$ & $x_2$ & $E(Y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
$A$ & & & $\mu_1$ = \\ \hline
$B$ & & & $\mu_2$ = \\ \hline
Placebo & & & $\mu_3$ = \\ \hline
\end{tabular}
\end{center}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Answer}
\begin{itemize}
\item $x_1 = 1$ if Drug A, zero otherwise
\item $x_2 = 1$ if Drug B, zero otherwise
\item $E[Y|\boldsymbol{X}=\boldsymbol{x}] = \beta_0 + \beta_1x_1 + \beta_2 x_2$.
\pause
\end{itemize}
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug & $x_1$ & $x_2$ & $E(Y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
$A$ & 1 & 0 & $\mu_1$ = $\beta_0 + \beta_1$ \\ \hline
$B$ & 0 & 1 & $\mu_2$ = $\beta_0 + \beta_2$ \\ \hline
Placebo & 0 & 0 & $\mu_3$ = $\beta_0$ \\ \hline
\end{tabular}
\end{center}}
\pause
Regression coefficients are contrasts with the category that has no indicator -- the \emph{reference category}.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Indicator dummy variable coding with intercept} \pause
%\framesubtitle{}
\begin{itemize}
\item With an intercept in the model, need $p-1$ indicators to represent a categorical explanatory variable with $p$ categories. \pause
\item If you use $p$ dummy variables and an intercept, trouble. \pause
\item Regression coefficients are contrasts with the category that has no indicator. \pause
\item Call this the \emph{reference category}.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{What null hypotheses would you test?} \pause
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug & $x_1$ & $x_2$ & $E(Y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
$A$ & 1 & 0 & $\mu_1$ = $\beta_0 + \beta_1$ \\ \hline
$B$ & 0 & 1 & $\mu_2$ = $\beta_0 + \beta_2$ \\ \hline
Placebo & 0 & 0 & $\mu_3$ = $\beta_0$ \\ \hline
\end{tabular}
\end{center}}
\pause
\begin{itemize}
\item Is the effect of Drug $A$ different from the placebo? \pause $H_0: \beta_1=0$ \pause
\item Is Drug $A$ better than the placebo? \pause $H_0: \beta_1=0$ \pause
\item Did Drug $B$ work? \pause $H_0: \beta_2=0$ \pause
\item Did experimental treatment have an effect? \pause $H_0: \beta_1=\beta_2=0$ \pause
\item Is there a difference between the effects of Drug $A$ and Drug $B$?
\pause $H_0: \beta_1=\beta_2$
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Now add a quantitative explanatory variable (covariate)}
\framesubtitle{Covariates often come first in the regression equation} \pause
\begin{itemize}
\item $x_1 = 1$ if Drug A, zero otherwise
\item $x_2 = 1$ if Drug B, zero otherwise
\item $x_3$ = Age \pause
\item $E[Y|\boldsymbol{X}=\boldsymbol{x}] = \beta_0 + \beta_1x_1 + \beta_2 x_2 + \beta_3 x_3$. \pause
\end{itemize}
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug & $x_1$ & $x_2$ & $E(Y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$\\ \hline
A & 1 & 0 & $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$ \\ \hline
B & 0 & 1 & $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$ \\ \hline
Placebo & 0 & 0 & $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$ \\ \hline
\end{tabular}
\end{center}}
\pause
Parallel regression lines.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{More comments}
%\framesubtitle{}
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug & $x_1$ & $x_2$ & $E(Y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$\\ \hline
A & 1 & 0 & $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$ \\ \hline
B & 0 & 1 & $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$ \\ \hline
Placebo & 0 & 0 & $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$ \\ \hline
\end{tabular}
\end{center}}
\pause
\begin{itemize}
\item If more than one covariate, parallel regression planes. \pause
\item Non-parallel (interaction) is testable. \pause
\item ``Controlling" interpretation holds. \pause
\item In an experimental study, quantitative covariates are usually just observed. \pause
\item Could age be related to drug? \pause
\item Good covariates reduce MSE, make testing of categorical variables more sensitive.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Hypothesis Testing}
\framesubtitle{Standard tests when errors are normal} \pause
\begin{itemize}
\item Overall $F$-test for all the explanatory variables at once \pause $H_0: \beta_1 = \beta_2 = \cdots = \beta_{p-1} = 0$ \pause
\item $t$-tests for each regression coefficient: Controlling for all the others, does that explanatory variable matter? \pause $H_0: \beta_j=0$ \pause
\item Test a collection of explanatory variables controlling for another collection \pause $H_0: \beta_2 = \beta_3 = \beta_5 = 0$ \pause
\item Example: Controlling for mother's education and father's education, are (any of) total family income, assessed value of home and total market value of all vehicles owned by the family related to High School GPA? \pause
\item Most general: Testing whether sets of linear combinations of regression coefficients differ from specified constants. \pause $H_0: \mathbf{L}\boldsymbol{\beta} = \mathbf{h}$.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Full versus Restricted Model} % Changing vocabulary: Reduced -> restricted
\framesubtitle{Restricted by $H_0$} \pause
\begin{itemize}
\item You have 2 sets of variables, $A$ and $B$. Want to test $B$ controlling for $A$. \pause
\item Fit a model with both $A$ and $B$: Call it the \emph{Full Model}. \pause
\item Fit a model with just $A$: Call it the \emph{Restricted Model}. \\ \pause $R^2_F \geq R^2_R$. \pause
\item The $F$-test for full versus restricted is a likelihood ratio test (exact).
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{General Linear Test of $H_0: \mathbf{L}\boldsymbol{\beta} = \mathbf{h}$}
\framesubtitle{$\mathbf{L}$ is $r \times p$, rows linearly independent} \pause
{\LARGE
\begin{eqnarray*}
F &=& \frac{(\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})^\top
(\mathbf{L}(\mathbf{X}^\top \mathbf{X})^{-1}\mathbf{L}^\top)^{-1}
(\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})}
{r \, MSE_F} \\
&&\\
& \stackrel{H_0}{\sim} & F(r,n-p)
\end{eqnarray*}
\pause
} % End size
\begin{itemize}
\item Equal to full-restricted formula. \pause
\item Numerator looks like a Wald statistic, and it's no accident.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Copyright Information}
This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/312s19}
{\footnotesize \texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/312s19}}
\end{frame}
\end{document}