% Beamer slide deck: "Categorical Independent Variables" (STA 302, Fall 2017).
% Sections: (1) indicator dummy variables with an intercept, (2) cell means coding.
% Everything after \end{document} is scratch material that is never typeset:
% an empty frame template, R code that appears to draw the two scatterplots,
% and frames that are no longer used.

% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{amsfonts} % for \mathbb{R} The set of reals
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
% \mode needs a mode specification (or a star); <presentation> also covers handout mode.
\mode<presentation>

\title{Categorical Independent Variables\footnote{See last slide for copyright information.}}
\subtitle{STA 302 Fall 2017}
\date{} % To suppress date

\begin{document}

\begin{frame}
  \titlepage
\end{frame}

\begin{frame}
  \frametitle{Overview}
  \tableofcontents
\end{frame}

% No interactions in 2017

\section{Indicators with Intercept}

\begin{frame}
  \frametitle{Independent variables need not be continuous}
  \pause
  %\framesubtitle{}
  Code data so that $x=1$ means Drug, $x=0$ means Placebo.
  \pause
  \vspace{5mm}
  \begin{itemize}
    \item Population mean response is $E(y|x) = \beta_0 + \beta_1 x$. \pause
    \item For patients getting the drug, mean response is \pause
      $E(y|x=1) = \beta_0 + \beta_1$. \pause
    \item For patients getting the placebo, mean response is \pause
      $E(y|x=0) = \beta_0$. \pause
    \item Difference (treatment effect) is $\beta_1$. \pause
    \item Test $H_0:\beta_1=0$.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Scatterplot}
  \framesubtitle{Showing the least-squares line}
  \pause
  \begin{columns}
    \column{0.6\textwidth}
      \includegraphics[width=2.5in]{IndicatorScatter}
      \pause
    \column{0.4\textwidth}
      Predicted response is $\widehat{y} = b_0 + b_1x $.
      \pause
      \vspace{3mm}
      \begin{itemize}
        \item[] For patients getting the drug, predicted response is \pause
          $\widehat{y} = b_0 + b_1 \pause = \overline{y}_1$. \pause
        \item[]
        \item[] For patients getting the placebo, predicted response is \pause
          $\widehat{y} = b_0 \pause = \overline{y}_0 $.
      \end{itemize}
      \vspace{10mm}
  \end{columns}
\end{frame}

\begin{frame}
  \frametitle{More than Two Categories}
  \pause
  %\framesubtitle{}
  Suppose a study has 3 treatment conditions. \pause
  For example
  \begin{itemize}
    \item Group 1 gets Drug 1
    \item Group 2 gets Drug 2
    \item Group 3 gets a placebo \pause
    \item So that the explanatory variable is Group \pause
    \item Taking values 1,2,3. \pause
    \item The dependent variable $y$ is response to drug. \pause
  \end{itemize}
  \vspace{10mm}
  Why is $E(y|x) = \beta_0 + \beta_1x$ (with $x$ = Group) a silly model?
\end{frame}

\begin{frame}
  \frametitle{Indicator Dummy Variables}
  \framesubtitle{With intercept}
  \pause
  \begin{itemize}
    \item $x_1 = 1$ if Drug A, zero otherwise
    \item $x_2 = 1$ if Drug B, zero otherwise \pause
    \item $E(y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$. \pause
    \item Fill in the table. \pause
  \end{itemize}
  {\begin{center}
  \begin{tabular}{|c|c|c|l|} \hline
    Drug    & $x_1$ & $x_2$ & $E(y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
    $A$     &       &       & $\mu_1$ = \\ \hline
    $B$     &       &       & $\mu_2$ = \\ \hline
    Placebo &       &       & $\mu_3$ = \\ \hline
  \end{tabular}
  \end{center}}
\end{frame}

\begin{frame}
  \frametitle{Answer}
  \begin{itemize}
    \item $x_1 = 1$ if Drug A, zero otherwise
    \item $x_2 = 1$ if Drug B, zero otherwise
    \item $E(y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$.
    \pause
  \end{itemize}
  {\begin{center}
  \begin{tabular}{|c|c|c|l|} \hline
    Drug    & $x_1$ & $x_2$ & $E(y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
    $A$     & 1     & 0     & $\mu_1$ = $\beta_0 + \beta_1$ \\ \hline
    $B$     & 0     & 1     & $\mu_2$ = $\beta_0 + \beta_2$ \\ \hline
    Placebo & 0     & 0     & $\mu_3$ = $\beta_0$ \\ \hline
  \end{tabular}
  \end{center}}
  \pause
  Regression coefficients are contrasts with the category that has no indicator -- the \emph{reference category}.
\end{frame}

\begin{frame}
  \frametitle{Indicator dummy variable coding with intercept}
  \pause
  %\framesubtitle{}
  \begin{itemize}
    \item With an intercept in the model, need $p-1$ indicators to represent a categorical explanatory variable with $p$ categories. \pause
    \item If you use $p$ dummy variables and also an intercept, trouble. \pause
    \item Indicators would add up to the intercept and columns of $X$ would be linearly dependent. \pause
    \item Regression coefficients are contrasts with the category that has no indicator. \pause
    \item Call this the \emph{reference category}.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{$x_1 = 1$ if Drug A, zero o.w., $x_2 = 1$ if Drug B, zero o.w.}
  \pause
  %\framesubtitle{3-d Scatterplot}
  Recall $\sum_{i=1}^n (y_i-m)^2$ is minimized at $m = \overline{y}$
  \pause
  \begin{center}
    \includegraphics[width=3in]{ABCscatter}
  \end{center}
\end{frame}

\begin{frame}
  \frametitle{What null hypotheses would you test?}
  \pause
  {\begin{center}
  \begin{tabular}{|c|c|c|l|} \hline
    Drug    & $x_1$ & $x_2$ & $E(y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
    $A$     & 1     & 0     & $\mu_1$ = $\beta_0 + \beta_1$ \\ \hline
    $B$     & 0     & 1     & $\mu_2$ = $\beta_0 + \beta_2$ \\ \hline
    Placebo & 0     & 0     & $\mu_3$ = $\beta_0$ \\ \hline
  \end{tabular}
  \end{center}}
  \pause
  \begin{itemize}
    \item Is the effect of Drug $A$ different from the placebo? \pause
      $H_0: \beta_1=0$ \pause
    \item Is Drug $A$ better than the placebo? \pause
      $H_0: \beta_1=0$ \pause
    \item Did Drug $B$ work? \pause
      $H_0: \beta_2=0$ \pause
    \item Did experimental treatment have an effect? \pause
      $H_0: \beta_1=\beta_2=0$ \pause
    \item Is there a difference between the effects of Drug $A$ and Drug $B$? \pause
      $H_0: \beta_1=\beta_2$
  \end{itemize}
\end{frame}

% PP Slide 18

\begin{frame}
  \frametitle{Now add a quantitative explanatory variable (covariate)}
  \framesubtitle{Covariates often come first in the regression equation}
  \pause
  \begin{itemize}
    \item $x_1 = 1$ if Drug A, zero otherwise
    \item $x_2 = 1$ if Drug B, zero otherwise
    \item $x_3$ = Age \pause
    \item $E(y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2 + \beta_3 x_3$. \pause
  \end{itemize}
  {\begin{center}
  \begin{tabular}{|c|c|c|l|} \hline
    Drug    & $x_1$ & $x_2$ & $E(y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$\\ \hline
    A       & 1     & 0     & $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$ \\ \hline
    B       & 0     & 1     & $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$ \\ \hline
    Placebo & 0     & 0     & $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$ \\ \hline
  \end{tabular}
  \end{center}}
  \pause
  Parallel regression lines.
\end{frame}

\begin{frame}
  \frametitle{More comments}
  %\framesubtitle{}
  {\begin{center}
  \begin{tabular}{|c|c|c|l|} \hline
    Drug    & $x_1$ & $x_2$ & $E(y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$\\ \hline
    A       & 1     & 0     & $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$ \\ \hline
    B       & 0     & 1     & $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$ \\ \hline
    Placebo & 0     & 0     & $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$ \\ \hline
  \end{tabular}
  \end{center}}
  \pause
  \begin{itemize}
    \item If more than one covariate, parallel regression planes. \pause
    \item Non-parallel (interaction) is testable. \pause
    \item ``Controlling'' interpretation holds. \pause
    \item In an experimental study, quantitative covariates are usually just observed. \pause
    \item Could age be related to drug? \pause
    \item Good covariates reduce $s^2 = \frac{\mathbf{e}^\prime\mathbf{e}}{n-k-1}$, and make tests involving the categorical variables more sensitive.
  \end{itemize}
\end{frame}

\section{Cell means coding}

\begin{frame}
  \frametitle{Cell means coding: $p$ indicators and no intercept}
  %\framesubtitle{}
  Example: Three treatments and no covariate.
  \pause
  {\LARGE
  \begin{displaymath}
    E(y|\mathbf{x}) = \beta_1x_1 + \beta_2 x_2 + \beta_3 x_3
  \end{displaymath}
  \pause
  } % End size
  \vspace{3mm}
  \begin{center}
  \begin{tabular}{|c|c|c|c|c|} \hline
    Drug    &$x_1$&$x_2$&$x_3$&$E(y|\mathbf{x}) = \beta_1x_1+\beta_2x_2+\beta_3x_3$ \\ \hline
    A       & 1   & 0   & 0   &$\mu_1=\beta_1$ \\ \hline
    B       & 0   & 1   & 0   &$\mu_2=\beta_2$ \\ \hline
    Placebo & 0   & 0   & 1   &$\mu_3=\beta_3$ \\ \hline
  \end{tabular}
  \end{center}
  \pause
  \vspace{3mm}
  \begin{itemize}
    \item This model is equivalent to the one with $p-1$ dummy variables and the intercept. \pause
    \item If you have $p$ dummy variables and the intercept, the model is over-parameterized.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Add a covariate: $x_4$}
  %\framesubtitle{}
  {\LARGE
  \begin{displaymath}
    E(y|\mathbf{x}) = \beta_1x_1 + \beta_2 x_2 + \beta_3 x_3 + \beta_4 x_4
  \end{displaymath}
  \pause
  } % End size
  \begin{center}
  \begin{tabular}{|c|c|c|c|c|} \hline
    Drug    &$x_1$&$x_2$&$x_3$&$E(y|\mathbf{x}) = \beta_1x_1+\beta_2x_2+\beta_3x_3+\beta_4x_4$ \\ \hline
    A       & 1   & 0   & 0   &$\beta_1+\beta_4x_4$ \\ \hline
    B       & 0   & 1   & 0   &$\beta_2+\beta_4x_4$ \\ \hline
    Placebo & 0   & 0   & 1   &$\beta_3+\beta_4x_4$ \\ \hline
  \end{tabular}
  \end{center}
  \pause
  This model is equivalent to the one with the intercept.
\end{frame}

\begin{frame}
  \frametitle{Key to the equivalence of dummy variable coding schemes}
  \pause
  % \framesubtitle{}
  Clearly these $X$ matrices are one-to-one.
  % Left: intercept + two indicators + covariate.  Right: three indicators + covariate.
  \begin{displaymath}
    \left(\begin{array}{cccc}
      1 & 1 & 0 & x_1 \\
      1 & 0 & 1 & x_2 \\
      1 & 0 & 0 & x_3 \\
      1 & 1 & 0 & x_4 \\
      \vdots & \vdots & \vdots & \vdots \\
      1 & 0 & 1 & x_n \\
    \end{array}\right)
    \leftrightarrow
    \left(\begin{array}{cccc}
      1 & 0 & 0 & x_1 \\
      0 & 1 & 0 & x_2 \\
      0 & 0 & 1 & x_3 \\
      1 & 0 & 0 & x_4 \\
      \vdots & \vdots & \vdots & \vdots \\
      0 & 1 & 0 & x_n \\
    \end{array}\right)
  \end{displaymath}
  \pause
  And it's a linear transformation.
\end{frame}

\begin{frame}
  \frametitle{Matrix multiplication}
  \pause
  % \framesubtitle{}
  % Red entries mark the 3x3 blocks whose product gives the first three rows of the result.
  \begin{displaymath}
    \left(\begin{array}{cccc}
      {\color{red}1} & {\color{red}1} & {\color{red}0} & x_1 \\
      {\color{red}1} & {\color{red}0} & {\color{red}1} & x_2 \\
      {\color{red}1} & {\color{red}0} & {\color{red}0} & x_3 \\
      1 & 1 & 0 & x_4 \\
      \vdots & \vdots & \vdots & \vdots \\
      1 & 0 & 1 & x_n \\
    \end{array}\right)
    \left(\begin{array}{rrrr}
      {\color{red}0} & ~{\color{red}0} & {\color{red}1}  & ~0 \\
      {\color{red}1} & ~{\color{red}0} & {\color{red}-1} & ~0 \\
      {\color{red}0} & ~{\color{red}1} & {\color{red}-1} & ~0 \\
      0 & ~0 & 0 & ~1
    \end{array}\right)
    =
    \left(\begin{array}{cccc}
      1 & 0 & 0 & x_1 \\
      0 & 1 & 0 & x_2 \\
      0 & 0 & 1 & x_3 \\
      1 & 0 & 0 & x_4 \\
      \vdots & \vdots & \vdots & \vdots \\
      0 & 1 & 0 & x_n \\
    \end{array}\right)
  \end{displaymath}
  \pause
  % eqnarray* is kept on purpose: the display contains \pause, which the beamer
  % manual says does not work inside amsmath environments such as align.
  {\LARGE
  \begin{eqnarray*}
    & & \mathbf{y} = X \boldsymbol{\beta} + \boldsymbol{\epsilon} \\ \pause
    & \Leftrightarrow & \mathbf{y} = (XA)(A^{-1} \boldsymbol{\beta}) + \boldsymbol{\epsilon}
  \end{eqnarray*}
  \pause
  } % End size
  Transformed $X$ requires a transformed $\boldsymbol{\beta}$.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
  \frametitle{Copyright Information}
  This slide show was prepared by
  \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
  Department of Statistical Sciences, University of Toronto.
  It is licensed under a
  \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
  {Creative Commons Attribution - ShareAlike 3.0 Unported License}.
  Use any part of it as you like and share the result freely.
  The \LaTeX~source code is available from the course website:
  \href{http://www.utstat.toronto.edu/~brunner/oldclass/302f17}
  {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f17}}
\end{frame}

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Scratch material below this point is not typeset.

\begin{frame}
\frametitle{}
%\framesubtitle{}
\begin{itemize}
\item
\item
\item
\end{itemize}
\end{frame}

{\LARGE
\begin{displaymath}

\end{displaymath}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

rm(list=ls()); n=20
# Binary
zero = rnorm(n,4,1); one = rnorm(n,6,1)
x = c(rep(0,n),rep(1,n))
y = c(zero,one)
plot(x,y)
xpts = c(0,1); ypts = c(mean(zero),mean(one))
lines(xpts,ypts)
# 3-d
x1 = c(0,0,1,1); x2 = c(0,1,0,1)
plot(x1,x2,pch=' ',xlab=expression(x[1]),ylab=expression(x[2]))
text(1,0,'A'); text(0,1,'B'); text(0,0,'C')

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Matrix multiplication}
\pause
% \framesubtitle{}
\begin{displaymath}
\left(\begin{array}{cccc}
{\color{red}1} & 1 & 0 & x_1 \\
1 & 0 & 1 & x_2 \\
1 & 0 & 0 & x_3 \\
1 & 1 & 0 & x_4 \\
\vdots & \vdots & \vdots & \vdots \\
1 & 0 & 1 & x_n \\
\end{array}\right)
\left(\begin{array}{rrrr}
0 & ~0 & 1 & ~0 \\
1 & ~0 & -1 & ~0 \\
0 & ~1 & -1 & ~0 \\
0 & ~0 & 0 & ~1
\end{array}\right)
=
\left(\begin{array}{cccc}
1 & 0 & 0 & x_1 \\
0 & 1 & 0 & x_2 \\
0 & 0 & 1 & x_3 \\
1 & 0 & 0 & x_4 \\
\vdots & \vdots & \vdots & \vdots \\
0 & 1 & 0 & x_n \\
\end{array}\right)
\end{displaymath}
\pause
And it's a linear transformation.
\end{frame}

% Not used: 2101 material was better

\begin{frame}
\frametitle{Three categories}
%\framesubtitle{}
Suppose $x_1=1$ if the subject is in Group 1, and zero otherwise,
and $x_2=1$ if the subject is in Group 2, and zero otherwise,
and $E(y|\boldsymbol{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$.
Fill in the table below.
\pause
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug & $x_1$ & $x_2$ & $\beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
$A$ & & & $\mu_1$ = \\ \hline
$B$ & & & $\mu_2$ = \\ \hline
Placebo & & & $\mu_3$ = \\ \hline
\end{tabular}
\end{center}}
\end{frame}

\begin{frame}
\frametitle{Answer}
%\framesubtitle{}
$x_1=1$ if the subject is in Group 1, and zero otherwise,
and $x_2=1$ if the subject is in Group 2, and zero otherwise,
and $E(y|\boldsymbol{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$.
\vspace{4mm}
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug & $x_1$ & $x_2$ & $\beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
$A$ & 1 & 0 & $\mu_1$ = $\beta_0 + \beta_1$ \\ \hline
$B$ & 0 & 1 & $\mu_2$ = $\beta_0 + \beta_2$ \\ \hline
Placebo & 0 & 0 & $\mu_3$ = $\beta_0$ \\ \hline
\end{tabular}
\end{center}}
\pause
\vspace{3mm}
$x_1$ and $x_2$ are called \emph{dummy variables}.
\end{frame}