% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}


\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on prime
\usetheme{Frankfurt}  % Displays section titles on prime: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}

\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{amsfonts} % for \mathbb{R} The set of reals
\usepackage{comment} 
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}



\setbeamertemplate{footline}[frame number] 

\mode<presentation>

\title{Categorical Predictor Variables\footnote{See last slide for copyright information.}}
\subtitle{STA 302 Fall 2020}
\date{} % To suppress date

\begin{document}

\begin{frame}
  \titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

% No interactions in 2017

\section{Indicators with Intercept}

\begin{frame}
\frametitle{Predictor variables need not be continuous} \pause
%\framesubtitle{} 
Code data so that $x=1$ means Drug, $x=0$ means Placebo. \pause
\vspace{5mm}

  \begin{itemize}
    \item Population mean response is $E(y|x) = \beta_0 + \beta_1 x$. \pause
    \item For patients getting the drug, mean response is \pause $E(y|x=1) = \beta_0 + \beta_1$. \pause
    \item For patients getting the placebo, mean response is \pause $E(y|x=0) = \beta_0$. \pause
    \item Difference (treatment effect) is $\beta_1$.  \pause
    \item Test $H_0:\beta_1=0$. \pause
    \item Same as the traditional 2-sample test.
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Scatterplot}
\framesubtitle{Showing the least-squares line}  \pause
\begin{columns}  
\column{0.6\textwidth}
\includegraphics[width=2.5in]{IndicatorScatter} \pause
\column{0.4\textwidth}
Predicted response is $\widehat{y} = \widehat{\beta}_0 + \widehat{\beta}_1x $. \pause
\vspace{3mm}
  \begin{itemize}
    \item[] For patients getting the drug, predicted response is  $\widehat{y} = \widehat{\beta}_0 + \widehat{\beta}_1 \pause = \overline{y}_1$. \pause
    \item[] 
    \item[] For patients getting the placebo, predicted response is  $\widehat{y} = \widehat{\beta}_0 \pause  = \overline{y}_0 $.
  \end{itemize}
  \vspace{10mm}
\end{columns}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{More than Two Categories} \pause 
%\framesubtitle{}
Suppose a study has 3 treatment conditions. \pause   For example 
\begin{itemize}
     \item Group 1 gets Drug 1
     \item Group 2 gets Drug 2
     \item Group 3 gets a placebo \pause
     \item So that the explanatory variable is Treatment \pause
     \item Taking values 1,2,3. \pause
     \item The dependent variable $y$ is response to drug. \pause
\end{itemize}

\vspace{10mm}

Why is $E(y|x)  =  \beta_0 + \beta_1x$ (with $x$ = Treatment) a silly model?
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Indicator Dummy Variables}
\framesubtitle{With intercept}  \pause 
\begin{itemize}
    \item $x_1 = 1$ if Drug A, zero otherwise
    \item $x_2 = 1$ if Drug B, zero otherwise \pause 
    \item $E(y|\boldsymbol{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$. \pause 
    \item Fill in the table. \pause 
\end{itemize}
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ &  $E(y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
$A$     &       &       &  $\mu_1$ =                            \\ \hline
$B$     &       &       &  $\mu_2$ =                            \\ \hline
Placebo &       &       &  $\mu_3$ =                            \\ \hline
\end{tabular}
\end{center}}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Answer}
\begin{itemize}
    \item $x_1 = 1$ if Drug A, zero otherwise
    \item $x_2 = 1$ if Drug B, zero otherwise
    \item $E(y|\boldsymbol{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$. 
\end{itemize}
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ &  $E(y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
$A$     &   1   &   0   &  $\mu_1$ =     $\beta_0 + \beta_1$    \\ \hline
$B$     &   0   &   1   &  $\mu_2$ =     $\beta_0 + \beta_2$    \\ \hline
Placebo &   0   &   0   &  $\mu_3$ =     $\beta_0$              \\ \hline
\end{tabular}
\end{center}} \pause 
Regression coefficients are contrasts with the category that has no indicator -- the \emph{reference category}.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Indicator dummy variable coding with intercept} \pause 
%\framesubtitle{} 
\begin{itemize}
    \item With an intercept in the model, need $r-1$ indicators to represent a categorical explanatory variable with $r$ categories. \pause 
    \item If you use $r$ dummy variables and also an intercept, trouble. \pause 
    \item Indicators would add up to the intercept and columns of $\mathbf{X}$ would be linearly dependent. \pause
    \item Regression coefficients are contrasts with the category that has no indicator. \pause 
    \item Call this the \emph{reference category}.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$x_1 = 1$ if Drug A, zero o.w., $x_2 = 1$ if Drug B, zero o.w.} \pause
\framesubtitle{$\widehat{y} = \widehat{\beta}_0 + \widehat{\beta}_1x_1 + \widehat{\beta}_2 x_2$}
Recall $\sum_{i=1}^n (y_i-m)^2$ is minimized at $m = \overline{y}$ \pause
\begin{center}
\includegraphics[width=3in]{ABCscatter}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{What null hypotheses would you test?} \pause
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ &  $E(y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline 
$A$     &   1   &   0   &  $\mu_1$ =     $\beta_0 + \beta_1$    \\ \hline
$B$     &   0   &   1   &  $\mu_2$ =     $\beta_0 + \beta_2$    \\ \hline
Placebo &   0   &   0   &  $\mu_3$ =     $\beta_0$              \\ \hline
\end{tabular}
\end{center}} \pause
\begin{itemize}
    \item Is the effect of Drug $A$ different from the placebo?  \pause $H_0: \beta_1=0$ \pause
    \item Is Drug $A$ better than the placebo? \pause $H_0: \beta_1=0$ \pause
    \item Did Drug $B$ work? \pause $H_0: \beta_2=0$ \pause
    \item Did experimental treatment have an effect?  \pause $H_0: \beta_1=\beta_2=0$ \pause
    \item Is there a difference between the effects of Drug $A$ and Drug $B$? \pause $H_0: \beta_1=\beta_2$ 
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Now add a quantitative explanatory variable (covariate)}
\framesubtitle{Covariates often come first in the regression equation} \pause 
\begin{itemize}
    
    \item $x_1 = 1$ if Drug A, zero otherwise
    \item $x_2 = 1$ if Drug B, zero otherwise 
    \item $x_3$ = Age \pause 
    \item $E(y|\boldsymbol{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2 + \beta_3 x_3$. \pause 
\end{itemize}
\begin{center}

\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ &  $E(y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$\\ \hline
A       &   1   &   0   &  $\mu_1$ =     \\ \hline
B       &   0   &   1   &  $\mu_2$ =     \\ \hline
Placebo &   0   &   0   &  $\mu_3$ =     \\ \hline
\end{tabular} \pause \vspace{2mm}

\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ &  $E(y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$\\ \hline
A       &   1   &   0   &  $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$    \\ \hline
B       &   0   &   1   &  $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$    \\ \hline
Placebo &   0   &   0   &  $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$              \\ \hline
\end{tabular} 

\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Parallel Regression Lines}
%\framesubtitle{} 
\begin{center}
{\footnotesize
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ &  $E(y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$\\ \hline
A       &   1   &   0   &  $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$    \\ \hline
B       &   0   &   1   &  $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$    \\ \hline
Placebo &   0   &   0   &  $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$              \\ \hline
\end{tabular} \pause
} % End size
\includegraphics[width=2.5in]{Parallel}
\end{center} 
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Parallel Regression Lines}
%\framesubtitle{} 
\begin{columns}  
\column{0.6\textwidth}
{\footnotesize
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ &  $E(y|\mathbf{x})$\\ \hline
A       &   1   &   0   &  $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$    \\ \hline
B       &   0   &   1   &  $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$    \\ \hline
Placebo &   0   &   0   &  $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$              \\ \hline
\end{tabular}
} % End size
\column{0.45\textwidth}
\includegraphics[width=2in]{Parallel}
\end{columns} 

For fixed age, is there a difference in expected immune response as a function of experimental treatment? \pause $H_0: \beta_1=\beta_2=0$. 

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{More comments}
%\framesubtitle{} 
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ &  $E(y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$\\ \hline
A       &   1   &   0   &  $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$    \\ \hline
B       &   0   &   1   &  $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$    \\ \hline
Placebo &   0   &   0   &  $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$              \\ \hline
\end{tabular}
\end{center}} \pause 
\begin{itemize}
    \item If more than one covariate, parallel regression planes. \pause 
    \item Non-parallel (interaction) is testable.  \pause 
    \item ``Controlling'' interpretation holds. \pause 
    \item In an experimental study, quantitative covariates are usually just observed. \pause 
    \item Could age be related to drug? \pause 
    \item Good covariates reduce 
    \emph{MSE} $= \frac{\widehat{\boldsymbol{\epsilon}}^{\,\prime \,}
             \widehat{\boldsymbol{\epsilon}}}{n-k-1}$, 
    and make tests involving the categorical variables more sensitive.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Cell means coding}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Cell means coding: $r$ indicators and no intercept}
%\framesubtitle{} 
Example: Three treatments and no covariate. \pause
{\LARGE
\begin{displaymath}
    E(y|\boldsymbol{x}) = \beta_1x_1 + \beta_2 x_2 + \beta_3 x_3
\end{displaymath}  \pause 
} % End size

\vspace{3mm}

\begin{center}
\begin{tabular}{|c|c|c|c|c|} \hline
Drug  &$x_1$&$x_2$&$x_3$&$E(y|\mathbf{x}) = \beta_1x_1+\beta_2x_2+\beta_3x_3$ \\ \hline
A       & 1   &   0 &  0  &$\mu_1=\beta_1$                    \\ \hline
B       & 0   &   1 &  0  &$\mu_2=\beta_2$                    \\ \hline
Placebo & 0   &   0 &  1  &$\mu_3=\beta_3$                    \\ \hline
\end{tabular}
\end{center} \pause 

\vspace{3mm}

\begin{itemize}
    \item This model is equivalent to the one with $r-1$ dummy variables and the intercept. \pause 
    \item If you have $r$ dummy variables and also the intercept, the model is over-parameterized.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Add a covariate: $x_4$}
%\framesubtitle{} 
{\LARGE
\begin{displaymath}
    E(y|\boldsymbol{x}) = \beta_1x_1 + \beta_2 x_2 + \beta_3 x_3  + \beta_4 x_4
\end{displaymath}  \pause 
} % End size

\begin{center}
\begin{tabular}{|c|c|c|c|c|} \hline
Drug  &$x_1$&$x_2$&$x_3$&$E(y|\mathbf{x}) = \beta_1x_1+\beta_2x_2+\beta_3x_3+\beta_4x_4$ \\ \hline
A       & 1   &   0 &  0  &$\beta_1+\beta_4x_4$   \\ \hline
B       & 0   &   1 &  0  &$\beta_2+\beta_4x_4$   \\ \hline
Placebo & 0   &   0 &  1  &$\beta_3+\beta_4x_4$   \\ \hline
\end{tabular}
\end{center} \pause 
This model is equivalent to the one with the intercept.
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Which one should you use?}
\framesubtitle{Choose on the basis of convenience}  \pause
{\small
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ &  $E(y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$\\ \hline
A       &   1   &   0   &  $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$    \\ \hline
B       &   0   &   1   &  $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$    \\ \hline
Placebo &   0   &   0   &  $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$              \\ \hline
\end{tabular}
\end{center}} 
\begin{center}
\begin{tabular}{|c|c|c|c|c|} \hline
Drug  &$x_1$&$x_2$&$x_3$&$E(y|\mathbf{x}) = \beta_1x_1+\beta_2x_2+\beta_3x_3+\beta_4x_4$ \\ \hline
A       & 1   &   0 &  0  &$\beta_1+\beta_4x_4$   \\ \hline
B       & 0   &   1 &  0  &$\beta_2+\beta_4x_4$   \\ \hline
Placebo & 0   &   0 &  1  &$\beta_3+\beta_4x_4$   \\ \hline
\end{tabular}
\end{center} \pause 
\begin{itemize}
    \item Test whether the average response to Drug A is different from the average response to Drug B, controlling for age. What is the null hypothesis? \pause $H_0: \beta_1=\beta_2$. \pause 
    \item Suppose we want to test whether controlling for age, the average response to Drug $A$ and Drug $B$ is different from response to the placebo. What is the null hypothesis for the model with intercept? \pause $H_0: \beta_1+\beta_2=0$.
\end{itemize}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Huh?}
%\framesubtitle{}  \pause
{\small
{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ &  $E(y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$\\ \hline
A       &   1   &   0   &  $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$    \\ \hline
B       &   0   &   1   &  $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$    \\ \hline
Placebo &   0   &   0   &  $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$              \\ \hline
\end{tabular}
\end{center}} 
} % End size
Controlling for age, is the average response to Drug $A$ and Drug $B$ different from mean response to the placebo?  What is the null hypothesis? $H_0: \beta_1+\beta_2=0$. \pause Really? Show your work. \pause

\begin{center}
\begin{tabular}{l l} 
       & $\frac{1}{2}[\, (\beta_0+\beta_1+\beta_3x_3)+(\beta_0+\beta_2+\beta_3x_3) \,] = \beta_0+\beta_3x_3$  \\   \pause
$\iff$ & $\beta_0+\beta_1+\beta_3x_3 + \beta_0+\beta_2+\beta_3x_3 = 2\beta_0+2\beta_3x_3$  \\ 
$\iff$ & $2\beta_0+\beta_1+\beta_2+2\beta_3x_3 = 2\beta_0+2\beta_3x_3$  \\ 
$\iff$ & $\beta_1+\beta_2=0$. \pause
\end{tabular}
\end{center}
We want to avoid this kind of thing.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Easier with Cell Means Coding}
%\framesubtitle{}  \pause
{\small
\begin{center}
\begin{tabular}{|c|c|c|c|c|} \hline
Drug  &$x_1$&$x_2$&$x_3$&$E(y|\mathbf{x}) = \beta_1x_1+\beta_2x_2+\beta_3x_3+\beta_4x_4$ \\ \hline
A       & 1   &   0 &  0  &$\beta_1+\beta_4x_4$   \\ \hline
B       & 0   &   1 &  0  &$\beta_2+\beta_4x_4$   \\ \hline
Placebo & 0   &   0 &  1  &$\beta_3+\beta_4x_4$   \\ \hline
\end{tabular}
\end{center} 
} % End size
Controlling for age, is the average response to Drug $A$ and Drug $B$ different from mean response to the placebo?  What is the null hypothesis? \pause  \vspace{10mm}

$H_0: \frac{1}{2}(\beta_1+\beta_2) = \beta_3$\pause, or $H_0: \beta_1+\beta_2 = 2\beta_3$.
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Key to the equivalence of dummy variable coding schemes} \pause 
% \framesubtitle{} 
Clearly these $\mathbf{X}$ matrices are one-to-one.

\begin{displaymath}
\left(\begin{array}{cccc}
1 & 1 & 0 & x_1 \\
1 & 0 & 1 & x_2 \\
1 & 0 & 0 & x_3 \\
1 & 1 & 0 & x_4 \\
\vdots & \vdots & \vdots & \vdots \\
1 & 0 & 1 & x_n \\
\end{array}\right) \leftrightarrow
\left(\begin{array}{cccc}
1 & 0 & 0 & x_1 \\
0 & 1 & 0 & x_2 \\
0 & 0 & 1 & x_3 \\
1 & 0 & 0 & x_4 \\
\vdots & \vdots & \vdots & \vdots \\
0 & 1 & 0 & x_n \\
\end{array}\right)
\end{displaymath} \pause
And it's a linear transformation.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Matrix multiplication} \pause 
% \framesubtitle{} 

\begin{displaymath} 
\left(\begin{array}{cccc}
{\color{red}1} & {\color{red}1} & {\color{red}0} & x_1 \\
{\color{red}1} & {\color{red}0} & {\color{red}1} & x_2 \\
{\color{red}1} & {\color{red}0} & {\color{red}0} & x_3 \\
1 & 1 & 0 & x_4 \\
\vdots & \vdots & \vdots & \vdots \\
1 & 0 & 1 & x_n \\
\end{array}\right) 
\left(\begin{array}{rrrr}
{\color{red}0} & ~{\color{red}0} & {\color{red}1} & ~0 \\
{\color{red}1} & ~{\color{red}0} & {\color{red}-1} & ~0 \\
{\color{red}0} & ~{\color{red}1} & {\color{red}-1} & ~0 \\
0 & ~0 &  0 & ~1 
\end{array}\right)
=
\left(\begin{array}{cccc}
1 & 0 & 0 & x_1 \\
0 & 1 & 0 & x_2 \\
0 & 0 & 1 & x_3 \\
1 & 0 & 0 & x_4 \\
\vdots & \vdots & \vdots & \vdots \\
0 & 1 & 0 & x_n \\
\end{array}\right)
\end{displaymath} \pause
{\LARGE
\begin{eqnarray*}
     &  & \mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}  \\  \pause 
     & \Leftrightarrow &  \mathbf{y} = (\mathbf{XA})(\mathbf{A}^{-1} \boldsymbol{\beta}) + \boldsymbol{\epsilon} 
\end{eqnarray*} \pause
} % End size
Transformed $\mathbf{X}$ implies a transformed $\boldsymbol{\beta}$.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Other 1-1 linear transformations of the predictor variables can be useful}
%\framesubtitle{} 
  \begin{itemize}
    \item $x_1$ = Verbal SAT, $x_2$ = Math SAT, $y$ = First year GPA. \pause
    \item $w_1 = x_1 + x_2$ is total SAT score. \pause
    \item $w_2 = x_2 - x_1$ is how much better the student did in the math part. \pause
    \item You might prefer $y_i = \beta_0 + \beta_1 w_{i,1} + \beta_2 w_{i,2} + \epsilon_i$. \pause
    \item $(w_1,w_2)$ is one-to-one with $(x_1,x_2)$. \pause
    \item $\mathbf{y} = (\mathbf{XA})(\mathbf{A}^{-1} \boldsymbol{\beta}) + \boldsymbol{\epsilon}$.
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Interactions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Interactions} \pause
%\framesubtitle{} 
\begin{itemize}
    \item Interaction between predictor variables means ``It depends.'' \pause
    \item Relationship between one explanatory variable and the response variable \emph{depends} on the value of another explanatory variable. \pause
    \item Note that an interaction is \emph{not} a relationship between explanatory variables (in this course).
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{General principle} 
%\framesubtitle{} 
\begin{itemize}
    \item Interaction between $A$ and $B$ means
        \begin{itemize}
            \item Relationship of $A$ to $y$ depends on value of $B$.
            \item Relationship of $B$ to $y$ depends on value of $A$.
        \end{itemize} \pause
    \item The two statements are formally equivalent.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Interactions between explanatory variables can be} 
%\framesubtitle{} 
\begin{itemize}
    \item Quantitative by quantitative
    \item Quantitative by categorical
    \item Categorical by categorical
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Quantitative by Quantitative} \pause
%\framesubtitle{} 
{\large
Represent the interaction by a \emph{product} of explanatory variables. \pause
\begin{eqnarray*}
y &=& \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \beta_3 x_1x_2 + \epsilon \\
E(y|\mathbf{x}) &=& \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \beta_3 x_1x_2  \pause
\end{eqnarray*} 
For fixed $x_2$, \pause
\begin{displaymath}
    E(y|\mathbf{x}) = (\beta_0 + \beta_2 x_2) + (\beta_1+\beta_3 x_2) x_1
\end{displaymath} \pause
\begin{itemize}
    \item Both slope and intercept depend on value of $x_2$. \pause
    \item And for fixed $x_1$, slope and intercept relating $x_2$ to $E(y)$ depend on the value of $x_1$. \pause
    \item This interpretation holds only with $x_1$ and $x_2$ (separately) in the model!
\end{itemize}

} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Quantitative by Categorical} \pause
%\framesubtitle{} 
\begin{itemize}
    \item Separate regression line for each value of the categorical explanatory variable. \pause
    \item Interaction means slopes of regression lines are not equal.
\end{itemize} \pause
\begin{center}
\includegraphics[width=2.5in]{Cross}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{A Single Regression Model} \pause
%\framesubtitle{} 
\begin{itemize}
    \item Form a product of quantitative variable times each dummy variable for the categorical variable. \pause
    \item For example, three treatments and one covariate: $x_1$ is the covariate, and $x_2$ and $x_3$ are the dummy variables. \pause
{\Large
\begin{eqnarray*}
y &=& \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \beta_3 x_3 \\
                & & +\, \beta_4x_1x_2 + \beta_5 x_1x_3 + \epsilon
\end{eqnarray*} \pause
} % End size
    \item Keep $x_1$, $x_2$ and $x_3$ (separately) in the model.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Fill in the table}
%\framesubtitle{} 

{\Large
\begin{center}
$E(y|\mathbf{x}) = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \beta_3 x_3 + \beta_4x_1x_2 + \beta_5 x_1x_3$ \vspace{3mm}
\end{center}

\begin{tabular}{|c|c|c|c|} \hline
Treatment & $x_2$ & $x_3$ &  $E(y|\mathbf{x})$ \\ \hline
Drug $A$  &   1 &     0  & \hspace{60mm} \\ \hline
Drug $B$  &   0 &     1  &  \\ \hline
Placebo   &   0 &     0  &  \\ \hline
\end{tabular}  \vspace{3mm} % \pause

\begin{tabular}{|c|c|c|c|} \hline
Treatment & $x_2$ & $x_3$ &  $E(y|\mathbf{x})$ \\ \hline
Drug $A$  &   1 &     0  & \hspace{60mm} \\ \hline
Drug $B$  &   0 &     1  &  \\ \hline
Placebo   &   0 &     0  &  \\ \hline
\end{tabular}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$E(y|\mathbf{x}) = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \beta_3 x_3 + \beta_4x_1x_2 + \beta_5 x_1x_3$}
%\framesubtitle{} 
\begin{center}
\begin{tabular}{|c|c|c|c|} \hline
Treatment & $x_2$ & $x_3$ &  $E(y|\mathbf{x})$ \\ \hline
Drug $A$  &   1 &     0  & $(\beta_0+\beta_2) + (\beta_1+\beta_4) x_1$ \\ \hline
Drug $B$  &   0 &     1  & $(\beta_0+\beta_3) + (\beta_1+\beta_5) x_1$ \\ \hline
Placebo   &   0 &     0  & $~~~~~\beta_0 ~~~~+ ~~~~~\beta_1 ~~~~x_1$ \\ \hline
\end{tabular}
\includegraphics[width=2.3in]{Non-parallel}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
%\frametitle{What null hypothesis would you test?}
%\framesubtitle{} 
\begin{tabular}{|c|c|c|c|} \hline
Treatment & $x_2$ & $x_3$ &  $E(y|\mathbf{x})$ \\ \hline
Drug $A$  &   1 &     0  & $(\beta_0+\beta_2) + (\beta_1+\beta_4) x_1$ \\ \hline
Drug $B$  &   0 &     1  & $(\beta_0+\beta_3) + (\beta_1+\beta_5) x_1$ \\ \hline
Placebo   &   0 &     0  & $~~~~~\beta_0 ~~~~+ ~~~~~\beta_1 ~~~~x_1$ \\ \hline
\end{tabular} \vspace{6mm}

What null hypothesis would you test for
\begin{itemize}
    \item Equal slopes. \pause $H_0: \beta_4=\beta_5=0$. \pause
    \item Compare slope for Drug $A$ versus placebo. \pause $H_0: \beta_4=0$. \pause
    \item Compare slope for Drug $A$ versus Drug $B$. \pause $H_0: \beta_4=\beta_5$. \pause
    \item Equal regressions. \pause $H_0: \beta_2=\beta_3=\beta_4=\beta_5=0$. \pause
    \item Interaction between age and treatment. \pause $H_0: \beta_4=\beta_5=0$. \pause
    \item Effect of experimental treatment depends on age. \pause $H_0: \beta_4=\beta_5=0$. \pause
    \item For patients of average age $\overline{x}_1$, are Drugs $A$ and $B$ equally effective? \pause
          ~ $H_0: \beta_2+\beta_4\overline{x}_1 = \beta_3+\beta_5\overline{x}_1$.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% The comment environment below requires \usepackage{comment}.
\begin{comment}


\end{comment}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by  \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistical Sciences, University of Toronto. It is licensed under a 
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
     {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f20} {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f20}}

\end{frame}


\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{}
%\framesubtitle{} 
  \begin{itemize}
    \item 
    \item 
    \item 
  \end{itemize}
\end{frame}

{\LARGE
\begin{displaymath}
    
\end{displaymath} }






%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Scratch R code: simulate a two-group (0/1 predictor) data set and plot it.
# Clear the workspace and set the per-group sample size.
rm(list=ls()); n=20

# Binary #######################################################################
# Draw n normal responses per group: mean 4 for the x=0 group, mean 6 for the
# x=1 group, both with standard deviation 1. There is no set.seed() call in
# this section, so every run produces a different sample.
zero = rnorm(n,4,1); one = rnorm(n,6,1)
# x holds n zeros followed by n ones; y stacks the responses in the same order.
x = c(rep(0,n),rep(1,n))
y = c(zero,one)
plot(x,y)
# Draw the line segment joining the two group sample means at x=0 and x=1.
xpts = c(0,1); ypts = c(mean(zero),mean(one))
lines(xpts,ypts)


# 3-d ##########################################################################
# Set up axes over the corners of the unit square in (x1, x2). pch=' ' plots a
# blank character, so no point symbols are visible; only the text labels show.
x1 = c(0,0,1,1); x2 = c(0,1,0,1)
plot(x1,x2,pch=' ',xlab=expression(x[1]),ylab=expression(x[2]))
# Label the points (1,0), (0,1) and (0,0) as A, B and C respectively.
text(1,0,'A'); text(0,1,'B');  text(0,0,'C')


# Parallel Regression Lines ####################################################
# Draws three line segments over ages 20 to 60. Each response drops by 20 over
# that range (50->30, 40->20, 30->10), so the three lines have the same slope.
# NOTE(review): presumably this produces the "Parallel" figure used by the
# slides -- confirm, since no file is written here.
rm(list=ls())
x1 = c(20,60); y1 = c(50,30)
x2 = c(20,60); y2 = c(40,20)
x3 = c(20,60); y3 = c(30,10)
# Pool the endpoints only to set up the axes; the variable names Age and
# Response become the default axis labels of plot().
Age = c(x1,x2,x3); Response = c(y1,y2,y3)
# Blank plot (pch=' '); xlim runs past 60 so the labels at x=65 fit.
plot(Age,Response,pch=' ', xlim = c(20,75), ylim = c(5,50))
title('Age and Immune Response')
lines(x1,y1); lines(x2,y2); lines(x3,y3)
# Each label sits at the height of its line's right-hand endpoint.
text(65,30,'Drug A'); text(65,20,'Drug B'); text(65,10,'Placebo')


# Non-parallel Regression Lines ####################################################
# Same layout as the parallel-lines plot, except the third segment rises from
# 10 to 15 while the first two still fall by 20, so the slopes differ.
# NOTE(review): presumably this produces the "Non-parallel" figure used by the
# slides -- confirm, since no file is written here.
rm(list=ls())
x1 = c(20,60); y1 = c(50,30)
x2 = c(20,60); y2 = c(40,20)
x3 = c(20,60); y3 = c(10,15)
# Pool the endpoints only to set up the axes; the variable names Age and
# Response become the default axis labels of plot().
Age = c(x1,x2,x3); Response = c(y1,y2,y3)
# Blank plot (pch=' '); xlim runs past 60 so the labels at x=65 fit.
plot(Age,Response,pch=' ', xlim = c(20,75), ylim = c(5,50))
title('Age and Immune Response')
lines(x1,y1); lines(x2,y2); lines(x3,y3)
# Each label sits at the height of its line's right-hand endpoint.
text(65,30,'Drug A'); text(65,20,'Drug B'); text(65,15,'Placebo')


# Tri-colour scatterplot #######################################################
# Cross in data range. (Also there's truly a curve for blue.)
# This generates Cross.pdf
# Simulates three groups of n points each and plots them in red, blue and
# green, each with its own fitted least-squares line.
rm(list=ls())
# Fixed seed: the rnorm() calls below must stay in this order to reproduce
# the same data.
set.seed(9999)
n = 50; mu = 75; sig=6
# Predictors and errors are rounded normal draws: predictors have mean mu and
# SD sig, errors have mean 0 and SD sig.
x1 = round(rnorm(n,mu,sig)); x2 = round(rnorm(n,mu,sig)); x3 = round(rnorm(n,mu,sig))
eps1 = round(rnorm(n,0,sig)); eps2 = round(rnorm(n,0,sig)); eps3 = round(rnorm(n,0,sig))
# Group 1: slope 2, intercept -mu.
y1 = round(-mu + 2*x1 + eps1)
# Group 2: slope 1, but for x2 below mu the response is replaced by mu plus
# error (flat), which is the "curve for blue" mentioned in the header.
y2 = round(x2 + eps2); y2[x2<mu] = mu + eps2[x2<mu]
# Group 3: slope 1 through the origin.
y3 = round(x3 + eps3)
# Pooled data, used to set up the axes. Y is shifted up by 2, whereas the
# points drawn below use the unshifted y1, y2, y3.
# NOTE(review): the +2 therefore only affects the axis range -- confirm this
# is intentional.
X = c(x1,x2,x3); Y = c(y1,y2,y3) + 2
# Group labels 1, 2, 3 (n of each); referenced only by the commented-out
# lines below.
train = c(numeric(n)+1,numeric(n)+2,numeric(n)+3)
# X[X>100] = 100; Y[Y>100] = 100
# cbind(train,X,Y)
# group = factor(train); anova(lm(Y~group*X))
# plot(X,Y)
# Blank plot (pch=' ') to establish the axes, then add each group in colour.
plot(X,Y, pch=' ',xlab=expression(x[1]))
points(x1,y1,col='red')
points(x2,y2,col='blue')
points(x3,y3,col='green')
# Fit a separate simple regression per group and draw its fitted values.
# NOTE(review): the trailing "#$" presumably balances the "$" for a
# LaTeX-aware editor's syntax highlighting -- confirm before removing.
reg1 = lm(y1~x1); lines(x1,reg1$fitted.values,col='red') #$
reg2 = lm(y2~x2); lines(x2,reg2$fitted.values,col='blue') #$
reg3 = lm(y3~x3); lines(x3,reg3$fitted.values,col='green') #$
title(expression(paste('Effect of Treatment Depends on ',x[1])))



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\begin{frame}
\frametitle{Matrix multiplication} \pause 
% \framesubtitle{} 

\begin{displaymath}
\left(\begin{array}{cccc}
{\color{red}1} & 1 & 0 & x_1 \\
1 & 0 & 1 & x_2 \\
1 & 0 & 0 & x_3 \\
1 & 1 & 0 & x_4 \\
\vdots & \vdots & \vdots & \vdots \\
1 & 0 & 1 & x_n \\
\end{array}\right) 
\left(\begin{array}{rrrr}
0 & ~0 &  1 & ~0 \\
1 & ~0 & -1 & ~0 \\
0 & ~1 & -1 & ~0 \\
0 & ~0 &  0 & ~1 
\end{array}\right)
=
\left(\begin{array}{cccc}
1 & 0 & 0 & x_1 \\
0 & 1 & 0 & x_2 \\
0 & 0 & 1 & x_3 \\
1 & 0 & 0 & x_4 \\
\vdots & \vdots & \vdots & \vdots \\
0 & 1 & 0 & x_n \\
\end{array}\right)
\end{displaymath} \pause
And it's a linear transformation.
\end{frame}





%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Parallel Regression Lines}
%\framesubtitle{} \pause 
{\footnotesize
\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ &  $E(y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$\\ \hline
A       &   1   &   0   &  $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$    \\ \hline
B       &   0   &   1   &  $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$    \\ \hline
Placebo &   0   &   0   &  $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$              \\ \hline
\end{tabular}
\includegraphics[width=2in]{Parallel}
\end{center} \pause

Controlling for age, is there a difference in expected immune response as a function of experimental treatment? \pause $H_0: \beta_1=\beta_2=0$. 

} % End size

\end{frame}