% Came from STA442s12, 441s16, 441s18 % \documentclass[serif]{beamer} % Serif for Computer Modern math font. \documentclass[serif, handout]{beamer} % Handout to ignore pause statements \hypersetup{colorlinks,linkcolor=,urlcolor=red} % Uncomment next 2 lines instead of the first for article-style handout: % \documentclass[12pt]{article} % \usepackage{beamerarticle} % \usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice! \setbeamertemplate{navigation symbols}{} % Suppress navigation symbols at bottom % \usetheme{Berlin} % Displays sections on top % \usetheme{Warsaw} % Displays sections on top \usetheme{Frankfurt} % Displays sections on top: Fairly thin but swallows some material at bottom of crowded slides % \usetheme{AnnArbor} % CambridgeUS \usepackage[english]{babel} \usepackage{graphicx}% \usepackage{graphpap} % Graph paper for pictures. \setbeamertemplate{footline}[frame number] \mode<presentation> % \mode<presentation>{\setbeamercolor{background canvas}{bg=black!5}} \title{Within Cases ANOVA Part One: Multivariate and Mixed Model Approaches\footnote{See last slide for copyright information.}} \subtitle{STA441 Spring 2020} % (optional) \date{} % To suppress date % In 2018 I did multivariate as a setup, but not nested-random. This slide set is modified accordingly. % This is pretty good, but could use a few more pause statements. It's too late for 2018. \begin{document} \begin{frame} \titlepage \end{frame} %\begin{frame} %\frametitle{Overview} %\tableofcontents %\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Overview} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Within Cases} Example: A random sample of male and female university students is weighed midway through year 1, 2, 3 and 4. The explanatory variables are gender and year (time). \pause Gender is a between-cases factor and year is a within-cases factor. 
\pause \begin{itemize} \item For a within-cases factor, a case contributes a response variable value for more than one value of the explanatory variable \pause --- usually all of them. \pause \item It is natural to expect data from the same case to be correlated -- \emph{not} independent. \pause \item For example, the same subject appears in several treatment conditions. \pause \item Hearing study: How does pitch affect our ability to hear faint sounds? The same subjects will hear a variety of different pitch and volume levels (in a random order). They press a key when they think they hear something. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Student's Sleep Study (\emph{Biometrika}, 1908)} \framesubtitle{First Published Example of a $t$-test} \pause \begin{itemize} \item Patients take two sleeping medicines several days apart. \pause \item Half get $A$ first, half get $B$ first. \pause \item Reported extra hours of sleep are recorded (difference from baseline). \pause \item It's natural to subtract, and test whether the mean \emph{difference} equals zero. \pause \item That's what Gosset did. \pause \item But some might do an independent $t$-test with $n_1=n_2$. \pause \item This assumes observations from the same person to be independent. \pause \item It's unrealistic, but is it harmful? \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Matched pairs, testing $H_0: \mu_1=\mu_2$} \framesubtitle{Independent \emph{vs.} Matched $t$-test} \pause \begin{itemize} \item If population covariance between the two measurements is positive, Type I error probability of both tests is 0.05, but matched $t$-test has better power. 
\pause \item If population covariance between measurements is negative, matched $t$-test has Type I error probability of 0.05, but the independent $t$-test has Type I error probability greater than 0.05. \pause \end{itemize} Why? \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Why the matched $t$-test is better} %\framesubtitle{} \begin{itemize} \item Numerator of both test statistics is $\overline{d} = \overline{y}_1 - \overline{y}_2$. \pause \item Denominator is an estimate of the standard deviation of the difference. \pause \item $Corr(\overline{y}_1, \overline{y}_2) = Corr(y_{i,1},y_{i,2})$. \pause \item So $Cov(\overline{y}_1, \overline{y}_2)$ has the same sign as $Cov(y_{i,1},y_{i,2})$. \pause \item $Var(\overline{y}_1 - \overline{y}_2) = Var(\overline{y}_1) + Var(\overline{y}_2) - 2 Cov(\overline{y}_1, \overline{y}_2)$. \pause \item If $Cov(\overline{y}_1, \overline{y}_2) > 0$, pretending independence results in overestimation of $Var(\overline{y}_1 - \overline{y}_2)$. \pause \item If $Cov(\overline{y}_1, \overline{y}_2) < 0$, pretending independence results in underestimation of $Var(\overline{y}_1 - \overline{y}_2)$. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Within-cases Terminology} You may hear terms like \vspace{5mm} \pause \begin{itemize} \item \textbf{Longitudinal}: \pause The same variables are measured repeatedly over time. \pause Usually there are lots of variables, including categorical ones, and large samples. \pause If there's an experimental treatment, it's usually once at the beginning, like a surgery. \pause Longitudinal studies basically track what happens over time. \pause \item \textbf{Repeated measures}: \pause Usually, the same subjects experience two or more experimental treatments. \pause Usually quantitative response variables, and often small samples. 
\end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Wine Tasting Example} \framesubtitle{A single within-cases factor} \pause In a taste test of wine, 6 professional judges judged 4 wines. The numbers they gave do not exactly represent quality. Instead, they are maximum prices in dollars per bottle that the judge thinks the company can charge and still sell most of the wine. \pause \begin{itemize} \item Cases are judges: $n=6$ \pause \item Each judge tastes and rates all four wines. \pause \item The single factor is Wine: Four categories. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Archery Example: Bow and Arrow} \framesubtitle{Two within-cases factors} \pause \begin{itemize} \item Cases are archers. There are $n$ archers. \pause \item Test two bows, three arrow types. \pause \item Warmup, then each archer takes 10 shots with each Bow-Arrow combination --- 60 shots. \pause \item In a different random order for each archer, of course. \pause \item $Y_{i,1}, \ldots, Y_{i,6}$ are mean distances from arrow tip to centre of target, for $i=1, \ldots, n$. \pause \item Each $Y_{i,j}$ is based on 10 shots. \pause \item $E(Y_{i,j})=\mu_j$ for $j=1,\ldots,6$. \end{itemize} \end{frame} \begin{frame}{One Between, One Within} \pause \begin{itemize} \item Grapefruit study: Cases are $n$ grocery stores. \pause \item Within stores factor: Three price levels. \pause \item Between-stores factor: Incentive program for produce managers (Yes-No). \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}{Monkey Study} \pause \begin{itemize} \item Train monkeys on discrimination tasks, at 16, 12, 8, 4 and 2 weeks prior to treatment. \pause Different task each time, equally difficult (randomize order). 
\pause \item Treatment is to block function of the hippocampus (with drug, not surgery), \pause re-tested. Get 5 scores for each monkey. \begin{center} \includegraphics[width=4in]{Timeline} \end{center} \pause \item 11 randomly assigned to treatment, 7 to control \pause \item Treatment is between, time elapsed since training is within. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}{Advantages of Within-cases Designs}{If measurement of the response variable does not mess things up too much} \pause \begin{itemize} \item Convenience (sometimes). \pause \item Each case serves as its own control. A huge number of extraneous variables are automatically held constant. The result can be a very sensitive analysis. \pause \item For some models, you can have lots of measurements on just a few subjects \pause --- if you are willing to make some assumptions. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}{Three main approaches for normal response variables} {Not in chronological order} \pause \begin{itemize} \item Multivariate \item Classical Mixed model \item Covariance Structure % \item Randomization: Had wine tasting example in 2018 \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Multivariate Approach} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}{Multivariate Approach to Repeated Measures} \pause \begin{itemize} \item Multivariate methods allow the analysis of more than one response variable at the same time. 
\pause \item When a case (subject) provides data under more than one set of conditions, it is natural to think of the measurements as multivariate. \pause \item The humble matched $t$-test has a multivariate version (Hotelling's $T^2$). \pause \item Simultaneously test whether the means of several \emph{differences} equal zero. \pause \item Like rating of Wine One minus Wine Two, Wine Two minus Wine Three, and Wine Three minus Wine Four. \pause \item When there are also between-subjects factors (like nationality of judge), use multivariate regression methods. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Pure within-cases: Multiple factors} \framesubtitle{Archery example} \pause Each archer contributes 6 numbers: \begin{center} \begin{tabular}{|c|c|c|c|} \hline & \multicolumn{3}{c|}{\textbf{Arrow type}} \\ \hline \textbf{Bow type} & $1$ & $2$ & $3$ \\ \hline 1 & $E(y_{i,1})=\mu_{11}$ & $E(y_{i,2})=\mu_{12}$ & $E(y_{i,3})=\mu_{13}$ \\ \hline 2 & $E(y_{i,4})=\mu_{21}$ & $E(y_{i,5})=\mu_{22}$ & $E(y_{i,6})=\mu_{23}$ \\ \hline \end{tabular}\end{center} \pause \begin{itemize} \item Form (sets of) linear combinations of the response variables. \pause \item Want to test main effect of Bow Type? \pause \begin{itemize} \item $H_0: \mu_{11}+\mu_{12}+\mu_{13} = \mu_{21}+\mu_{22}+\mu_{23}$ \pause \item Calculate $L_i = y_{i,1}+y_{i,2}+y_{i,3} - (y_{i,4}+y_{i,5}+y_{i,6})$. \pause \item $E(L_i) = \mu_{11}+\mu_{12}+\mu_{13} - (\mu_{21}+\mu_{22}+\mu_{23})$. \pause \item Test $H_0: E(L_i)=0$. \pause \item Could use an ordinary matched $t$-test for this one. 
\end{itemize} \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Main effect for arrow type} \framesubtitle{Differences between marginal means} \begin{center} \begin{tabular}{|c|c|c|c|} \hline & \multicolumn{3}{c|}{\textbf{Arrow type}} \\ \hline \textbf{Bow type} & $1$ & $2$ & $3$ \\ \hline 1 & $E(y_{i,1})=\mu_{11}$ & $E(y_{i,2})=\mu_{12}$ & $E(y_{i,3})=\mu_{13}$ \\ \hline 2 & $E(y_{i,4})=\mu_{21}$ & $E(y_{i,5})=\mu_{22}$ & $E(y_{i,6})=\mu_{23}$ \\ \hline \end{tabular}\end{center} \pause \begin{itemize} \item $H_0: \mu_{11}+\mu_{21} = \mu_{12}+\mu_{22}$ and $\mu_{12}+\mu_{22} = \mu_{13}+\mu_{23}$ \pause \item Calculate two linear combinations for each archer: \pause \begin{itemize} \item $L_{i,1} = y_{i,1}+y_{i,4}-(y_{i,2}+y_{i,5})$ \pause \item $L_{i,2} = y_{i,2}+y_{i,5}-(y_{i,3}+y_{i,6})$ \pause \end{itemize} \item Simultaneously test $H_0: E(L_{i,1})=0$ and $E(L_{i,2})=0$.\pause \item Use Hotelling's $T^2$. \pause \item Or something equivalent. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Matched $t$-tests with \texttt{proc reg}} \pause %\framesubtitle{} \begin{itemize} \item Regression with no explanatory variables. \pause \item $y_i = \beta_0 + \epsilon_i \pause \sim \pause N(\beta_0,\sigma^2)$. \pause \item Test $H_0: \beta_0=0$. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{Hotelling's $T$-squared} \framesubtitle{Multivariate matched $t$-test} \pause \begin{itemize} \item Official SAS documentation claims that SAS won't calculate Hotelling's $T$-squared, but \ldots \pause \item $T^2 = (n-1) \left(\frac{1}{\lambda}-1 \right)$, so just get Wilks' Lambda from the \texttt{mtest} statement of \texttt{proc reg}. The $p$-value will be correct. 
\pause \item In a regression model with \emph{no explanatory variables}, $E(\mathbf{D}_i) = \boldsymbol{\beta}_0$, so test $H_0: \boldsymbol{\beta}_0=\mathbf{0}$. \pause \begin{verbatim} proc reg; model D1 D2 D3 = ; Wine: mtest intercept=0; \end{verbatim} \pause \item Or just use the test for Wilks' lambda directly. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Designs with both between and within cases factors} \pause \begin{itemize} \item Could have main effects and interactions for between-cases factors, \pause \item Could have main effects and interactions for within-cases factors, \pause \item Could have interactions of between by within. \pause \item Again, observations from the same case are treated as multivariate. \pause \item Again we form linear combinations of response variables and test hypotheses about them. \pause \item \textbf{Recipe}: \emph{Use a regression model with effect coding dummy variables for the between-cases factors} (if any). \pause Use these same explanatory variables in every model. \pause \item Response variables (linear combinations) will vary depending on the effect being tested. \pause \item Null hypotheses for all the main effects and interactions are statements about the $\beta$ values. \end{itemize} \end{frame} %set up effect coding dummy variables for the between-cases factors (if any), and calculate response variables that are linear combinations of the variables that are recorded for each case. You can then obtain tests for all the main effects and interactions by testing null hypotheses about the values in the regression model. Sometimes the model has more than one response variable (linear combination). In this case it really is multivariate, and the second subscript on the refers to the response variable. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Main effects and interactions for the between-cases factors} \pause %\framesubtitle{} \begin{itemize} \item These are marginals, averaging $\mu$ parameters over the within-cases factors. \pause \item Let $L_i=$ the mean (or sum) of the $y_{i,j}$ values\pause, averaging or adding over $j$. \pause \item Do a standard between-cases analysis with $L_i$ as the response variable. \end{itemize} \end{frame} \begin{frame} \frametitle{Main effects and interactions for the within-cases factors} \pause %\framesubtitle{} \begin{itemize} \item Need to average $\mu$ parameters over the between-cases factors. \pause \item Effect coding! $\beta_0$ is the grand mean. \pause \item Form linear combinations as in the archery example. \pause \item Test $H_0: \beta_0=0$. \pause \item Or test multiple $\beta_{0,j}=0$ if need be. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Interactions of between by within} \pause %\framesubtitle{} \begin{itemize} \item The nature of a within-cases effect \emph{depends} on a between-cases treatment combination. \pause \item Take the linear combinations for the within-cases effect. \pause \item Test the between-cases effect on those. \pause \item For example, factors are Bow Type, Arrow Type and Gender. \pause \item Want to test the Arrow Type by Gender interaction. \pause \item Are the differences between arrow types (averaging over bow types) different for men and women? \pause \item Simultaneously test for gender differences in the two linear combinations representing arrow type one versus two and two versus three. \pause \item It's a standard multivariate test. 
\end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{You could use \texttt{proc reg}} \framesubtitle{To test the arrow type by gender interaction} {\scriptsize \begin{center} \begin{tabular}{|c|c|c|c|} \hline & \multicolumn{3}{c|}{\textbf{Arrow type}} \\ \hline \textbf{Bow type} & $1$ & $2$ & $3$ \\ \hline 1 & $E(y_{i,1})=\mu_{11}$ & $E(y_{i,2})=\mu_{12}$ & $E(y_{i,3})=\mu_{13}$ \\ \hline 2 & $E(y_{i,4})=\mu_{21}$ & $E(y_{i,5})=\mu_{22}$ & $E(y_{i,6})=\mu_{23}$ \\ \hline \end{tabular}\end{center} \pause } % End size {\footnotesize % or scriptsize \begin{verbatim} L1 = y1+y4 - (y2+y5); L2 = y2+y5 - (y3+y6); \end{verbatim} \pause \begin{verbatim} proc reg; model L1 L2 = gender; arrow_by_sex: mtest gender=0; \end{verbatim} \pause } % End size Or you can let \texttt{proc glm} do the dummy variables and linear combinations for you. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{If within-cases factors have just two levels} \framesubtitle{Like before and after, experimental vs. control} \pause \begin{itemize} \item You can always do it with a univariate analysis. \pause \item No fancy software is needed. \pause \item All three approaches to repeated measures yield the same $F$ statistics. \pause \item Make a sum variable and a difference variable. \pause \item Salmon study: Fish are Canadian or Alaskan\pause, Female or Male\pause, Growth is measured in freshwater \emph{and} marine environments. \pause \item Three factors: Species by sex by environment \pause -- environment is within cases. \pause \item Response variable is growth. 
\end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Salmon example} \framesubtitle{SAS code not tested} % {\footnotesize % or scriptsize \begin{verbatim} sumgrowth = freshgrowth + marinegrowth; difgrowth = freshgrowth - marinegrowth; \end{verbatim} \pause Assume effect coding for country and sex. \pause \begin{verbatim} proc reg; title2 'Between-cases effects'; model sumgrowth = country sex cs; proc reg; title2 'Within and between-within'; model difgrowth = country sex cs; \end{verbatim} What do the $t$-tests give you? % } % End size \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Classical Mixed Model Approach} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Classical Mixed Model Approach to Repeated Measures} \pause %\framesubtitle{} First we need some background. \begin{itemize} \item Nested designs \item Fixed and random effects. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Nested Designs} \framesubtitle{Example} \pause A chain of commercial business colleges is teaching a software certification course. After 6 weeks of instruction, students take a certification exam and receive a score ranging from zero to 100. \pause \begin{itemize} \item The owners want to see whether performance is related to which school students attend, or which instructor they have -- or both. \pause \item They compare two schools; one of the schools has three instructors teaching the course, and the other school has 4 instructors teaching the course. \pause \item A teacher only works in one school. \pause \item There are two categorical explanatory variables, school and teacher. 
\pause \item But it's not a factorial design, because ``Teacher 1" does not mean the same thing in School 1 and School 2. \item It's a different person. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Teacher is nested within school} \begin{columns} % Use Beamer's columns to use more of the margins \column{1.15\textwidth} \begin{picture} (100,100)(-100,0) % \graphpaper(0,0)(200,100) % Need \usepackage{graphpap} \put(-55,120){School One} % Co-ordinates of text lower left \put (-30,110){\line(0,-1){100}} % Down from School One % Co-ordinates, direction, length \put(-80,40){\line(1,0){100}} % Across under School One \put(-80,40){\line(0,-1){30}} % Branch to Teacher 1 \put(20,40){\line(0,-1){30}} % Branch to Teacher 3 \put(-95,0){\tiny Teacher 1} \put(-85,-10){\small $\mu_1$} \put(-45,0){\tiny Teacher 2} \put(-35,-10){\small $\mu_2$} \put(5,0){\tiny Teacher 3} \put(15,-10){\small $\mu_3$} \put(120,120){School Two} \put (150,110){\line(0,-1){70}} % Down from School Two \put(75,40){\line(1,0){150}} % Across under School Two \put(75,40){\line(0,-1){30}} % Branch to Teacher 1 School 2 \put(125,40){\line(0,-1){30}} % Branch to Teacher 2 School 2 \put(175,40){\line(0,-1){30}} % Branch to Teacher 3 School 2 \put(225,40){\line(0,-1){30}} % Branch to Teacher 4 School 2 \put(60,0){\tiny Teacher 1} \put(70,-10){\small $\mu_4$} \put(110,0){\tiny Teacher 2} \put(120,-10){\small $\mu_5$} \put(160,0){\tiny Teacher 3} \put(170,-10){\small $\mu_6$} \put(210,0){\tiny Teacher 4} \put(220,-10){\small $\mu_7$} \pause \put(-95,-40){\small Schools $H_0: \frac{1}{3}(\mu_1+\mu_2+\mu_3) = \frac{1}{4}(\mu_4+\mu_5+\mu_6+\mu_7)$} \pause \put(-95,-60){\small Teachers within Schools $H_0: \mu_1=\mu_2=\mu_3$ and $\mu_4=\mu_5=\mu_6=\mu_7$} \end{picture} \end{columns} % Unbalanced design % Teachers(Schools) pools main effect for teachers AND the interaction \end{frame} 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{Tests of nested effects are tests of contrasts} \pause $H_0: \frac{1}{3}(\mu_1+\mu_2+\mu_3) = \frac{1}{4}(\mu_4+\mu_5+\mu_6+\mu_7)$ \pause $H_0: \mu_1=\mu_2=\mu_3$ and $\mu_4=\mu_5=\mu_6=\mu_7$ \pause \vspace{3mm} You can specify the contrasts yourself, or you can take advantage of \texttt{proc glm}'s syntax for nested models. \pause \begin{verbatim} proc glm; class school teacher; model score = school teacher(school); \end{verbatim} \pause The notation \texttt{teacher(school)} should be read ``teacher within school.'' \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Easy to extend the ideas} %\framesubtitle{} \begin{itemize} \item Can have more than one level of nesting. You could have climate zones, lakes within climate zones, fishing boats within lakes, \ldots \pause \item There is no problem with combining nested and factorial structures. \pause You just have to keep track of what's nested within what. \pause \item Factors that are not nested are sometimes called ``crossed.'' \pause \item The combination of nesting and \emph{random effects} is very powerful. \end{itemize} \end{frame} \begin{frame} \frametitle{Random Effects} \framesubtitle{As opposed to \emph{fixed effects}} \pause A random factor is one in which the \emph{values of the factor are a random sample} from a population of values. \pause \begin{itemize} \item Randomly select 10 schools, test students at each school. \pause School is a random factor with 10 values. \pause \item Randomly select 15 naturopathic medicines for arthritis (there are quite a few), and then randomly assign arthritis patients to try them. \pause Drug is a random factor. \pause \item Randomly select 15 lakes. In each lake, measure how clear the water is at 20 randomly chosen points. \pause Lake is a random factor. 
\pause \item Randomly select 20 fast food outlets, survey customers in each about quality of the fries. \pause Outlet is a random factor with 20 values. \pause Amount of salt would be a fixed factor, which could be crossed with outlet. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{One random factor} \framesubtitle{A nice simple example} \pause \begin{itemize} \item Randomly select 5 farms. \pause \item Randomly select 10 cows from each farm, milk them, and record the amount of milk from each one. \pause \item The one random factor is Farm. \pause \item Total $n=50$. \pause \item The idea is that ``Farm" is a kind of random shock that pushes all the amounts of milk from the cows in the farm up or down by the same amount. \pause \item You could also think of cow (the cases are cows) as a random factor nested within farm. \pause {\Large \begin{displaymath} y_{ij} = \mu + \tau_i + \epsilon_{ij} \end{displaymath} } % End size \item $i = 1, \ldots, 5$ and $j = 1, \ldots, 10$. \end{itemize} \end{frame} \begin{frame} \frametitle{Analysis of variance} %\framesubtitle{} {\LARGE $y_{ij} = \mu + \tau_i + \epsilon_{ij}$ \pause } % End size \vspace{5mm} \begin{eqnarray*} Var(y_{ij}) & = & Var(\mu + \tau_i + \epsilon_{ij}) \\ \pause & = & Var(\tau_i) + Var(\epsilon_{ij}) \\ \pause & = & \sigma^2_\tau + \sigma^2 \pause \end{eqnarray*} \begin{itemize} \item Split the variance up into two parts: \pause The part that comes from farms, \pause and the part that comes from cows (within farms). \pause \item \emph{Analysis} of variance. \pause \item Test $H_0: \sigma^2_\tau=0$ \pause \item Estimate $\frac{\sigma^2_\tau}{\sigma^2_\tau+\sigma^2}$ \end{itemize} \end{frame} \begin{frame} \frametitle{Distribution of $y_{ij} = \mu_. 
+ \tau_i + \epsilon_{ij}$} \pause %\framesubtitle{And associated statistics} \begin{itemize} \item $y_{ij} \sim N(\mu_.,\sigma^2_\tau+\sigma^2)$ \pause \item Observations are not all independent. \pause \item Covariance matrix of the vector of response variables is block diagonal: Matrix of matrices. \pause \begin{itemize} \item Off-diagonal matrices are all zeros. \pause \item Matrices on the diagonal ($k \times k$) have the \emph{compound symmetry} structure \pause \begin{displaymath} \left( \begin{array}{c c c} \sigma^2+\sigma^2_\tau & \sigma^2_\tau & \sigma^2_\tau \\ \sigma^2_\tau & \sigma^2+\sigma^2_\tau & \sigma^2_\tau \\ \sigma^2_\tau & \sigma^2_\tau & \sigma^2+\sigma^2_\tau \\ \end{array} \right) \end{displaymath} \pause (Except it's $10 \times 10$.) \end{itemize} \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % There was more here, including flocks and proc nested. See 2016 \begin{frame} \frametitle{Mixed models} \framesubtitle{The classical approach} \pause \begin{itemize} \item There can be both fixed and random factors in the same experiment. \pause This makes it a \emph{mixed} model. \pause \item Factors can be nested or crossed, in various patterns. \pause \item Random factors can be nested within fixed. \pause \item Fixed effects cannot be nested within random. \pause \item The interaction of any random factor with another factor (whether fixed or random) is random. \pause \item $F$-tests are often possible, \pause but they don't always use Mean Squared Error in the denominator of the F statistic. \pause \item Often, it's the Mean Square for some interaction term. \pause \item The choice of what error term to use is relatively mechanical for balanced models \pause --- based on expected mean squares. \pause \item Mechanical means SAS can do it for you. 
\end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{One more example} \framesubtitle{And some sample questions} \pause Independent random samples of 10 Canadian and 10 U.S. large companies were selected. In each company, 25 female and 25 male managers were randomly selected, and their formal education in years was recorded. \pause \begin{enumerate} \item Is this an observational study, or experimental? \pause {\tiny \color{red} Observational.} \pause \item What are the factors? \pause {\tiny \color{red} Nation, Company and Sex.} \pause \item Designate the factors as fixed or random. \pause {\tiny \color{red} Nation and Sex are fixed. \pause Company is random.} \pause \item Describe the nesting, if any. \pause {\tiny \color{red} Company is nested within Nation.} \end{enumerate} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}{Classical Mixed Model Approach to Repeated Measures} \pause \begin{itemize} \item The effect for case (person, subject) is a random shock that pushes all the observations from that case up or down by the same amount \pause -- like farm. \pause \item Case is one of the factors. \pause \item It's a \emph{random effects} factor \pause that is \emph{nested} within combinations of the between-cases factors, \pause and \emph{crosses} the within-cases factors. \pause \item There are no interactions between case and the other factors. \pause \item Uses a mixed model ANOVA. \pause \item The $F$-tests depend on balanced experimental designs. % \item Can also do it with the covariance structure approach, and don't need balance. 
\end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Pictures of crossing and nesting} %\framesubtitle{} Cases (subjects) is a random effects factor nested within combinations of the between-cases factors and crossing the within-cases factors. \pause \begin{itemize} \item Recall the archery example -- two bow types, three arrow types. \pause \item Suppose each archer only used one type of bow and one type of arrow. \pause \item Make a diagram showing the nesting/crossing of cases. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Both Factors between} \framesubtitle{Make a diagram showing the nesting/crossing of cases.} \begin{itemize} \item Each archer only uses one type of bow and one type of arrow. \item Both factors are between cases. \pause \item Cases are nested within both bow and arrow. \end{itemize} \pause \vspace{10mm} \begin{picture}(100,100)(-70,0) \thicklines %\graphpaper(0,0)(210,100) % Need \usepackage{graphpap} % Draw the cells \put (0,0){\line(1,0){210}} % Bottom \put (0,50){\line(1,0){210}} % Middle horizontal \put (0,100){\line(1,0){210}} % Top \put (0,0){\line(0,1){100}} % Left \put (70,0){\line(0,1){100}} % One-third \put (140,0){\line(0,1){100}} % Two-thirds \put (210,0){\line(0,1){100}} % Right \put (-30,50){Bow} \put(90,110){Arrow} % Draw the ellipses {\color{red} \put(35,75){\oval(50,30)} \put(105,75){\oval(50,30)} \put(175,75){\oval(50,30)} \put(35,25){\oval(50,30)} \put(105,25){\oval(50,30)} \put(175,25){\oval(50,30)} } % End color \end{picture} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{One factor between and one within} \framesubtitle{Make a diagram showing the nesting/crossing of cases.} \pause \begin{itemize} \item Suppose each archer only uses one type of bow, but all 3 types of arrow. 
\pause \item Bow is between cases, arrow is within (repeated measures on arrow). \pause \item Cases are nested within bow, but cross arrow. \pause \end{itemize} \vspace{10mm} \begin{picture}(100,100)(-70,0) \thicklines %\graphpaper(0,0)(210,100) % Need \usepackage{graphpap} % Draw the cells \put (0,0){\line(1,0){210}} % Bottom \put (0,50){\line(1,0){210}} % Middle horizontal \put (0,100){\line(1,0){210}} % Top \put (0,0){\line(0,1){100}} % Left \put (70,0){\line(0,1){100}} % One-third \put (140,0){\line(0,1){100}} % Two-thirds \put (210,0){\line(0,1){100}} % Right \put (-30,50){Bow} \put(90,110){Arrow} % Draw the ellipses {\color{red} \put(105,75){\oval(190,30)} \put(105,25){\oval(190,30)} } % End color \end{picture} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Another one factor between and one within} \pause \framesubtitle{Make a diagram showing the nesting/crossing of cases.} \begin{itemize} \item Suppose each archer uses both types of bow, but only one type of arrow. \pause \item Bow is within cases, Arrow is between (repeated measures on Bow). \pause \item Cases are nested within Arrow, but cross Bow. 
\pause \end{itemize} \vspace{10mm} \begin{picture}(100,100)(-70,0) \thicklines %\graphpaper(0,0)(210,100) % Need \usepackage{graphpap} % Draw the cells \put (0,0){\line(1,0){210}} % Bottom \put (0,50){\line(1,0){210}} % Middle horizontal \put (0,100){\line(1,0){210}} % Top \put (0,0){\line(0,1){100}} % Left \put (70,0){\line(0,1){100}} % One-third \put (140,0){\line(0,1){100}} % Two-thirds \put (210,0){\line(0,1){100}} % Right \put (-30,50){Bow} \put(90,110){Arrow} % Draw the ellipses {\color{red} \put(35,50){\oval(30,80)} \put(105,50){\oval(30,80)} \put(175,50){\oval(30,80)} } % End color \end{picture} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Both factors within} \framesubtitle{As in the original example} \pause \begin{itemize} \item Each archer uses both types of bow and all three types of arrow. \pause \item Both factors are within cases (repeated measures on both Bow and Arrow). \pause \item Cases cross both Bow and Arrow. \pause \end{itemize} \vspace{10mm} \begin{picture}(100,100)(-70,0) \thicklines %\graphpaper(0,0)(210,100) % Need \usepackage{graphpap} % Draw the cells \put (0,0){\line(1,0){210}} % Bottom \put (0,50){\line(1,0){210}} % Middle horizontal \put (0,100){\line(1,0){210}} % Top \put (0,0){\line(0,1){100}} % Left \put (70,0){\line(0,1){100}} % One-third \put (140,0){\line(0,1){100}} % Two-thirds \put (210,0){\line(0,1){100}} % Right \put (-30,50){Bow} \put(90,110){Arrow} % Draw the ellipses {\color{red} \put(105,50){\oval(190,55)} } \end{picture} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{One More Example} \framesubtitle{Without a picture} \pause \begin{itemize} \item Experienced archers and beginners try both bows and all three arrow types. \pause \item Experience is between cases, Bow and Arrow are within. \pause \item Cases are nested within experience. 
\pause \end{itemize} \vspace{10mm} You draw the picture. % Two Bow by Arrow panels. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Copyright Information} This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/441s20} {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/441s20}} \end{frame} \end{document} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%