% \documentclass[serif]{beamer} % Serif for Computer Modern math font. \documentclass[serif, handout]{beamer} % Handout to ignore pause statements \hypersetup{colorlinks,linkcolor=,urlcolor=red} \usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice! \setbeamertemplate{navigation symbols}{} % Suppress navigation symbols \usetheme{AnnArbor} % CambridgeUS % \usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides % \usetheme{Berlin} % Displays sections on top % \usetheme{Berkeley} \usepackage[english]{babel} \usepackage{amsmath} % for binom % \usepackage{graphicx} % To include pdf files! % \definecolor{links}{HTML}{2A1B81} % \definecolor{links}{red} \setbeamertemplate{footline}[frame number] \mode<presentation> \title{Repeated measurement analysis of binary responses\footnote{ This slide show is an open-source document. See last slide for copyright information.}} \subtitle{Because language is discrete} \date{} % To suppress date \begin{document} \begin{frame} \titlepage \end{frame} \begin{frame} \frametitle{Modern tools} \framesubtitle{In R, the \texttt{glmer} function in the \texttt{lme4} package} \begin{itemize} \item An extension of logistic regression (\texttt{glm} in R). \pause \item $y$ = 0 or 1. \pause \item Random shock for subject again. \pause \item So it's a mixed model. \pause \item Technically a non-linear mixed model. 
% \pause \end{itemize} \end{frame} \begin{frame} \frametitle{Odds} \framesubtitle{An indirect representation of probability} \pause {\LARGE \begin{displaymath} \mbox{Odds} = \frac{p}{1-p} \end{displaymath} \pause } % End size \begin{itemize} \item If $P(Y=1)=1/2$, \pause Odds = .5/(1$-$.5) = 1 (to 1) \pause \item If $P(Y=1)=2/3$, \pause Odds = 2 (to 1) \pause \item If $P(Y=1)=3/5$, \pause Odds = (3/5)/(2/5) = 1.5 (to 1) \pause \item If $P(Y=1)=1/5$, \pause Odds = .25 (to 1) \end{itemize} \end{frame} \begin{frame} \frametitle{The higher the probability, the greater the odds} \pause %\framesubtitle{} {\LARGE \begin{center} \begin{tabular}{l} $\mbox{Odds} = \frac{p}{1-p}$ \\ \pause \\ $0 \leq \mbox{Odds} < \infty$ \end{tabular} \end{center} } % End size \end{frame} \begin{frame} \frametitle{A linear model for the log odds} \pause %\framesubtitle{} \begin{itemize} \item Under the surface, a multiple regression equation \pause \item Not for the probability of Yes \pause \item And not for the odds of Yes \pause \item But for the natural logarithm (base $e$, not 10) of the odds. \pause \item $-\infty < \ln(\mbox{Odds}) < \infty$ \end{itemize} \end{frame} \begin{frame} \frametitle{The greater the probability, the greater the log odds} %\framesubtitle{} \begin{center} \includegraphics[width=3in]{Monotone} \end{center} \end{frame} \begin{frame} \frametitle{Log odds range from $-\infty$ to $\infty$} %\framesubtitle{} \begin{center} \includegraphics[width=2in]{Monotone} \end{center} \pause \begin{itemize} \item Probabilities of zero and one are ruled out. \pause \item Estimated probabilities of zero and one are ruled out too. \end{itemize} \end{frame} \begin{frame} \frametitle{The odds ratio} %\framesubtitle{For pure fixed effects models, to start} \pause \begin{itemize} \item Conclusions tend to emerge in terms of odds. \pause \item Instead of a treatment adding or subtracting something from the control group mean \pause \item It might multiply the odds by something. 
\pause \item If the something is greater than one, it's an increase in probability. \pause \item Like the odds of a recursive structure are 1.4 times as great in the experimental condition. \pause \item If the something is less than one, it's a decrease in probability. \pause \item This is called the \emph{odds ratio}. \end{itemize} \end{frame} \begin{frame} \frametitle{Main effects: What are we testing?} \pause % \framesubtitle{Still for pure fixed effects models} \pause \begin{itemize} \item Tests for main effects are tests for differences of marginal mean log odds. \pause {\scriptsize \begin{center} \begin{tabular}{l|c|c|c||c} %\hline & \multicolumn{3}{c||}{\textbf{Treatment}} & \\ \hline & $A$ & $B$ & $C$ & \\ \hline Adult & 1.0 & 1.7 & 0.9 & 1.2 \\ \hline Child & 1.5 & 1.8 & 2.4 & 1.9 \\ \hline\hline & 1.25 & 1.75 & 1.65 & 1.55 \\ % \hline \end{tabular}\end{center} } % End size \pause \item These correspond to ratios of marginal \emph{geometric mean} odds. \pause \begin{itemize} \item Geometric mean = $(7 \cdot 2 \cdot 5 \cdot 6)^{1/4} = 4.53$ \pause (Not 5) \pause \item Geometric mean $\leq$ Arithmetic mean. \pause \end{itemize} \item Simple statements about geometric means of odds translate into complicated statements about probabilities. \pause \item Still, they often capture the reality that something is more likely under certain conditions. \end{itemize} \end{frame} \begin{frame} \frametitle{Interactions} \framesubtitle{It depends} \pause {\LARGE Is there an interaction? 
\vspace{5mm} \begin{center} \begin{tabular}{|c|c|c|c|} \hline & \multicolumn{3}{c|}{\textbf{Group}} \\ \hline \textbf{Treatment} & 1 & 2 & 3 \\ \hline Experimental & 0.5 & 0.7 & 0.9 \\ \hline Control & 0.2 & 0.4 & 0.6 \\ \hline % \hline \end{tabular} \end{center} } % End size \end{frame} \begin{frame} \frametitle{Interaction means different odds ratios} \framesubtitle{Not different differences between probabilities} \pause {\LARGE \begin{center} \begin{tabular}{l|l|r|r|r|} \hline & & \multicolumn{3}{c|}{\textbf{Group}} \\ \hline & & 1 & 2 & 3 \\ \hline \hline Exper & Probability & 0.50 & 0.70 & 0.90 \\ \hline & Odds & 1.00 & 2.33 & 9.00 \\ \hline \hline Control & Probability & 0.20 & 0.40 & 0.60 \\ \hline & Odds & 0.25 & 0.67 & 1.50 \\ \hline \hline \hline & Odds Ratio & 4.00 & 3.50 & 6.00 \\ \hline \end{tabular} \end{center} } % End size \end{frame} \begin{frame} \frametitle{Repeated measures} %\framesubtitle{} \begin{itemize} \item Suppose subjects appear in more than one treatment condition. \pause \item There is a baseline log odds of Yes for each treatment combination. \pause \item A random shock for each subject is added to the baseline log odds. \pause \item According to the usual model, the random shock is normally distributed. \pause \item When a subject appears in more than one condition, her log odds of Yes are pushed up or down by the same amount. \pause % \item This implies an upward or downward adjustment of the probability for each subject. \pause \item This produces correlated binary responses from the same subject. \end{itemize} \end{frame} \begin{frame} \frametitle{A two-stage probability process} \framesubtitle{Implied by the random shocks model} \pause \begin{itemize} \item Subjects in a treatment combination are a random sample from some hypothetical population. \pause \item The random sampling of subjects is Stage One. \pause \item Within each treatment combination, log odds of Yes are normally distributed. 
\pause \begin{itemize} \item Population mean is determined by the treatment combination. \pause \item Population variance is the same for all treatment combinations. \pause \end{itemize} \item Log odds values from the same subject are correlated. \pause % Censored: The correlation is one. \pause \item For each observation, log odds of Yes are converted to a probability. \pause \item Then a Yes or No is randomly produced. \pause \item The ``coin toss'' to produce Yes or No is Stage Two. \pause \item All we can observe are the Yes and No values. \end{itemize} \end{frame} \begin{frame} \frametitle{Theoretical distribution of the probability of Yes} \framesubtitle{Based on a normal distribution of the log odds with standard deviation = 1} \pause \begin{center} \begin{tabular}{ccc} \includegraphics[width=1.5in]{pdensA} & \includegraphics[width=1.5in]{pdensB} & \includegraphics[width=1.5in]{pdensC} \end{tabular} \end{center} \end{frame} \begin{frame} \frametitle{Main points} %\framesubtitle{} \begin{itemize} \item Model implies a different log odds of Yes for each subject (in each condition). \pause \item Log odds are normally distributed. \pause \item Population mean log odds depends on the treatment combination. \pause \item Tests of main effects and interactions have the usual meaning \pause \emph{in terms of population mean log odds}. \pause \item You should continue to think in terms of probabilities (proportion of Yes responses in each condition). \pause \item You can still believe pairwise differences between means\pause, because mean log odds are different if and only if mean probability is different. \pause \item You might want to look at estimated mean log odds to see what the tests of main effects and interactions are reflecting. \end{itemize} \end{frame} \begin{frame} \frametitle{State of the art} \framesubtitle{Contemporary, not just modern} \pause \begin{itemize} \item The theory behind the tests is straightforward. \pause \item But it's asymptotic. 
\pause \item Meaning it's justified as the number of data points $\rightarrow \infty$. \pause \item Computation is a bit bleeding edge. \pause \item Methods for finding parameter estimates are iterative. \pause \item Convergence problems are common. \pause \item Software from different vendors does not always produce the same numbers. \pause \item It's easy to produce examples with simulated data that fail the test of large-sample accuracy \pause (yes, in R). \pause \item Let's do it anyway. \end{itemize} \end{frame} \begin{frame} \frametitle{The \texttt{glmer} function in the \texttt{lme4} package} \pause %\framesubtitle{} \begin{itemize} \item Syntax is like \texttt{lmer} for linear models. \pause \item And like \texttt{glm} for generalized linear models with fixed effects. \pause \item We are going to keep it simple. \pause \item Just add \texttt{+(1|Subject)} for the random shock (intercept). \pause \item And maybe sometimes \texttt{+(1|Item)}. \pause \item Use effect coding (\texttt{contr.sum}) if there are interactions between factors. \pause \item \texttt{Anova(model,type='III')} from the \texttt{car} package to test each effect controlling for all others. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Copyright Information} This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. 
The \LaTeX~source code is available from \href{http://www.utstat.toronto.edu/~brunner/workshops/mixed} {\footnotesize \texttt{http://www.utstat.toronto.edu/$^\sim$brunner/workshops/mixed}} \end{frame} \end{document} ############### R code for the figures ############### # Log odds as a function of probability Probability = seq(from=0.001,to=0.999,by=0.001) LogOdds = log(Probability/(1-Probability)) plot(Probability,LogOdds,type='l') ############################################################### # If log odds are normal, what does the density of the probability look like? f = function(y,mu=0,sigma=1) { dens = dnorm(log(y/(1-y)),mu,sigma)/(y*(1-y)) return(dens) } Mu=0; Sigma = 1 p = seq(from=0.001,to=0.999,by=0.001) Density = f(p,mu=Mu,sigma=Sigma) plot(p,Density,type='l',ylim=c(0,2.5),xlab = 'Probability of Yes') # This example solves the difficult problem of combining the display of # Greek letters with numerical values that are not hard-coded. # Ugly but flexible: * means concatenation in expressions, and we # need to substitute numerical values. First comes # an expression, and then a list of substitutions. titlestring = substitute( mu * " = " * MU , list(MU=Mu) ) title(titlestring, cex.main=3) # cex.main multiplies the font size %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Unused \begin{frame} \frametitle{Random shocks} \pause %\framesubtitle{} \begin{itemize} \item A multiple regression-like equation for the log odds \pause \item A random (normal) shock for subject is added to the log odds. \pause \item This implies an upward or downward adjustment of the probability. \pause \end{itemize} \vspace{5mm} \end{frame}