% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
\usetheme{AnnArbor} % CambridgeUS
% \usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
% \usetheme{Berlin} % Displays sections on top
% \usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{Within-cases for binary response data using non-linear mixed models\footnote{
This slide show is an open-source document. See last slide for copyright information.}}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Vocabulary: Linear vs. non-linear models} \pause
%\framesubtitle{}
\begin{itemize}
\item In a linear model, $E(y|\mathbf{x})$ is a linear function of the parameters. \pause
\item Ordinary regression is linear: \pause
\begin{displaymath}
E(y|\mathbf{x}) = \beta_0 + \beta_1 x_1 + \cdots + \beta_{p-1} x_{p-1}
\end{displaymath} \pause
\item Logistic regression is non-linear: \pause
\begin{displaymath}
E(y|\mathbf{x}) = \frac{e^{\beta_0 + \beta_1 x_1 + \cdots + \beta_{p-1} x_{p-1}}}
                      {1+e^{\beta_0 + \beta_1 x_1 + \cdots + \beta_{p-1} x_{p-1}}}
\end{displaymath}
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Within-cases for binary data: The idea}
%\framesubtitle{}
\begin{itemize}
\item There are several binary responses for each case. \pause
\item For example, was the person employed right after graduation, 6 months after, one year after \ldots~Yes or No. \pause
\item Or did the consumer purchase at least one computer in 2016, 2017, 2018 \ldots \pause
\item Or did the patient have a seizure on day 1, day 2, \ldots~after treatment? \pause
\item Binary choices in laboratory studies can be repeated measures. \pause
\item Model: Logistic regression with a random shock for each case, pushing all the log odds values for that case up or down by the same amount (a small simulation sketch follows). \pause
\item The random shock is added to the regression equation for the log odds. \pause
\item Usually the random shock is normal --- what else?
% \item Often described as a random intercept model.
\end{itemize}
\end{frame}
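%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{A small simulation sketch of the idea}
\framesubtitle{A rough illustration --- the numbers are made up}

One way to simulate data like this in R. The parameter values, sample sizes and
variable names below are arbitrary assumptions, chosen only to make the
random-shock idea concrete; the model is stated formally on the next slide.

{\footnotesize
\begin{verbatim}
set.seed(9999)
n <- 200; k <- 4                    # n cases, k binary responses each
beta0 <- -1; beta1 <- 0.5           # assumed regression coefficients
sigma <- 1                          # SD of the random shocks
delta <- rnorm(n, 0, sigma)         # one shock per case
x <- matrix(rnorm(n*k), n, k)       # an explanatory variable
logodds <- beta0 + beta1*x + delta  # delta[i] shifts all k log odds for case i
pi.ij <- exp(logodds)/(1 + exp(logodds))
y <- matrix(rbinom(n*k, 1, pi.ij), n, k)   # binary responses
# Long format, one row per (case, occasion) -- convenient for model fitting
dat <- data.frame(case = rep(1:n, times = k),
                  x = as.vector(x), y = as.vector(y))
\end{verbatim}
} % End size
\end{frame}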
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{A random intercept model}
\framesubtitle{For $i = 1, \ldots, n$ and $j = 1, \ldots, k$} \pause
\begin{itemize}
\item $\Delta_1, \ldots, \Delta_n \stackrel{i.i.d.}{\sim} N(0,\sigma^2)$ \pause
\item Conditionally on $\Delta_i=\delta_i$ for $i = 1, \ldots, n$, \pause binary responses $y_{ij}$ are independent with \pause
\begin{eqnarray*}
\log\left(\frac{\pi_{ij}}{1-\pi_{ij}} \right)
  &=& (\beta_0 + \delta_{i}) + \beta_1 x_{i,j,1} + \cdots + \beta_{p-1} x_{i,j,p-1} \\ \pause
  &=& \mathbf{x}_{ij}^\prime\boldsymbol{\beta} + \delta_{i}, \pause \mbox{ so that} \\ \pause
  &&\\
\pi_{ij} &=& \frac{e^{\mathbf{x}_{ij}^\prime\boldsymbol{\beta} + \delta_{i}}}
                  {1+e^{\mathbf{x}_{ij}^\prime\boldsymbol{\beta} + \delta_{i}}}
\end{eqnarray*} \pause
where $\pi_{ij} = P\{y_{ij} = 1|\Delta_i=\delta_i\}$. % \pause
\end{itemize} \pause
\vspace{3mm}
Some of the $x_{ij\ell}$ could be dummy variables for time period or within-case treatment, different for $j = 1, \ldots, k$ within case $i$.
% The notation is messy but the idea is clear.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Maximum likelihood} \pause
%\framesubtitle{}
\begin{itemize}
\item The parameter vector is $\boldsymbol{\theta} = (\beta_0, \beta_1, \ldots, \beta_{p-1}, \sigma^2)^\prime$. \pause
\item The vector of binary observations for each case is $\mathbf{y}_i = (y_{i1}, \ldots, y_{ik})^\prime$. \pause
\item The likelihood function is $L(\boldsymbol{\theta}) = \prod_{i=1}^n p_{\boldsymbol{\theta}}(\mathbf{y}_i)$, \pause
\item where $p_{\boldsymbol{\theta}}(\mathbf{y}_i)$ is the probability of observing the vector $\mathbf{y}_i$. \pause
\item We need to calculate $p_{\boldsymbol{\theta}}(\mathbf{y}_i)$ as a function of $\boldsymbol{\theta}$ and maximize the likelihood.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Model gives us a \emph{conditional} probability}
\framesubtitle{But we need the unconditional probability $p_{\boldsymbol{\theta}}(\mathbf{y}_i)$} \pause
\begin{itemize}
\item Given $\Delta_i=\delta_i$, the $y_{ij}$ are independent, so \pause
\begin{displaymath}
p_{\boldsymbol{\theta}}(\mathbf{y}_i|\Delta_i=\delta_i) = \pause
\prod_{j=1}^k
\left(\frac{e^{\mathbf{x}_{ij}^\prime\boldsymbol{\beta} + \delta_{i}}}
           {1+e^{\mathbf{x}_{ij}^\prime\boldsymbol{\beta} + \delta_{i}}} \right)^{y_{ij}}
\left(1-\frac{e^{\mathbf{x}_{ij}^\prime\boldsymbol{\beta} + \delta_{i}}}
             {1+e^{\mathbf{x}_{ij}^\prime\boldsymbol{\beta} + \delta_{i}}} \right)^{1-y_{ij}}
\end{displaymath} \pause
\item This is a conditional probability. \pause
\item Conditional on $\mathbf{x}_{ij}$ as well as $\delta_{i}$. \pause
\item It's okay to treat the $\mathbf{x}_{ij}$ as known constants because they are observed. \pause
\item But the $\delta_{i}$ are unobservable (latent random variables). \pause
\item Integrate them out using the law of total probability (sketched in R on the next slide).
\end{itemize}
\end{frame}
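%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Sketch: integrating out $\delta_i$ numerically}
\framesubtitle{A rough illustration with made-up numbers}

For a single case, the conditional probability on the preceding slide can be
integrated against the normal density numerically, for example with R's
\texttt{integrate} function. The parameter values and data below are made-up
assumptions; the integral itself is written out on the next slide.

{\footnotesize
\begin{verbatim}
beta  <- c(-1, 0.5)                # assumed (beta0, beta1)
sigma <- 1                         # assumed SD of the random shocks
yi    <- c(1, 0, 1, 1)             # one case's k = 4 binary responses
Xi    <- cbind(1, c(0.2, -1.1, 0.7, 0.3))   # k x p matrix of x values

# P(y_i = yi | Delta_i = delta): product over j of pi^y * (1-pi)^(1-y)
cond.prob <- function(delta) {
  pi.ij <- 1/(1 + exp(-(Xi %*% beta + delta)))
  prod(pi.ij^yi * (1 - pi.ij)^(1 - yi))
}

# Integrate out delta against the N(0, sigma^2) density
integrand <- Vectorize(function(d) cond.prob(d) * dnorm(d, 0, sigma))
integrate(integrand, -Inf, Inf)$value   # the unconditional p(y_i)
\end{verbatim}
} % End size
\end{frame}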
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Law of total probability}
\framesubtitle{Double expectation} \pause
{\small
\begin{eqnarray*}
p_{\boldsymbol{\theta}}(\mathbf{y}_i) & = & \int_{-\infty}^\infty
  p_{\boldsymbol{\theta}}(\mathbf{y}_i|\Delta_i=\delta_i) \, f(\delta_i|\sigma^2) \, d\delta_i \\ \pause
& = & \int_{-\infty}^\infty \prod_{j=1}^k
\left(\frac{e^{\mathbf{x}_{ij}^\prime\boldsymbol{\beta} + \delta_{i}}}
           {1+e^{\mathbf{x}_{ij}^\prime\boldsymbol{\beta} + \delta_{i}}} \right)^{y_{ij}}
\left(1-\frac{e^{\mathbf{x}_{ij}^\prime\boldsymbol{\beta} + \delta_{i}}}
             {1+e^{\mathbf{x}_{ij}^\prime\boldsymbol{\beta} + \delta_{i}}} \right)^{1-y_{ij}}
\, f(\delta_i|\sigma^2) \, d\delta_i
\end{eqnarray*} \pause
} % End size
where $f(\delta|\sigma^2) = \frac{1}{\sigma\sqrt{2\pi}} \exp(-\frac{\delta^2}{2\sigma^2})$. \pause

\vspace{2mm}
\begin{itemize}
\item The likelihood is a product of $n$ terms like this. \pause
\item Nobody can do the integral in closed form. \pause
\item It has to be done numerically\pause, $n$ times. \pause
\item Numerical integration as well as a numerical search.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{State of the art}
\framesubtitle{Contemporary, not just modern} \pause
\begin{itemize}
\item The theory is mainstream large-sample maximum likelihood. \pause
\item The computation is a bit bleeding edge. \pause
\item Methods for finding parameter estimates are iterative. \pause
\item Convergence problems are common. \pause
\item R and SAS give similar results for all the examples I've seen. \pause
\item In R, use the \texttt{glmer} function in the \texttt{lme4} package. \pause
\item In SAS, use \texttt{proc nlmixed}. \pause
\item It's not at all like \texttt{proc mixed}.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by
\href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistical Sciences, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:

\vspace{5mm}

\href{http://www.utstat.toronto.edu/~brunner/oldclass/441s20}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/441s20}}

\end{frame}

\end{document}