% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
\usetheme{Berlin} % Displays sections on top
\usepackage[english]{babel}
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>
% \mode<presentation>{\setbeamercolor{background canvas}{bg=black!5}}

\title{Large-sample Likelihood Ratio Tests\footnote{See last slide for copyright information.}}
\subtitle{STA431 Spring 2015}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\section{Introduction}

\begin{frame}
\frametitle{Model and null hypothesis}
% \framesubtitle{}
\begin{displaymath}
\begin{array}{l}
D_1, \ldots, D_n \stackrel{i.i.d.}{\sim} P_\theta, \, \theta \in \Theta, \\
H_0: \theta \in \Theta_0 \mbox{ vs. } H_A: \theta \in \Theta \cap \Theta_0^c \\
\end{array}
\end{displaymath}
The data have likelihood function
\begin{displaymath}
L(\theta) = \prod_{i=1}^n f(d_i;\theta),
\end{displaymath}
where $f(d_i;\theta)$ is the density or probability mass function evaluated at $d_i$.
\end{frame}

\begin{frame}
\frametitle{Example}
\framesubtitle{$\begin{array}{l}
D_1, \ldots, D_n \stackrel{i.i.d.}{\sim} P_\theta, \, \theta \in \Theta, \\
H_0: \theta \in \Theta_0 \mbox{ vs. } H_A: \theta \in \Theta \cap \Theta_0^c \\
\end{array}$ }
\begin{displaymath}
\begin{array}{l}
D_1, \ldots, D_n \stackrel{i.i.d.}{\sim} N(\mu,\sigma^2) \\
H_0: \mu=\mu_0 \mbox{ vs. } H_A: \mu \neq \mu_0 \\
\Theta_0 = \{(\mu,\sigma^2): \mu=\mu_0 \}
\end{array}
\end{displaymath}
\begin{center}
\includegraphics[width=1.8in]{ParameterSpace}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Likelihood ratio}
%\framesubtitle{}
\begin{itemize}
\item Let $\widehat{\theta}$ denote the usual Maximum Likelihood Estimate (MLE).
\item That is, $\widehat{\theta}$ is the parameter value for which the likelihood function is greatest, over all $\theta \in \Theta$.
\item Let $\widehat{\theta}_0$ denote the \emph{restricted} MLE. The restricted MLE is the parameter value for which the likelihood function is greatest, over all $\theta \in \Theta_0$.
\item $\widehat{\theta}_0$ is \emph{restricted} by the null hypothesis $H_0: \theta \in \Theta_0$.
\item $L(\widehat{\theta}_0) \leq L(\widehat{\theta})$, so that
\item The \emph{likelihood ratio} $\lambda = \frac{L(\widehat{\theta}_0)}{L(\widehat{\theta})} \leq 1.$
\item The likelihood ratio will equal one if and only if the overall MLE $\widehat{\theta}$ is located in $\Theta_0$. In this case, there is no reason to reject the null hypothesis.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{The test statistic}
%\framesubtitle{}
\begin{itemize}
\item We know $\lambda = \frac{L(\widehat{\theta}_0)}{L(\widehat{\theta})} \leq 1.$
\item If it is a \emph{lot} less than one, then the data are far less likely to have been observed under the null hypothesis than under the alternative hypothesis, and the null hypothesis is questionable.
\item If $\lambda$ is small (close to zero), then $\ln(\lambda)$ is a large negative number, and $-2\ln\lambda$ is a large positive number.
\end{itemize} \begin{displaymath} G^2 = -2 \ln \left( \frac{\max_{\theta \in \Theta_0} L(\theta)} {\max_{\theta \in \Theta} L(\theta) } \right) \end{displaymath} \end{frame} \begin{frame} \frametitle{Difference between two $-2$ loglikelihoods} %\framesubtitle{} \begin{eqnarray*} G^2 & = & -2 \ln \left( \frac{\max_{\theta \in \Theta_0} L(\theta)} {\max_{\theta \in \Theta} L(\theta) } \right) \nonumber \\ & = & -2 \ln \left( \frac{ L(\widehat{\theta}_0) } {L(\widehat{\theta}) } \right) \nonumber \\ & = & -2 \ln L(\widehat{\theta}_0) - [-2 \ln L(\widehat{\theta})] \nonumber \\ & = & -2\ell(\widehat{\theta}_0) - [-2\ell(\widehat{\theta})] . \end{eqnarray*} \begin{itemize} \item Could minimize $-2\ell(\theta)$ twice, first over all $\theta \in \Theta$, and then over all $\theta \in \Theta_0$. \item The test statistic is the difference between the two minimum values. \end{itemize} \end{frame} \begin{frame} \frametitle{Distribution of the test statistic under $H_0$} \framesubtitle{Approximate large sample distribution} Suppose the null hypothesis is that certain \emph{linear combinations} of parameter values are equal to specified constants. Then if $H_0$ is true, \begin{displaymath} G^2 = -2 \ln \left( \frac{L(\widehat{\theta}_0)} { L(\widehat{\theta}) } \right) \end{displaymath} has an approximate chi-squared distribution for large $n$. \begin{itemize} \item Degrees of freedom equals number of (non-redundant, linearly independent) equalities specified by $H_0$. \item Reject when $G^2$ is large. \end{itemize} \end{frame} \begin{frame} \frametitle{Example} %\framesubtitle{} Suppose $\boldsymbol{\theta} = (\theta_1, \ldots \theta_7)$, with \begin{displaymath} H_0: ~\theta_1=\theta_2, \theta_6=\theta_7, \frac{1}{3}\left(\theta_1+\theta_2+\theta_3\right) = \frac{1}{3}\left(\theta_4+\theta_5+\theta_6\right) \end{displaymath} Count the equals signs or write the null hypothesis in matrix form as $H_0: \mathbf{L}\boldsymbol{\theta} = \mathbf{h}$. \begin{displaymath} \left( \begin{array}{r r r r r r r} 1 & -1 & ~0 & ~0 & ~0 & ~0 & ~0 \\ 0 & 0 & 0 & 0 & 0 & 1 & -1 \\ 1 & 1 & 1 & -1 & -1 & -1 & 0 \\ \end{array} \right) \left( \begin{array}{r} \theta_1 \\ \theta_2 \\ \theta_3 \\ \theta_4 \\ \theta_5 \\ \theta_6 \\ \theta_7 \end{array} \right) = \left( \begin{array}{r} 0 \\ 0 \\ 0 \end{array} \right) \end{displaymath} Rows are linearly independent, so $df$=number of rows = 3. 
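\end{frame}

\begin{frame}[fragile]
\frametitle{Checking the degrees of freedom by computer}
\framesubtitle{A small sketch; Python and \texttt{numpy} are illustrative choices, not course software}
The degrees of freedom equal the number of linearly independent rows of $\mathbf{L}$, so one quick way to confirm the count is to compute the rank of $\mathbf{L}$ numerically. This is a sketch, not part of the original example:
\begin{verbatim}
import numpy as np

# The L matrix from H0: L theta = h on the previous slide
L = np.array([[1, -1,  0,  0,  0,  0,  0],
              [0,  0,  0,  0,  0,  1, -1],
              [1,  1,  1, -1, -1, -1,  0]])

# df = number of linearly independent rows = rank of L
print(np.linalg.matrix_rank(L))   # 3
\end{verbatim}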
\end{frame} \begin{frame} \frametitle{Bernoulli example} \pause %\framesubtitle{} \begin{itemize} \item $Y_1, \ldots, Y_n \stackrel{i.i.d.}{\sim} B(1,\theta)$ \pause \item $H_0:\theta=\theta_0$ \pause \item $\Theta=(0,1)$ \pause \item $\Theta_0 = \{\theta_0\}$ \pause \item $L(\theta) = \theta^{\sum_{i=1}^n y_i} (1-\theta)^{n-\sum_{i=1}^n y_i}$ \pause \item $\widehat{\theta} = \overline{y}$ \pause \item $\widehat{\theta}_0 = \theta_0$ \end{itemize} \end{frame} \begin{frame} \frametitle{Likelihood ratio test statistic} \framesubtitle{$L(\theta) = \theta^{\sum_{i=1}^n y_i} (1-\theta)^{n-\sum_{i=1}^n y_i}$} \begin{eqnarray*} G^2 & = & -2\ln\frac{L(\widehat{\theta}_0)} {L(\widehat{\theta})} \\ \pause & = & -2\ln\frac{\theta_0^{n\overline{y}} (1-\theta_0)^{n(1-\overline{y})}} {\overline{y}^{n\overline{y}} (1-\overline{y})^{n(1-\overline{y})}} \\ \pause & = & -2\ln\left(\frac{\theta_0^{\overline{y}} (1-\theta_0)^{(1-\overline{y})}} {\overline{y}^{\overline{y}} (1-\overline{y})^{(1-\overline{y})}} \right)^n \\ \pause & = & 2n\ln\left(\frac{\theta_0^{\overline{y}} (1-\theta_0)^{(1-\overline{y})}} {\overline{y}^{\overline{y}} (1-\overline{y})^{(1-\overline{y})}} \right)^{-1} \\ \pause & = & 2n\ln\frac{\overline{y}^{\overline{y}} (1-\overline{y})^{(1-\overline{y})}} {\theta_0^{\overline{y}} (1-\theta_0)^{(1-\overline{y})}} \\ \end{eqnarray*} \end{frame} \begin{frame} \frametitle{Continued} \begin{eqnarray*} G^2 & = & 2n\ln\frac{\overline{y}^{\overline{y}} (1-\overline{y})^{(1-\overline{y})}} {\theta_0^{\overline{y}} (1-\theta_0)^{(1-\overline{y})}} \\ \pause & = & 2n\left( \ln \left(\frac{\overline{y}}{\theta_0}\right)^{\overline{y}} + \ln \left(\frac{1-\overline{y}}{1-\theta_0}\right)^{(1-\overline{y})}\right) \\ \pause & = & 2n\left( \overline{y}\ln \left(\frac{\overline{y}}{\theta_0}\right) + (1-\overline{y}) \ln \left(\frac{1-\overline{y}}{1-\theta_0}\right)\right) \\ \end{eqnarray*} \end{frame} \begin{frame} \frametitle{Coffee taste test} \framesubtitle{$n=100,~\theta_0=0.50,~\overline{y}=0.60$} \pause \begin{eqnarray*} G^2 & = & 2n\left( \overline{y}\ln \left(\frac{\overline{y}}{\theta_0}\right) + (1-\overline{y}) \ln \left(\frac{1-\overline{y}}{1-\theta_0}\right)\right) \\ \pause & = & 200\left( 0.60\ln \left(\frac{0.60}{0.50}\right) + 0.40 \ln \left(\frac{0.40}{0.50}\right)\right) \\ & = & 4.027 \end{eqnarray*} \pause $df=1$, critical value $1.96^2=3.84$. Conclude (barely) that the new coffee blend is preferred over the old. 
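\end{frame}

\begin{frame}[fragile]
\frametitle{Checking the coffee arithmetic by computer}
\framesubtitle{A sketch in Python; the numbers come from the example, the software choice does not}
The calculation on the last slide is easy to verify. This sketch evaluates $G^2$ and also reports the $\chi^2_1$ critical value and tail probability:
\begin{verbatim}
import numpy as np
from scipy.stats import chi2

n, theta0, ybar = 100, 0.50, 0.60

# G^2 = 2n( ybar ln(ybar/theta0) + (1-ybar) ln((1-ybar)/(1-theta0)) )
G2 = 2 * n * (ybar * np.log(ybar / theta0)
              + (1 - ybar) * np.log((1 - ybar) / (1 - theta0)))

print(round(G2, 3))                  # 4.027
print(round(chi2.ppf(0.95, 1), 3))   # critical value 3.841
print(round(chi2.sf(G2, 1), 3))      # p-value, about 0.045
\end{verbatim}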
\end{frame}

\begin{frame}
\frametitle{Univariate normal example}
%\framesubtitle{}
\begin{itemize}
\item $Y_1, \ldots, Y_n \stackrel{i.i.d.}{\sim} N(\mu,\sigma^2)$ \pause
\item $H_0:\mu=\mu_0$ \pause
\item $\Theta=\{(\mu,\sigma^2): \mu \in \mathbb{R}, \sigma^2>0\}$ \pause
\item $\Theta_0 = \{(\mu,\sigma^2): \mu=\mu_0, \sigma^2>0\}$ \pause
\item $L(\theta) = (\sigma^2)^{-n/2} (2\pi)^{-n/2} \exp\{-\frac{1}{2\sigma^2}\sum_{i=1}^n(y_i-\mu)^2\}$ \pause
% \item $\ell(\theta) = -\frac{n}{2}\ln\sigma^2 - \frac{n}{2}\ln (2\pi) - \frac{1}{2\sigma^2}\sum_{i=1}^n(y_i-\mu)^2$
\item $\widehat{\theta} = \left(\overline{Y}, \widehat{\sigma}^2 \right)$, \pause where
\begin{displaymath}
\widehat{\sigma}^2 = \frac{1}{n}\sum_{i=1}^n(Y_i-\overline{Y})^2
\end{displaymath} \pause
\item $\widehat{\theta}_0 = \ldots$
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Restricted MLE}
\framesubtitle{For $H_0: \mu=\mu_0$}
Recall that setting derivatives to zero, we obtained \pause
\begin{displaymath}
\mu = \overline{y} \mbox{ and } \sigma^2 = \frac{1}{n}\sum_{i=1}^n(y_i-\mu)^2, \mbox{ so with } \mu \mbox{ fixed at } \mu_0,
\end{displaymath} \pause
{\LARGE
\begin{eqnarray*}
\widehat{\mu}_0 & = & \mu_0 \\ \pause
\widehat{\sigma}_0^2 & = & \frac{1}{n}\sum_{i=1}^n(Y_i-\mu_0)^2 \\
\end{eqnarray*}
} % End size
\end{frame}

\begin{frame}
\frametitle{Likelihood ratio test statistic $G^2 = -2\ln\frac{L(\widehat{\theta}_0)} {L(\widehat{\theta})}$} \pause
% \framesubtitle{}
Have $L(\theta) = (\sigma^2)^{-n/2} (2\pi)^{-n/2} \exp\{-\frac{1}{2\sigma^2}\sum_{i=1}^n(y_i-\mu)^2\}$, so \pause
\vspace{3mm}
\begin{eqnarray*}
L(\widehat{\theta}) & = & (\widehat{\sigma}^2)^{-n/2} (2\pi)^{-n/2} \exp\{-\frac{1}{2\widehat{\sigma}^2}\sum_{i=1}^n(y_i-\overline{y})^2\} \\ \pause
& = & (\widehat{\sigma}^2)^{-n/2} (2\pi)^{-n/2} \exp\left\{-\frac{\sum_{i=1}^n(y_i-\overline{y})^2}{2\frac{1}{n}\sum_{i=1}^n(y_i-\overline{y})^2}\right\} \\ \pause
& = & (\widehat{\sigma}^2)^{-n/2} (2\pi)^{-n/2} e^{-n/2}
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Likelihood at restricted MLE}
\framesubtitle{$L(\theta) = (\sigma^2)^{-n/2} (2\pi)^{-n/2} \exp\{-\frac{1}{2\sigma^2}\sum_{i=1}^n(y_i-\mu)^2\}$} \pause
\begin{eqnarray*}
L(\widehat{\theta}_0) & = & (\widehat{\sigma}_0^2)^{-n/2} (2\pi)^{-n/2} \exp\{-\frac{1}{2\widehat{\sigma}_0^2}\sum_{i=1}^n(y_i-\mu_0)^2\} \\ \pause
& = & (\widehat{\sigma}_0^2)^{-n/2} (2\pi)^{-n/2} \exp\left\{-\frac{\sum_{i=1}^n(y_i-\mu_0)^2}{2\frac{1}{n}\sum_{i=1}^n(y_i-\mu_0)^2}\right\} \\ \pause
& = & (\widehat{\sigma}_0^2)^{-n/2} (2\pi)^{-n/2} e^{-n/2}
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Test statistic}
%\framesubtitle{}
{\small
\begin{eqnarray*}
G^2 & = & -2\ln\frac{L(\widehat{\theta}_0)} {L(\widehat{\theta})} \\ \pause
& = & -2\ln\frac{(\widehat{\sigma}_0^2)^{-n/2} (2\pi)^{-n/2} e^{-n/2}} {(\widehat{\sigma}^2)^{-n/2} (2\pi)^{-n/2} e^{-n/2}} \\ \pause
& = & -2\ln\left(\frac{\widehat{\sigma}_0^2}{\widehat{\sigma}^2}\right)^{-n/2} \\ \pause
& = & n\ln \left(\frac{\widehat{\sigma}_0^2}{\widehat{\sigma}^2}\right) \\ \pause
& = & n\ln \left(\frac{\frac{1}{n}\sum_{i=1}^n(Y_i-\mu_0)^2} {\frac{1}{n}\sum_{i=1}^n(Y_i-\overline{Y})^2}\right) \\ \pause
& = & n\ln \left(\frac{\sum_{i=1}^n(Y_i-\mu_0)^2} {\sum_{i=1}^n(Y_i-\overline{Y})^2}\right) \\
\end{eqnarray*}
} % End size
\end{frame}

\section{Multivariate Normal}

\begin{frame}
\frametitle{Multivariate normal likelihood}
{\footnotesize
\begin{eqnarray*}
L(\boldsymbol{\mu,\Sigma}) &=& \prod_{i=1}^n \frac{1}{|\boldsymbol{\Sigma}|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}} \exp\left\{ -\frac{1}{2}
(\mathbf{y}_i-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{y}_i-\boldsymbol{\mu})\right\} \\ \pause &&\\ &=& |\boldsymbol{\Sigma}|^{-n/2} (2\pi)^{-np/2} \exp\left\{ -\frac{1}{2} \sum_{i=1}^n (\mathbf{y}_i-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{y}_i-\boldsymbol{\mu})\right\} \\ \pause &&\\ &=& |\boldsymbol{\Sigma}|^{-n/2} (2\pi)^{-np/2} \exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) + (\overline{\mathbf{y}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} (\overline{\mathbf{y}}-\boldsymbol{\mu}) \right\}, \end{eqnarray*} } \pause where $\boldsymbol{\widehat{\Sigma}} = \frac{1}{n}\sum_{i=1}^n (\mathbf{y}_i-\overline{\mathbf{y}}) (\mathbf{y}_i-\overline{\mathbf{y}})^\top $ is the sample variance-covariance matrix. \end{frame} \begin{frame} \frametitle{Sample variance-covariance matrix} \pause %\framesubtitle{} \begin{displaymath} \mathbf{Y}_i = \left(\begin{array}{c} Y_{i,1} \\ \vdots \\ Y_{i,p} \end{array} \right) ~~~~~~~~~~ \overline{\mathbf{Y}} = \left(\begin{array}{c} \overline{Y}_1 \\ \vdots \\ \overline{Y}_p \end{array} \right) \end{displaymath} \pause \vspace{3mm} $\boldsymbol{\widehat{\Sigma}} = \frac{1}{n}\sum_{i=1}^n (\mathbf{Y}_i-\overline{\mathbf{Y}}) (\mathbf{Y}_i-\overline{\mathbf{Y}})^\top $ is a $p \times p$ matrix with $(j,k)$ element \pause \begin{displaymath} \frac{1}{n}\sum_{i=1}^n (Y_{i,j}-\overline{Y}_j)(Y_{i,k}-\overline{Y}_k) \end{displaymath} \pause This is a sample variance or covariance. \end{frame} \begin{frame} \frametitle{Multivariate normal likelihood at the MLE} \framesubtitle{This will be in the denominator of every likelihood ratio test.} \pause {\footnotesize \begin{eqnarray*} L(\boldsymbol{\mu,\Sigma}) &=& |\boldsymbol{\Sigma}|^{-\frac{n}{2}} (2\pi)^{-\frac{np}{2}} \exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) + (\overline{\mathbf{y}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} (\overline{\mathbf{y}}-\boldsymbol{\mu}) \right\} \\ \pause &&\\ L(\widehat{\boldsymbol{\mu}}, \widehat{\boldsymbol{\Sigma}}) &=& |\widehat{\boldsymbol{\Sigma}}|^{-\frac{n}{2}} (2\pi)^{-\frac{np}{2}} e^{-\frac{np}{2}} \\ \end{eqnarray*} } \end{frame} \begin{frame} \frametitle{Test whether a set of variables are uncorrelated} \framesubtitle{Equivalent to zero covariance} \pause \begin{itemize} \item $\mathbf{Y}_1, \ldots, \mathbf{Y}_n \stackrel{i.i.d.}{\sim} N_p(\boldsymbol{\mu,\Sigma})$ \pause \item $H_0: \sigma_{ij}=0$ for $i \neq j$. \pause \item Equivalent to independence for this multivariate normal model. \pause \item Use $G^2 = -2 \ln \left( \frac{L(\widehat{\theta}_0)} {L(\widehat{\theta}) } \right)$. \item Have $L(\widehat{\theta})$. \item Need $L(\widehat{\theta}_0)$. 
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Getting the restricted MLE}
%\framesubtitle{}
For the multivariate normal, zero covariance is equivalent to independence, so under $H_0$, \pause
\begin{eqnarray*}
L(\boldsymbol{\mu,\Sigma}) & = & \prod_{i=1}^n f(\mathbf{y}_i|\boldsymbol{\mu,\Sigma}) \\ \pause
& = & {\color{red}\prod_{i=1}^n} \left( {\color{blue}\prod_{j=1}^p} f(y_{ij}|\mu_j,\sigma^2_j) \right) \\ \pause
& = & {\color{blue}\prod_{j=1}^p} \left( {\color{red}\prod_{i=1}^n} f(y_{ij}|\mu_j,\sigma^2_j) \right) \\
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Take logs and start differentiating}
%\framesubtitle{}
\begin{eqnarray*}
L(\boldsymbol{\mu}_0,\boldsymbol{\Sigma_0}) & = & \prod_{j=1}^p \left( \prod_{i=1}^n f(y_{ij}|\mu_j,\sigma^2_j) \right) \\ \pause
&&\\
\ell(\boldsymbol{\mu_0,\Sigma_0}) & = & \sum_{j=1}^p \ln\left( \prod_{i=1}^n f(y_{ij}|\mu_j,\sigma^2_j) \right) \\
\end{eqnarray*} \pause
\vspace{5mm}
It's just $p$ univariate problems, which we have already done.
\end{frame}

\begin{frame}
\frametitle{Likelihood at the restricted MLE}
%\framesubtitle{}
\begin{eqnarray*}
L(\widehat{\boldsymbol{\mu}}_0, \widehat{\boldsymbol{\Sigma}}_0) & = & \prod_{j=1}^p \left( (\widehat{\sigma}_j^2)^{-n/2} (2\pi)^{-n/2} \exp\{-\frac{1}{2\widehat{\sigma}_j^2}\sum_{i=1}^n(y_{ij}-\overline{y}_j)^2\} \right) \\ \pause
&&\\
& = & \prod_{j=1}^p \left( (\widehat{\sigma}_j^2)^{-n/2} (2\pi)^{-n/2} e^{-n/2} \right) \\ \pause
&&\\
& = & \left( \prod_{j=1}^p \widehat{\sigma}_j^2 \right)^{-\frac{n}{2}} (2\pi)^{-\frac{np}{2}} e^{-\frac{np}{2}}, \pause
\end{eqnarray*}
where $\widehat{\sigma}_j^2$ is the $j$th diagonal element of $\widehat{\boldsymbol{\Sigma}}$.
\end{frame}

\begin{frame}
\frametitle{Test statistic}
%\framesubtitle{}
{\small
\begin{eqnarray*}
G^2 & = & -2\ln\frac{L(\widehat{\theta}_0)} {L(\widehat{\theta})} \\ \pause
& = & -2\ln\frac{\left( \prod_{j=1}^p \widehat{\sigma}_j^2 \right)^{-\frac{n}{2}} (2\pi)^{-\frac{np}{2}} e^{-\frac{np}{2}}} { |\widehat{\boldsymbol{\Sigma}}|^{-\frac{n}{2}} (2\pi)^{-\frac{np}{2}} e^{-\frac{np}{2}} } \\ \pause
& = & -2\ln \left(\frac{\prod_{j=1}^p \widehat{\sigma}_j^2} {|\widehat{\boldsymbol{\Sigma}}|} \right)^{-\frac{n}{2}} \\ \pause
& = & n\ln \left(\frac{\prod_{j=1}^p \widehat{\sigma}_j^2} {|\widehat{\boldsymbol{\Sigma}}|} \right) \\ \pause
& = & n\left(\sum_{j=1}^p \ln\widehat{\sigma}_j^2 - \ln |\widehat{\boldsymbol{\Sigma}}| \right)
\end{eqnarray*}
} % End size
\end{frame}

\section{Numerical MLEs}

\begin{frame}
\frametitle{Numerical maximum likelihood}
\framesubtitle{For the multivariate normal} \pause
\begin{itemize}
\item Often an explicit formula for $\widehat{\theta}_0$ is out of the question. \pause
\item Maximize the log likelihood numerically. \pause
\item Equivalently, minimize $-2\ln L(\boldsymbol{\mu},\boldsymbol{\Sigma})$. \pause
\item Equivalently, minimize $-2\ln L(\boldsymbol{\mu},\boldsymbol{\Sigma})$ plus a constant. \pause
\item Choose the constant well, and minimize \pause
\begin{displaymath}
-2\ln L(\boldsymbol{\mu},\boldsymbol{\Sigma}) - (-2\ln L(\widehat{\boldsymbol{\mu}},\widehat{\boldsymbol{\Sigma}}))
\end{displaymath}
over $(\boldsymbol{\mu},\boldsymbol{\Sigma}) \in \Theta_0$. \pause
\item The value of this function at the stopping place is the likelihood ratio test statistic (a small numerical sketch follows on the next slide).
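\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Sketch of the numerical minimization}
\framesubtitle{Python with simulated data; an illustration only, not the course software}
{\footnotesize
Here is one way the recipe can look in code, for the null hypothesis that $\boldsymbol{\Sigma}$ is diagonal. The data, sample size, and optimizer are made-up illustrative choices; the value at the stopping place should agree with the closed-form $G^2 = n\left(\sum_{j=1}^p \ln\widehat{\sigma}_j^2 - \ln |\widehat{\boldsymbol{\Sigma}}| \right)$ derived earlier.
\begin{verbatim}
import numpy as np
from scipy.optimize import minimize

rng = np.random.default_rng(431)     # simulated data for illustration
n, p = 200, 3
Y = rng.multivariate_normal(np.zeros(p),
      [[1.0, 0.5, 0.3], [0.5, 1.0, 0.4], [0.3, 0.4, 1.0]], size=n)

def minus2loglike(mu, Sigma):        # -2 ln L(mu, Sigma)
    dev = Y - mu
    return (n * np.log(np.linalg.det(Sigma)) + n * p * np.log(2 * np.pi)
            + np.einsum('ij,jk,ik->', dev, np.linalg.inv(Sigma), dev))

ybar = Y.mean(axis=0)                           # unrestricted MLEs
Sigma_hat = np.cov(Y, rowvar=False, bias=True)  # divide by n, not n-1

def objective(par):                  # restricted space: Sigma diagonal
    mu = par[:p]
    Sigma0 = np.diag(np.exp(par[p:]))           # logs keep variances > 0
    return minus2loglike(mu, Sigma0) - minus2loglike(ybar, Sigma_hat)

start = np.zeros(2 * p)              # deliberately naive starting values
fit = minimize(objective, start, method='BFGS')
print(fit.fun)   # value at the stopping place = G^2
\end{verbatim}
} % End size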
\end{frame}

\begin{frame}
\frametitle{What SAS \texttt{proc calis} does}
\framesubtitle{Instead of minimizing $-2\ln L(\boldsymbol{\mu},\boldsymbol{\Sigma}) - (-2\ln L(\widehat{\boldsymbol{\mu}},\widehat{\boldsymbol{\Sigma}}))$} \pause
{\footnotesize
\begin{eqnarray*}
-2\ln\frac{L(\boldsymbol{\mu},\boldsymbol{\Sigma})} {L(\widehat{\boldsymbol{\mu}} ,\widehat{\boldsymbol{\Sigma}})}
& = & -2\ln\frac{|\boldsymbol{\Sigma}|^{-\frac{n}{2}} \exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) + (\overline{\mathbf{y}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} (\overline{\mathbf{y}}-\boldsymbol{\mu}) \right\}} {|\widehat{\boldsymbol{\Sigma}}|^{-\frac{n}{2}} e^{-\frac{np}{2}}} \\ \pause
& = & n\ln\frac{|\boldsymbol{\Sigma}| \, \exp\left\{ tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) + (\overline{\mathbf{y}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} (\overline{\mathbf{y}}-\boldsymbol{\mu}) \right\}} {|\widehat{\boldsymbol{\Sigma}}| \, e^p } \\ \pause
& = & n \left( \ln |\boldsymbol{\Sigma}| - \ln |\widehat{\boldsymbol{\Sigma}}| + tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) + (\overline{\mathbf{y}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} (\overline{\mathbf{y}}-\boldsymbol{\mu}) - p \right) \pause
\end{eqnarray*}
} % End size
\vspace{10mm}
To avoid numerical problems, drop the $n$ and minimize the rest.
\end{frame}

\begin{frame}
\frametitle{Minimize the ``Objective Function''}
\framesubtitle{Over a restricted parameter space}
Minimize
\begin{displaymath}
\ln |\boldsymbol{\Sigma}| - \ln |\widehat{\boldsymbol{\Sigma}}| + tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) + (\overline{\mathbf{y}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} (\overline{\mathbf{y}}-\boldsymbol{\mu}) - p
\end{displaymath} \pause
Or, if $H_0$ is concerned only with $\boldsymbol{\Sigma}$ (common), estimate $\boldsymbol{\mu}$ with $\overline{\mathbf{y}}$, \pause and minimize
\begin{displaymath}
\ln |\boldsymbol{\Sigma}| - \ln |\widehat{\boldsymbol{\Sigma}}| + tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) - p
\end{displaymath} \pause
{\footnotesize
\begin{itemize}
\item Then multiply the value at the stopping point by $n$ to get $G^2$. \pause
\item Actually \texttt{proc calis} multiplies by $n-1$. \pause
\item Still okay as $n \rightarrow \infty$. \pause
\item Maybe it's to compensate for a possible $n-1$ in the denominator of $\widehat{\boldsymbol{\Sigma}}$.
\end{itemize}
} % End size
\end{frame}

\begin{frame}
\frametitle{Later in the course}
%\framesubtitle{}
\begin{itemize}
\item $\boldsymbol{\Sigma}$ is the covariance matrix of the \emph{observable} variables. \pause
\item The model is based on systems of equations with unknown parameters $\boldsymbol{\theta} \in \Theta$. \pause
\item Calculate $\boldsymbol{\Sigma} = \boldsymbol{\Sigma}(\boldsymbol{\theta})$. \pause
\item Minimize the objective function \pause
\begin{displaymath}
\ln |\boldsymbol{\Sigma}(\boldsymbol{\theta})| - \ln |\widehat{\boldsymbol{\Sigma}}| + tr(\boldsymbol{\widehat{\Sigma}} \boldsymbol{\Sigma}(\boldsymbol{\theta})^{-1}) - p
\end{displaymath} \pause
over all $\boldsymbol{\theta} \in \Theta$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{But first}
%\framesubtitle{}
But first, a computed example of a \emph{direct} test of $H_0: \sigma_{ij}=0$ for $i \neq j$ for a multivariate normal model.
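\end{frame}

\begin{frame}[fragile]
\frametitle{Preview: a direct test of $H_0: \sigma_{ij}=0$ in code}
\framesubtitle{A small sketch with simulated data; not the computed example itself}
This sketch just plugs a sample covariance matrix into $G^2 = n\left(\sum_{j=1}^p \ln\widehat{\sigma}_j^2 - \ln |\widehat{\boldsymbol{\Sigma}}| \right)$. The data and the software (Python) are illustrative assumptions; under $H_0$ there are $p(p-1)/2$ covariances set to zero, which gives the degrees of freedom.
\begin{verbatim}
import numpy as np
from scipy.stats import chi2

rng = np.random.default_rng(2015)   # simulated data for illustration
n, p = 150, 4
Y = rng.standard_normal((n, p))     # independent columns, so H0 is true

Sigma_hat = np.cov(Y, rowvar=False, bias=True)   # MLE: divide by n

G2 = n * (np.sum(np.log(np.diag(Sigma_hat)))
          - np.log(np.linalg.det(Sigma_hat)))
df = p * (p - 1) // 2               # number of covariances set to zero
print(round(G2, 2), df, round(chi2.sf(G2, df), 3))
\end{verbatim}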
\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Copyright Information} This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/431s15} {\footnotesize \texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/431s15}} \end{frame} \end{document} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{} %\framesubtitle{} \begin{itemize} \item \item \item \end{itemize} \end{frame} % \stackrel{c}{\mathbf{X}} \stackrel{\top}{\vphantom{r}_i}