% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
%\usetheme{Berkeley}
% \usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
\usetheme{AnnArbor} % CambridgeUS: Displays one section at a time. Good if there are a lot of sections or if they have long titles.
\usepackage{comment}
\usepackage{alltt}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{amsfonts} % for \mathbb{R} The set of reals
\usepackage{mathtools} % For symbol under multiple integrals
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{Tests and Confidence Intervals\footnote{See last slide for copyright information.}}
\subtitle{STA 302 Fall 2020}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Normal Model}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The Normal Model}
\framesubtitle{Section 7.6 in the text}

{\Huge
\begin{displaymath}
\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}
\end{displaymath}
\pause
} % End size

\vspace{5mm}
where
\begin{itemize}
\item[] $\mathbf{X}$ is an $n \times (k+1)$ matrix of observed constants with linearly independent columns.
\item[] $\boldsymbol{\beta}$ is a $(k+1) \times 1$ matrix of unknown constants.
\item[] $\boldsymbol{\epsilon} \sim N(\mathbf{0},\sigma^2I_n)$.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Using facts about the multivariate normal}
\pause
%\framesubtitle{}

\begin{itemize}
\item For the multivariate normal, zero covariance implies independence.
\item If $\mathbf{v} \sim N_p(\boldsymbol{\mu}, \boldsymbol{\Sigma})$, then
\begin{itemize}
\item $\mathbf{Av} + \mathbf{c} \sim N_q(\mathbf{A}\boldsymbol{\mu}+\mathbf{c}, \mathbf{A}\boldsymbol{\Sigma} \mathbf{A}^\prime)$.
\item If $\boldsymbol{\Sigma}$ is positive definite, $w = (\mathbf{v}-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(\mathbf{v}-\boldsymbol{\mu}) \sim \chi^2(p)$.
\end{itemize}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Distribution of $\widehat{\boldsymbol{\beta}}$}
\pause
%\framesubtitle{}

For $\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$ with $\boldsymbol{\epsilon} \sim N(\mathbf{0},\sigma^2I_n)$,
\begin{itemize}
\item $\mathbf{y} \sim N(\mathbf{X}\boldsymbol{\beta},\sigma^2I_n)$. \pause
\item $\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime \mathbf{y} \pause = \mathbf{Ay}$. \pause
\item Earlier calculations yielded \\
$E(\widehat{\boldsymbol{\beta}}) = \boldsymbol{\beta}$ and $cov(\widehat{\boldsymbol{\beta}}) = \sigma^2(\mathbf{X}^\prime \mathbf{X})^{-1}$\pause, so
\end{itemize}

{\LARGE
\begin{displaymath}
\widehat{\boldsymbol{\beta}} \sim N_{k+1}\left(\boldsymbol{\beta},\sigma^2(\mathbf{X}^\prime \mathbf{X})^{-1}\right)
\end{displaymath}
} % End size

\end{frame}
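%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
\frametitle{Seeing $\widehat{\boldsymbol{\beta}}$ and its covariance matrix in R}
\framesubtitle{A sketch only; \texttt{mydata}, \texttt{y}, \texttt{x1}, \texttt{x2} are hypothetical names}

The commands below (no output shown) illustrate
$\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime \mathbf{y}$
and $cov(\widehat{\boldsymbol{\beta}}) = \sigma^2(\mathbf{X}^\prime \mathbf{X})^{-1}$.
Since $\sigma^2$ is unknown, \texttt{vcov} plugs in an estimate of it.
{\footnotesize
\begin{alltt}
{\color{blue}> fit = lm(y ~ x1 + x2, data = mydata)
> X = model.matrix(fit)
> solve(t(X) %*% X) %*% t(X) %*% mydata$y   # Should match coef(fit)
> sig2hat = sum(residuals(fit)^2) / df.residual(fit)
> sig2hat * solve(t(X) %*% X)               # Should match vcov(fit)
}\end{alltt}
} % End size

\end{frame}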
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Independence of $\widehat{\boldsymbol{\beta}}$ and $\widehat{\boldsymbol{\epsilon}}$}
\framesubtitle{Like the independence of $\overline{x}$ and $s^2$}
\pause

\begin{displaymath}
\left(\begin{array}{c}
(\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime \\ \hline
\mathbf{I}-\mathbf{H}
\end{array}\right) \mathbf{y} =
\left(\begin{array}{c}
\widehat{\boldsymbol{\beta}} \\ \hline
\widehat{\boldsymbol{\epsilon}}
\end{array}\right)
\end{displaymath}
\pause

\begin{itemize}
\item So $\widehat{\boldsymbol{\beta}}$ and $\widehat{\boldsymbol{\epsilon}}$ are jointly multivariate normal. \pause
\item Independence will follow from zero covariance. \pause
\item Use $cov(\mathbf{Ay},\mathbf{By}) = \mathbf{A}cov(\mathbf{y}) \mathbf{B}^\prime$.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Independence of $\widehat{\boldsymbol{\beta}}$ and $\widehat{\boldsymbol{\epsilon}}$, continued}
\framesubtitle{Using $cov(\mathbf{Ay},\mathbf{By}) = \mathbf{A}cov(\mathbf{y}) \mathbf{B}^\prime$}

%{\LARGE
\begin{eqnarray*}
cov\left(\widehat{\boldsymbol{\beta}} , \widehat{\boldsymbol{\epsilon}} \right)
& = & cov\left((\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime \mathbf{y} , (\mathbf{I}-\mathbf{H}) \mathbf{y} \right) \\
& = & (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime ~ \sigma^2I_n ~ (\mathbf{I}-\mathbf{H})^\prime \\ \pause
& = & \sigma^2 (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime (\mathbf{I}-\mathbf{H}) \\ \pause
& = & \sigma^2 \left( (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime - (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime {\color{blue}\mathbf{H}} \right) \\ \pause
& = & \sigma^2 \left( (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime - (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime {\color{blue}\mathbf{X}(\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime } \right) \\ \pause
& = & \sigma^2 \left( (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime - (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime \right) \\ \pause
& = & \mathbf{O} \pause
\end{eqnarray*}
%} % End size
So $\widehat{\boldsymbol{\beta}}$ and $\widehat{\boldsymbol{\epsilon}}$ are independent.

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Distribution of \emph{SSE}$/\sigma^2$}
\framesubtitle{Using $(\mathbf{v}-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(\mathbf{v}-\boldsymbol{\mu}) \sim \chi^2(p)$.}
\pause

{\small
Earlier, we found
$(\mathbf{y}-\mathbf{X}\boldsymbol{\beta})^\prime (\mathbf{y}-\mathbf{X}\boldsymbol{\beta}) =
\widehat{\boldsymbol{\epsilon}}^{\,\prime} \widehat{\boldsymbol{\epsilon}} +
(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})^\prime \mathbf{X}^\prime \mathbf{X}(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})$.
\pause
\renewcommand{\arraystretch}{1.5}
\begin{displaymath}
\begin{array}{ccccc}
\frac{1}{\sigma^2} (\mathbf{y}-\mathbf{X}\boldsymbol{\beta})^\prime (\mathbf{y}-\mathbf{X}\boldsymbol{\beta})
& = & \frac{\mbox{\small\emph{SSE}}}{\sigma^2} & + &
(\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})^\prime \frac{1}{\sigma^2} \mathbf{X}^\prime\mathbf{X} (\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta}) \\ \pause
w & = & w_1 & + & w_2
\end{array}
\end{displaymath}
\pause
\renewcommand{\arraystretch}{1.0}
\vspace{-3mm}
\begin{itemize}
\item $\mathbf{y} \sim N_n(\mathbf{X}\boldsymbol{\beta},\sigma^2\mathbf{I}_n)$\pause, so
\item[] \hspace{10mm} $w = (\mathbf{y}-\mathbf{X}\boldsymbol{\beta})^\prime \left( \sigma^2\mathbf{I}_n \right)^{-1} (\mathbf{y}-\mathbf{X}\boldsymbol{\beta}) \pause \sim \chi^2(n)$. \pause
\item $\widehat{\boldsymbol{\beta}} \sim N_{k+1}\left(\boldsymbol{\beta},\sigma^2(\mathbf{X}^\prime \mathbf{X})^{-1}\right)$\pause, so
\item[] \hspace{10mm} $w_2 = (\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta})^\prime \left( \sigma^2(\mathbf{X}^\prime \mathbf{X})^{-1} \right)^{-1} (\widehat{\boldsymbol{\beta}}-\boldsymbol{\beta}) \pause \sim \chi^2(k+1)$ \pause
\item $w_1$ and $w_2$ are independent because $\widehat{\boldsymbol{\beta}}$ and $\widehat{\boldsymbol{\epsilon}}$ are independent. \pause
\item So $w_1 = \frac{\mbox{\small\emph{SSE}}}{\sigma^2}$ is chi-squared\pause, with degrees of freedom
$n-(k+1) = n-k-1$ (compare moment-generating functions: $M_{w_1}(t) = M_w(t)/M_{w_2}(t)$). \hspace{10mm} $\blacksquare$ \pause
\item {\color{red}This result does not depend on the model having an intercept, and it does not depend on the truth of any null hypothesis.}
\end{itemize}
} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{$t$ distribution}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Tests and confidence intervals for $\mathbf{a}^\prime \boldsymbol{\beta}$}
\framesubtitle{For the Gauss-Markov Theorem, it was called $\boldsymbol{\ell}^\prime \boldsymbol{\beta}$. \\ See Section 8.6 in the text.}
\pause

\begin{itemize}
\item Single linear combination of the $\beta_j$ values.
\item Including any individual $\beta_j$.
\pause
\item Use the $t$ distribution:
\vspace{5mm}
\end{itemize}

{\LARGE
\begin{displaymath}
t = \frac{z}{\sqrt{w/\nu}} \sim t(\nu)
\end{displaymath}
} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Choosing $z$ and $w$ in $t = \frac{z}{\sqrt{w/\nu}} \sim t(\nu)$}
\pause
%\framesubtitle{}

\begin{itemize}
\item $\widehat{\boldsymbol{\beta}} \sim N_{k+1}\left(\boldsymbol{\beta},\sigma^2(\mathbf{X}^\prime \mathbf{X})^{-1}\right)$ \pause
\item So $\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} \sim N(\mathbf{a}^\prime \boldsymbol{\beta},\ldots)$ \pause
\begin{eqnarray*}
cov\left( \mathbf{a}^\prime \widehat{\boldsymbol{\beta}} \right)
& = & cov\left( \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime \mathbf{y} \right) \\ \pause
& = & \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime cov(\mathbf{y})
      \left( \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime \right)^\prime \\ \pause
& = & \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime ~\sigma^2I_n~ \mathbf{X} (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a} \\ \pause
& = & \sigma^2 \, \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{X}^\prime \mathbf{X} (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a} \\ \pause
& = & \sigma^2 \, \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a}
\end{eqnarray*}
\pause
\item And $\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} \sim N(\mathbf{a}^\prime \boldsymbol{\beta} \, , \, \sigma^2 \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a})$. \pause
\item Standardize $\mathbf{a}^\prime \widehat{\boldsymbol{\beta}}$, subtracting off the mean and dividing by the standard deviation.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$t = \frac{z}{\sqrt{w/\nu}} \sim t(\nu)$}
%\framesubtitle{}

\begin{itemize}
\item $\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} \sim N(\mathbf{a}^\prime \boldsymbol{\beta} \, , \, \sigma^2 \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a})$. \pause
\item Center and scale:
{\Large
\begin{displaymath}
z = \frac{\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} - \mathbf{a}^\prime \boldsymbol{\beta}}
         {\sqrt{\sigma^2 \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a}}} \sim N(0,1)
\end{displaymath}
\pause
% Discuss! If sigma-squared were known ...
} % End size
\item For the denominator, use
{\Large
\begin{displaymath}
w = \frac{SSE}{\sigma^2} = \frac{\sum_{i=1}^n(y_i-\widehat{y}_i)^2}{\sigma^2} \sim \chi^2(n-k-1)
\end{displaymath}
\pause
} % End size
\item With $z$ and $w$ independent.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$t = \frac{z}{\sqrt{w/(n-k-1)}} \sim t(n-k-1)$}
%\framesubtitle{}

With $z = \frac{\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} - \mathbf{a}^\prime \boldsymbol{\beta}}
              {\sqrt{\sigma^2 \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a}}} \sim N(0,1)$
and $w = \frac{SSE}{\sigma^2} \sim \chi^2(n-k-1)$,

%{\LARGE
\begin{eqnarray*}
t & = & \frac{z}{\sqrt{w/\nu}} \\ \pause
  & = & \frac{\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} - \mathbf{a}^\prime \boldsymbol{\beta}}
             {\sqrt{\sigma^2 \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a}}}
        \left/ \sqrt{\frac{{\color{blue} \small \mbox{\emph{SSE}} }}{\sigma^2}/({\color{blue}n-k-1})} \right. \\ \pause
  & = & \frac{\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} - \mathbf{a}^\prime \boldsymbol{\beta}}
             {\sqrt{{\color{blue} \small \mbox{\emph{MSE}}} \, \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a}}}
   \pause \sim t(n-k-1)
\end{eqnarray*}
%} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The $t$ density}
%\framesubtitle{}

\begin{columns} % One wide column to use more of the margins
\column{1.2\textwidth}

\includegraphics[width=5in]{CI-curve}

\vspace{15mm}
{\Large
\hspace{20mm} If $t \sim t(df)$, then $P\{ t > t_{\alpha/2,df}\} = \frac{\alpha}{2}$.
} % End size

\end{columns}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Confidence Interval for $\mathbf{a}^\prime \boldsymbol{\beta}$}
%\framesubtitle{}
%\begin{columns} % One wide column to use more of the margins
%\column{1.2\textwidth}

\begin{center}
\includegraphics[width=3in]{CI-curve}
\end{center}

{\small
\begin{eqnarray*}
1-\alpha & = & P\{ -t_{\alpha/2} < t < t_{\alpha/2} \} \\ \pause
& = & P\left\{ -t_{\alpha/2} <
      \frac{\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} - \mathbf{a}^\prime \boldsymbol{\beta}}
           {\sqrt{ {\footnotesize \mbox{\emph{MSE}} }~ \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a}}}
      < t_{\alpha/2} \right\} \\ \pause
&& \vdots \\ \pause
& = & P\left\{ \mathbf{a}^\prime \widehat{\boldsymbol{\beta}} - t_{\alpha/2}
      \sqrt{ {\footnotesize \mbox{\emph{MSE}} }~ \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a}}
      < \mathbf{a}^\prime \boldsymbol{\beta} \right. \\
&& \hspace{12mm} < \left. \mathbf{a}^\prime \widehat{\boldsymbol{\beta}} + t_{\alpha/2}
      \sqrt{ {\footnotesize \mbox{\emph{MSE}} }~ \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a}} \right\} \pause
\end{eqnarray*}
} % End size
%\end{columns}

Or, $\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} \pm t_{\alpha/2} \,
\sqrt{ {\footnotesize \mbox{\emph{MSE}} }~ \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{a}}$.

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Testing $H_0: \mathbf{a}^\prime \boldsymbol{\beta} = t_0$}
\pause
%\framesubtitle{}

\begin{itemize}
\item Controlling (allowing) for High School GPA, does score on the OSSLT (Ontario Secondary School Literacy Test) predict success in university? \pause
\item $y_i = \beta_0 + \beta_1 x_{i,1} + \beta_2 x_{i,2} + \epsilon_i$
\begin{itemize}
\item $x_{i,1}$ = HS GPA
\item $x_{i,2}$ = OSSLT
\item $y_i$ = First year university GPA
\end{itemize} \pause
\item $y_i = {\color{red}(\beta_0 + \beta_1 x_{i,1})} + \beta_2 x_{i,2} + \epsilon_i$ \pause
\item $H_0:\beta_2=0$. \pause
\item $H_0: \left(\begin{array}{ccc} 0 & 0 & 1 \end{array} \right)
      \left(\begin{array}{c} \beta_0 \\ \beta_1 \\ \beta_2 \end{array} \right) = 0$.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Test Statistic for $H_0: \mathbf{a}^\prime \boldsymbol{\beta} = t_0$}
\pause
%\framesubtitle{}
% {\footnotesize\mbox{\emph{MSE}} }~

\begin{itemize}
\item $t = \frac{\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} - \mathbf{a}^\prime \boldsymbol{\beta}}
{\sqrt{ {\footnotesize \mbox{\emph{MSE}} }~ \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a}}} \sim t(n-k-1)$ \pause
\item If $H_0: \mathbf{a}^\prime \boldsymbol{\beta} = t_0$ is true, \pause
\item $t^* = \frac{\mathbf{a}^\prime \widehat{\boldsymbol{\beta}} - t_0}
{\sqrt{ {\footnotesize \mbox{\emph{MSE}} }~ \mathbf{a}^\prime (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{a}}} \sim t(n-k-1)$. \pause
\item The most common example is $H_0: \beta_j=0$. \pause
\item Or something like $H_0: \beta_1-\beta_2=0$, if it makes sense.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
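\begin{frame}[fragile]
\frametitle{Computing $t^*$ and the confidence interval in R}
\framesubtitle{A sketch only; \texttt{gpa} is a hypothetical data frame with the variables below}

For $H_0: \beta_2 = 0$ (the OSSLT slope, controlling for HS GPA),
take $\mathbf{a}^\prime = (0, 0, 1)$ and $t_0 = 0$. No output is shown.
{\footnotesize
\begin{alltt}
{\color{blue}> fit = lm(univgpa ~ hsgpa + osslt, data = gpa)
> a = c(0, 0, 1); t0 = 0
> est = sum(a * coef(fit))                    # a-prime beta-hat
> se  = sqrt(drop(t(a) %*% vcov(fit) %*% a))  # sqrt of MSE a'(X'X)^-1 a
> tstar = (est - t0) / se
> 2 * pt(-abs(tstar), df.residual(fit))       # Two-sided p-value
> est + c(-1, 1) * qt(0.975, df.residual(fit)) * se  # 95% confidence interval
}\end{alltt}
} % End size
The $t^*$ and $p$-value should agree with the \texttt{osslt} line of \texttt{summary(fit)}.

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%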
\section{$F$ distribution}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Testing several linear combinations simultaneously}
\pause
\framesubtitle{Sections 8.2-8.4 in the text, especially 8.4.}

Question: Does HS GPA in the first two years help predict university GPA if you know the HS GPA in years 3 and 4? \pause
\begin{displaymath}
y_i = \beta_0 + \beta_1 x_{i,1} + \beta_2 x_{i,2} + \beta_3 x_{i,3} + \beta_4 x_{i,4} + \epsilon_i
\end{displaymath}
\pause

\begin{itemize}
\item We are considering two competing models. \pause
\item The first model has HS GPA for all four years. \pause
\item The second model has HS GPA for only years 3 and 4. \pause
\item The second model is obtained from the first by setting $\beta_1=\beta_2=0$. \pause
\item That's the null hypothesis.
% \pause
% \item $H_0: \mathbf{C}\boldsymbol{\beta} = \mathbf{0}$.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$H_0: \beta_1=\beta_2=0$ in matrix form}
%\framesubtitle{}

{\Large
\begin{displaymath}
\begin{array}{cccc} % 4 columns
\left(\begin{array}{ccccc}
0 & 1 & 0 & 0 & 0 \\
0 & 0 & 1 & 0 & 0
\end{array}\right) & % End of C
\left( \begin{array}{c} \beta_0 \\ \beta_1 \\ \beta_2 \\ \beta_3 \\ \beta_4 \end{array} \right) % End of beta
& = & % =
\left( \begin{array}{c} 0 \\ 0 \end{array} \right) \\ % End of zero
&&& \\ \pause
\mathbf{C} & \boldsymbol{\beta} & = & \mathbf{t}
\end{array} % End the big equation
\end{displaymath}
\pause
} % End size

where $\mathbf{C}$ is $q \times (k+1)$, with $q \leq k+1$ and linearly independent rows.

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The $F$ Distribution}
%\framesubtitle{}

If $w_1 \sim \chi^2(\nu_1)$ and $w_2 \sim \chi^2(\nu_2)$ are independent, then
\vspace{5mm}
{\LARGE
\begin{displaymath}
F = \frac{w_1/\nu_1}{w_2/\nu_2} \sim F(\nu_1,\nu_2)
\end{displaymath}
} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The \emph{general linear test} of $H_0: \mathbf{C}\boldsymbol{\beta} = \mathbf{t}$}
\pause
%\framesubtitle{}

From the formula sheet: if $\mathbf{v} \sim N_p(\boldsymbol{\mu}, \boldsymbol{\Sigma})$, then
$\mathbf{Av} + \mathbf{c} \sim N_q(\mathbf{A}\boldsymbol{\mu}+\mathbf{c}, \mathbf{A}\boldsymbol{\Sigma} \mathbf{A}^\prime)$, and
$w = (\mathbf{v}-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(\mathbf{v}-\boldsymbol{\mu}) \sim \chi^2(p)$.
\vspace{4mm} \pause

$\widehat{\boldsymbol{\beta}} \sim N_{k+1}\left(\boldsymbol{\beta},\sigma^2(\mathbf{X}^\prime \mathbf{X})^{-1}\right)$, so
$\mathbf{C}\widehat{\boldsymbol{\beta}} \sim N_q(\mathbf{C}\boldsymbol{\beta}, \sigma^2 \mathbf{C}(\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{C}^\prime)$\pause,
and if $H_0: \mathbf{C}\boldsymbol{\beta} = \mathbf{t}$ is true, \pause
\begin{eqnarray*}
w_1 & = & (\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})^\prime (\sigma^2\mathbf{C}(\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{C}^\prime)^{-1} (\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t}) \sim \chi^2(q) \\ \pause
    & = & \frac{1}{\sigma^2} (\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})^\prime (\mathbf{C}(\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{C}^\prime)^{-1} (\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t}) \\ \pause
&&\\
w_2 & = & \frac{SSE}{\sigma^2} \sim \chi^2(n-k-1) \\ \pause
&&\\
F^* & = & \frac{w_1/q}{w_2/(n-k-1)} \pause \sim F(q,n-k-1)
\end{eqnarray*}
\pause
Here $w_1$ and $w_2$ are independent, because $\widehat{\boldsymbol{\beta}}$ and $\widehat{\boldsymbol{\epsilon}}$ are independent. \pause
{\color{red}This result does not depend on the model having an intercept.}
% Again, dividing by w2 has the effect of substituting MSE for sigma-squared.

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Formula for $F^*$}
%\framesubtitle{}
% {\footnotesize\mbox{\emph{MSE}} }~

%{\LARGE
\begin{eqnarray*}
F^* & = & \frac{w_1/q}{w_2/(n-k-1)} \\ \pause
    & = & \frac{\frac{1}{\sigma^2} (\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})^\prime (\mathbf{C}(\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{C}^\prime)^{-1} (\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t})/q}
               {\left. \frac{{\footnotesize\mbox{\emph{SSE}} }}{\sigma^2} \right/ (n-k-1)} \\ \pause
    & = & \frac{(\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})^\prime (\mathbf{C}(\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{C}^\prime)^{-1} (\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t})}
               {q ~ {\mbox{\emph{MSE}} }} \\ \pause
    & \stackrel{H_0}{\sim} & F(q,n-k-1)
\end{eqnarray*}
%} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
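\begin{frame}[fragile]
\frametitle{Computing $F^*$ in R}
\framesubtitle{A sketch only; \texttt{hs} is a hypothetical data frame with four years of HS GPA}

For $H_0: \beta_1=\beta_2=0$, build $\mathbf{C}$ and apply the formula directly.
Since \texttt{vcov(fit)} is already \emph{MSE}$\,(\mathbf{X}^\prime\mathbf{X})^{-1}$, there is no need to divide by $\sigma^2$.
{\footnotesize
\begin{alltt}
{\color{blue}> fit = lm(univgpa ~ gpa1 + gpa2 + gpa3 + gpa4, data = hs)
> C = rbind( c(0,1,0,0,0),
+            c(0,0,1,0,0) )
> t0 = c(0,0); q = nrow(C)
> d = C %*% coef(fit) - t0
> Fstar = t(d) %*% solve(C %*% vcov(fit) %*% t(C)) %*% d / q
> 1 - pf(Fstar, q, df.residual(fit))   # p-value
}\end{alltt}
} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%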
\begin{frame}
\frametitle{$F^* = \frac{(\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})^\prime (\mathbf{C}(\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{C}^\prime)^{-1} (\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t})} {q ~ {\small\mbox{\emph{MSE}} }} \stackrel{H_0}{\sim} F(q,n-k-1) $ }
%\framesubtitle{}

\begin{center}
\includegraphics[width=3.5in]{Fcurve1}
\end{center}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{$p$-value}
%\framesubtitle{}

\begin{center}
\includegraphics[width=3.5in]{Fcurve2}
\end{center}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Logically equivalent null hypotheses}
\pause
%\framesubtitle{}

\begin{displaymath}
y_i = \beta_0 + \beta_1 x_{i,1} + \beta_2 x_{i,2} + \beta_3 x_{i,3} + \beta_4 x_{i,4} + \epsilon_i
\end{displaymath}
\pause
\begin{eqnarray*}
&& H_0: \beta_1-\beta_2=0\pause, \beta_2-\beta_3=0\pause, \beta_3=0 \\ \pause
& \Leftrightarrow & \beta_1 = \beta_2 = \beta_3=0
\end{eqnarray*}
\pause
Better hope it does not matter how you state $H_0$! \pause

\vspace{3mm}
\emph{Theorem}: Let $\mathbf{A}$ be a $q \times q$ non-singular matrix, so that
$\mathbf{C}\boldsymbol{\beta} = \mathbf{t} \Leftrightarrow \mathbf{AC}\boldsymbol{\beta} = \mathbf{At}$. \pause
The $F^*$ statistic for testing $H_0: (\mathbf{AC})\boldsymbol{\beta} = (\mathbf{At})$ is the same as the statistic for testing $H_0: \mathbf{C}\boldsymbol{\beta} = \mathbf{t}$.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Proof}
% {q ~ {\mbox{\emph{MSE}} }}
%\framesubtitle{}

Test statistic for $H_0: (\mathbf{AC})\boldsymbol{\beta} = (\mathbf{At})$ is
\begin{eqnarray*}
F^* & = & \frac{(\mathbf{AC}\widehat{\boldsymbol{\beta}} -\mathbf{At})^\prime \left(\mathbf{AC}(\mathbf{X}^\prime \mathbf{X})^{-1}(\mathbf{AC})^\prime\right)^{-1} (\mathbf{AC}\widehat{\boldsymbol{\beta}} - \mathbf{At})} {q ~ {\mbox{\emph{MSE}} }} \\ \pause
    & = & \frac{\left(\mathbf{A}(\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})\right)^\prime \left(\mathbf{AC}(\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{C}^\prime\mathbf{A}^\prime \right)^{-1} \mathbf{A}(\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t})} {q ~ {\mbox{\emph{MSE}} }} \\ \pause
    & = & \frac{(\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})^\prime\mathbf{A}^\prime \left(\mathbf{{\color{red}A}C}(\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{C}^\prime{\color{blue}\mathbf{A}^\prime} \right)^{-1} \mathbf{A}(\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t})} {q ~ {\mbox{\emph{MSE}} }} \\ \pause
    & = & \frac{(\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})^\prime\mathbf{A}^\prime {\color{blue}\mathbf{A}^{\prime-1}} \left(\mathbf{C}(\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{C}^\prime \right)^{-1} {\color{red}\mathbf{A}^{-1}} \mathbf{A}(\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t})} {q ~ {\mbox{\emph{MSE}} }} \\ \pause
    & = & \frac{(\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})^\prime (\mathbf{C}(\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{C}^\prime)^{-1} (\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t})} {q ~ {\mbox{\emph{MSE}} }} \pause
\end{eqnarray*}
which is the test statistic for $H_0: \mathbf{C}\boldsymbol{\beta} = \mathbf{t}$. \hspace{5mm} $\blacksquare$

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Does the example fit the pattern $H_0: (\mathbf{AC})\boldsymbol{\beta} = (\mathbf{At})$ ?}
\framesubtitle{ $H_0: \beta_1-\beta_2=0, \beta_2-\beta_3=0, \beta_3=0 \Leftrightarrow \beta_1 = \beta_2 = \beta_3=0$}

\begin{displaymath}
H_0: ~
\begin{array}{cccc} % 4 columns
\left(\begin{array}{rrrrr}
0 & ~1 & -1 &  0 & ~0 \\
0 & ~0 &  1 & -1 & ~0 \\
0 & ~0 &  0 &  1 & ~0
\end{array}\right) & % End of C
\left( \begin{array}{c} \beta_0 \\ \beta_1 \\ \beta_2 \\ \beta_3 \\ \beta_4 \end{array} \right) % End of beta
& = & % =
\left( \begin{array}{c} 0 \\ 0 \\ 0 \end{array} \right) \\ % End of zero
\end{array} % End the matrix equation
\end{displaymath}
\pause

Want $\mathbf{A} \left(\begin{array}{rrrrr}
0 & ~1 & -1 &  0 & ~0 \\
0 & ~0 &  1 & -1 & ~0 \\
0 & ~0 &  0 &  1 & ~0
\end{array}\right) =
\left(\begin{array}{ccccc}
0 & 1 & 0 & 0 & 0 \\
0 & 0 & 1 & 0 & 0 \\
0 & 0 & 0 & 1 & 0
\end{array}\right)$
\pause

\vspace{3mm}
Yes: $ \left(\begin{array}{ccc}
1 & 1 & 1 \\
0 & 1 & 1 \\
0 & 0 & 1
\end{array}\right)
\left(\begin{array}{rrrrr}
0 & ~1 & -1 &  0 & ~0 \\
0 & ~0 &  1 & -1 & ~0 \\
0 & ~0 &  0 &  1 & ~0
\end{array}\right) =
\left(\begin{array}{ccccc}
0 & 1 & 0 & 0 & 0 \\
0 & 0 & 1 & 0 & 0 \\
0 & 0 & 0 & 1 & 0
\end{array}\right)$

\end{frame}
% The trick I used here was to locate the matrix B_1 in C. To convert it to B_2,
% let A = B_2 B_1^{-1}. This will first convert B_1 into I, then I into B_2.
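%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
\frametitle{Checking the matrix algebra in R}
\framesubtitle{A small sketch; nothing here depends on data}

A direct check that the $\mathbf{A}$ on the previous slide converts one $\mathbf{C}$ matrix into the other (no output shown):
{\footnotesize
\begin{alltt}
{\color{blue}> C1 = rbind( c(0,1,-1,0,0), c(0,0,1,-1,0), c(0,0,0,1,0) )
> C2 = rbind( c(0,1,0,0,0),  c(0,0,1,0,0),  c(0,0,0,1,0) )
> A  = rbind( c(1,1,1), c(0,1,1), c(0,0,1) )
> A %*% C1    # Equals C2
> det(A)      # Non-zero, so A is invertible
}\end{alltt}
} % End size
One way to find such an $\mathbf{A}$ is \texttt{C2 \%*\% MASS::ginv(C1)}.

\end{frame}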
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Linearly equivalent null hypotheses}
%\framesubtitle{}

\begin{itemize}
\item Two null hypotheses are \emph{linearly equivalent} if their $\mathbf{C}$ matrices are row equivalent. \pause
\item Since elementary row operations correspond to multiplication by invertible matrices, all linearly equivalent null hypotheses yield the same $F$ statistic for a given set of data.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Full versus Reduced Model Approach}
\framesubtitle{Also sometimes called ``Extra sum of squares''}
\pause

\begin{itemize}
\item Divide the explanatory variables into two subsets, $A$ and $B$. Want to test $B$ controlling for $A$. \pause
\item For example, $A$ is HS GPA in years 3 and 4; $B$ is HS GPA in years 1 and 2. \pause
\item Fit a model with both $A$ and $B$: Call it the \emph{Full Model}, or the \emph{Unrestricted Model}. \pause
\item Fit a model with just $A$: Call it the \emph{Reduced Model} or \emph{Restricted Model}. \pause
\item The restricted model is restricted by the null hypothesis. \pause $H_0$ says the variables in set $B$ do not matter. \\ \pause
\item The $F$-test is an exact likelihood ratio test for comparing the two models.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{When you add the $q$ additional explanatory variables in set $B$, $R^2$ can only go up:
$R^2(\mbox{\emph{full}}) \geq R^2(\mbox{\emph{reduced}})$}
\pause
%\framesubtitle{}

By how much? That is the basis of the $F$ test. \pause
{\LARGE
\begin{eqnarray*}
F^* & = & \frac{\left(R^2(\mbox{\emph{full}}) - R^2(\mbox{\emph{reduced}})\right)/q}{\left(1-R^2(\mbox{\emph{full}})\right)/(n-k-1)} \\ \pause
&&\\
    & = & \frac{\mbox{\emph{SSR(full)$-$SSR(reduced)}}}{q \, \mbox{\emph{MSE(full)}}} \\ \pause
    & \stackrel{H_0}{\sim} & F(q,n-k-1)
\end{eqnarray*}
} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
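\begin{frame}[fragile]
\frametitle{Full versus reduced models in R}
\framesubtitle{A sketch only; \texttt{hs} is the same hypothetical data frame as before}

Fit both models and let \texttt{anova} compare them; the $F$ it reports should match $F^*$ from the preceding slides.
{\footnotesize
\begin{alltt}
{\color{blue}> full    = lm(univgpa ~ gpa1 + gpa2 + gpa3 + gpa4, data = hs)
> reduced = lm(univgpa ~ gpa3 + gpa4, data = hs)
> anova(reduced, full)    # F test of H0: beta1 = beta2 = 0
> summary(full)$r.squared - summary(reduced)$r.squared  # Change in R-squared
}\end{alltt}
} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%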
\begin{frame}
\frametitle{Theorem 8.4d, page 201}
%\framesubtitle{}

{\LARGE
\begin{eqnarray*}
F^* & = & \frac{\mbox{\emph{SSR(full)$-$SSR(reduced)}}}{q \, \mbox{\emph{MSE}}} \\
&&\\
    & = & \frac{(\mathbf{C}\widehat{\boldsymbol{\beta}} -\mathbf{t})^\prime (\mathbf{C}(\mathbf{X}^\prime \mathbf{X})^{-1}\mathbf{C}^\prime)^{-1} (\mathbf{C}\widehat{\boldsymbol{\beta}} - \mathbf{t})} {q \, \mbox{\emph{MSE}}}
\end{eqnarray*}
\pause
} % End size

\vspace{5mm}
Proved using matrix-valued Lagrange multipliers. Proof omitted. \pause
{\color{red}This result does not depend on the model having an intercept.}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Strength of Relationship: Change in $R^2$ is not enough}
\pause
%\framesubtitle{}

{\Large
\begin{eqnarray*}
F^* & = & \frac{\left(R^2(\mbox{\emph{full}}) - R^2(\mbox{\emph{reduced}})\right)/q}{\left(1-R^2(\mbox{\emph{full}})\right)/(n-k-1)} \\ \pause
&&\\
    & = & \left( \frac{n-k-1}{q} \right) \left( \frac{p}{1-p} \right)
\end{eqnarray*}
\pause
} % End size
where
{\Large
\begin{displaymath}
p = \frac{R^2(\mbox{\emph{full}}) - R^2(\mbox{\emph{reduced}})} {1-R^2(\mbox{\emph{reduced}})}
\pause = \frac{qF^*}{qF^*+n-k-1}
\end{displaymath}
} % End size

\vspace{4mm}
Call $p$ the ``proportion of remaining variation.''

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Multiple Testing}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Multiple Testing}
%\framesubtitle{}

\begin{itemize}
\item The primary function of hypothesis testing in science is to screen out random garbage. \pause
\item Hold probability of Type I error to a low value; $\alpha=0.05$ is traditional. \pause
\item The distribution theory considers each test in isolation. \pause
\item But in practice, we carry out \emph{lots} of tests on a given data set. \pause
\item If the data are complete random noise, the chance of getting at least one statistically significant result is quite high. \pause
\item For ten independent tests, $1 - 0.95^{10} \approx 0.40$. \pause But the tests are usually not independent.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Bonferroni Correction for Multiple Tests}
%\framesubtitle{}

\begin{itemize}
\item The curse of a thousand $t$-tests. \pause
\item If the null hypotheses of a collection of tests are all true, \pause hold the probability of rejecting one or more to less than $\alpha=0.05$. \pause
\item Based on Bonferroni's inequality: \pause
\begin{displaymath}
Pr\left\{ \bigcup_{j=1}^r A_j \right\} \leq \sum_{j=1}^r Pr\{A_j\}
\end{displaymath}
\pause
\item Applies to any collection of $r$ tests. \pause
\item Assume all $r$ null hypotheses are true. \pause
\item Event $A_j$ is that null hypothesis $j$ is rejected. \pause
\item Do the tests as usual, obtaining $r$ test statistics. \pause
\item For each test, use the significance level $\alpha/r$ instead of $\alpha$.
\end{itemize}

\end{frame}

\begin{frame}
\frametitle{Use the significance level $\alpha/r$ instead of $\alpha$}
\framesubtitle{Bonferroni Correction for $r$ Tests}
\pause

Assuming all $r$ null hypotheses are true, probability of rejecting at least one is \pause
\begin{eqnarray*}
Pr\left\{ \bigcup_{j=1}^r A_j \right\} \pause & \leq & \sum_{j=1}^r Pr\{A_j\} \\ \pause
& = & \sum_{j=1}^r \alpha/r \\ \pause
& = & \alpha
\end{eqnarray*}
\pause
\begin{itemize}
\item Just use critical value(s) for $\alpha/r$ instead of $\alpha$. \pause
\item Or equivalently, multiply the $p$-values by $r$ and compare to $\alpha=0.05$. \pause
\item Call $\alpha=0.05$ the \emph{joint} significance level.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
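\begin{frame}[fragile]
\frametitle{Multiplying the $p$-values by $r$ in R}
\framesubtitle{A sketch with made-up $p$-values, just to show the mechanics}

{\footnotesize
\begin{alltt}
{\color{blue}> pvals = c(0.040, 0.012, 0.300)         # From r = 3 tests (made up)
> pmin(1, pvals * 3)                      # Bonferroni-adjusted p-values
> p.adjust(pvals, method = "bonferroni")  # Same thing
}\end{alltt}
} % End size
Compare the adjusted $p$-values to the joint significance level $\alpha = 0.05$ as usual.

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%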
\begin{frame}[fragile]
\frametitle{Example}
%\framesubtitle{}

{\small % or scriptsize
Most (all?) regression software produces
\begin{itemize}
\item Overall $F$-test for all the explanatory variables at once: \pause $H_0: \beta_1 = \beta_2 = \cdots = \beta_k = 0$ \pause
\item $t$-tests for each regression coefficient, with $H_0: \beta_j=0$ for $j = 1, \ldots, k$.
\end{itemize}
\pause
Analysis strategy: First look at the $F$-test. \pause
\begin{itemize}
\item If $H_0$ is rejected, it means at least one of the $\beta_j$ is not zero\pause, but which one(s)? \pause
\item Now look at the $t$-tests. \pause
\item But protect them with a Bonferroni correction for $k$ tests. \pause
\item With six predictor variables and $n=53$, so $n-k-1 = 53-6-1 = 46$, \pause
\end{itemize}
} % End size

{\footnotesize % or scriptsize
% The alltt environment requires \usepackage{alltt}
\begin{alltt}
{\color{blue}> alpha = 0.05
> qt(1-alpha/2,46) # Unprotected critical value.
}[1] 2.012896
{\color{blue}> a = alpha/6 # Protect for 6 tests
> qt(1-a/2,46) # Bonferroni protected critical value.
}[1] 2.757175
\end{alltt}
} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Advantages and disadvantages of the Bonferroni correction}
%\framesubtitle{}

\begin{itemize}
\item Advantage: Flexibility --- Applies to any collection of hypothesis tests. \pause
\item Advantage: Easy to do. \pause
\item Disadvantage: Must know what all the tests are before seeing the data. \pause
\item Disadvantage: A little conservative; \pause the true joint significance level is less than $\alpha$.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f20}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f20}}

\end{frame}

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Picture of curve for CI
rm(list=ls())
x = seq(from=-3.5,to=3.5,length=500); y = dnorm(x)
plot(x,y,type='l', ann=F, axes=F, ylim = c(-.1,.8))
# Draw an axis line below
x1 = c(-4,4); y1 = c(0,0); lines(x1,y1)
# Draw cutoffs
x2 = c(-1.3,-1.3); y2 = c(0,dnorm(-1.3)); lines(x2,y2)
x3 = c(1.3,1.3); y3 = c(0,dnorm(1.3)); lines(x3,y3)
lo = expression(paste(-t[alpha/2])); text(-1.3,-0.03,lo)
hi = expression(paste(t[alpha/2])); text(1.3,-0.03,hi)
alphaover2 = expression(alpha/2)
text(-1.7,.04,alphaover2); text(1.7,.04,alphaover2)
oneminusalpha = expression(1-alpha)
text(0,0.1,oneminusalpha,cex=1.5) # Bigger font size

# F distribution with critical value
rm(list=ls())
x = seq(from=0,to=5,length=500); y = df(x,3,54)
plot(x,y,type='l', ann=F, axes=F, ylim = c(-.1,.8))
# Draw axis lines
x1 = c(-.1,5); y1 = c(0,0); lines(x1,y1)
x2 = c(0,0); y2 = c(0,0.8); lines(x2,y2)
text(0,-0.025,0)
x3 = c(2.15,2.15); y3 = c(0,df(2.15,3,54)); lines(x3,y3)
text(2.4,0.04,expression(alpha))
text(2.16,-0.025,expression(F[alpha]))

# F distribution with p-value
rm(list=ls())
x = seq(from=0,to=5,length=500); y = df(x,3,54)
plot(x,y,type='l', ann=F, axes=F, ylim = c(-.1,.8))
# Draw axis lines
x1 = c(-.1,5); y1 = c(0,0); lines(x1,y1)
x2 = c(0,0); y2 = c(0,0.8); lines(x2,y2)
text(0,-0.025,0)
text(2.25,-0.025,expression(F^'*'))
x3 = c(2.2,2.2); y3 = c(0,df(2.2,3,54)); lines(x3,y3)
text(2.55,0.03,'p-value')

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{}
\pause
%\framesubtitle{}

\begin{itemize}
\item \pause
\item \pause
\item
\end{itemize}

\end{frame}

{\LARGE
\begin{displaymath}

\end{displaymath}
} % End size

\begin{comment}

\end{comment}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%