% \documentclass[serif]{beamer} % Get Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
% To create handout using article mode: Comment above and uncomment below (2 places)
%\documentclass[12pt]{article}
%\usepackage{beamerarticle}
%\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=red]{hyperref} % For live Web links with href in article mode
\usepackage{amsmath} % For \binom{n}{y}
%\usepackage{graphicx} % To include pdf files!
%\usepackage{fullpage}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
% \mode<presentation>{\setbeamercolor{background canvas}{bg=black!5}} % Comment this out for handout

\title{Some Large Sample Chi-squared Tests\footnote{See last slide for copyright information.}}
\subtitle{STA442/2101 Fall 2014}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

\section{Large-Sample Chi-square}

\begin{frame}
\frametitle{Large-Sample Chi-square}

Let $\mathbf{X} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$. Then recall
\vspace{10mm}
{\LARGE
\begin{displaymath}
(\mathbf{X}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{X}-\boldsymbol{\mu}) \sim \chi^2 (p)
\end{displaymath}
}
\vspace{10mm}

It's true asymptotically too.

\end{frame}

\begin{frame}
\frametitle{Using $(\mathbf{X}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{X}-\boldsymbol{\mu}) \sim \chi^2 (p)$}

Suppose
\begin{itemize}
\item $\sqrt{n}\left(\mathbf{T}_n - \boldsymbol{\theta} \right) \stackrel{d}{\rightarrow} \mathbf{T} \sim N\left(\mathbf{0},\boldsymbol{\Sigma} \right)$ and
\item $\widehat{\boldsymbol{\Sigma}}_n \stackrel{p}{\rightarrow} \boldsymbol{\Sigma}$.
\end{itemize}
\vspace{5mm}
\pause

Then for large $n$, approximately $\mathbf{T}_n \sim N\left(\boldsymbol{\theta},\frac{1}{n}\boldsymbol{\Sigma} \right)$, \pause and
\begin{eqnarray*}
W_n & = & \left( \mathbf{T}_n - \boldsymbol{\theta} \right)^\top \left( \frac{1}{n}\boldsymbol{\Sigma} \right)^{-1} \left(\mathbf{T}_n - \boldsymbol{\theta} \right) \sim \chi^2 (p) \\ \\ \pause
 & & ~~~~~~~~~~~~~~~~~~ \rotatebox{90}{=} \\
 & & n \left( \mathbf{T}_n - \boldsymbol{\theta} \right)^\top \boldsymbol{\Sigma}^{-1} \left(\mathbf{T}_n - \boldsymbol{\theta} \right) \\ \pause
 & \approx & n \left( \mathbf{T}_n - \boldsymbol{\theta} \right)^\top \widehat{\boldsymbol{\Sigma}}_n^{-1} \left(\mathbf{T}_n - \boldsymbol{\theta} \right) \\ \pause
 & \sim & \chi^2 (p)
\end{eqnarray*}

\end{frame}
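\begin{frame}[fragile]
\frametitle{Checking the approximation by simulation}
%\framesubtitle{}

A quick simulation sketch in base R (the sample size and distribution are arbitrary choices): even with skewed, non-normal data and an estimated covariance matrix, the tail probability of $W_n$ should be close to 0.05.

{\scriptsize
\begin{verbatim}
set.seed(9999)
p = 3; n = 200; nsim = 10000
W = numeric(nsim)
for(k in 1:nsim)
    {
    Dat = matrix(rexp(n*p), n, p)   # Skewed, definitely not normal
    Tn = apply(Dat,2,mean)          # T_n is the sample mean vector
    Sighat = var(Dat)               # Consistent estimate of Sigma
    d = Tn - rep(1,p)               # theta = E(Y_ij) = 1
    W[k] = n * t(d) %*% solve(Sighat) %*% d
    } # Next simulation
mean(W > qchisq(0.95,df=p))         # Should be close to 0.05
\end{verbatim}
} % End size

\end{frame}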
\begin{frame}
\frametitle{Or we could be more precise}

Suppose
\begin{itemize}
\item $\sqrt{n}\left(\mathbf{T}_n - \boldsymbol{\theta} \right) \stackrel{d}{\rightarrow} \mathbf{T} \sim N\left(\mathbf{0},\boldsymbol{\Sigma} \right)$ and
\item $\widehat{\boldsymbol{\Sigma}}_n \stackrel{p}{\rightarrow} \boldsymbol{\Sigma}$.
\end{itemize}
\vspace{5mm}

Then $\widehat{\boldsymbol{\Sigma}}_n^{-1} \stackrel{p}{\rightarrow} \boldsymbol{\Sigma}^{-1}$, and by a Slutsky lemma,
\begin{displaymath}
\left( \begin{array}{c} \sqrt{n}\left(\mathbf{T}_n - \boldsymbol{\theta} \right) \\
                        \widehat{\boldsymbol{\Sigma}}_n^{-1} \end{array} \right)
\stackrel{d}{\rightarrow}
\left( \begin{array}{c} \mathbf{T} \\ \boldsymbol{\Sigma}^{-1} \end{array} \right).
\end{displaymath}
By continuity,
\begin{eqnarray*}
W_n & = & \left( \sqrt{n}(\mathbf{T}_n - \boldsymbol{\theta}) \right)^\top \widehat{\boldsymbol{\Sigma}}_n^{-1} \sqrt{n}(\mathbf{T}_n - \boldsymbol{\theta}) \\
    & = & n \left( \mathbf{T}_n - \boldsymbol{\theta} \right)^\top \widehat{\boldsymbol{\Sigma}}_n^{-1} \left(\mathbf{T}_n - \boldsymbol{\theta} \right) \\
    & \stackrel{d}{\rightarrow} & \mathbf{T}^\top \boldsymbol{\Sigma}^{-1} \mathbf{T} \\
    & \sim & \chi^2 (p)
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{If $H_0:\mathbf{L}\boldsymbol{\theta}=\mathbf{h}$ is true}
\framesubtitle{Where $\mathbf{L}$ is $r \times p$ and of full row rank}
\pause

Asymptotically, $\mathbf{LT}_n \sim N\left(\mathbf{L}\boldsymbol{\theta}, \frac{1}{n}\mathbf{L}\boldsymbol{\Sigma}\mathbf{L}^\top \right)$. \pause So
\begin{eqnarray*}
& & \left( \mathbf{LT}_n - \mathbf{L}\boldsymbol{\theta} \right)^\top \left( \frac{1}{n}\mathbf{L}\boldsymbol{\Sigma}\mathbf{L}^\top \right)^{-1} \left(\mathbf{LT}_n - \mathbf{L}\boldsymbol{\theta} \right) \sim \chi^2 (r) \\ \\ \pause
& & ~~~~~~~~~~~~~~~~~~~~~~ \rotatebox{90}{=} \\
& & n \left( \mathbf{LT}_n - \mathbf{h} \right)^\top \left(\mathbf{L}\boldsymbol{\Sigma}\mathbf{L}^\top \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right) \\ \pause
& \approx & n \left( \mathbf{LT}_n - \mathbf{h} \right)^\top \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\top \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right) \\ \pause
= W_n & \sim & \chi^2 (r)
\end{eqnarray*}
\vspace{10mm}

Or we could be more precise and use Slutsky lemmas.
\end{frame}

\begin{frame}
\frametitle{Test of $H_0:\mathbf{L}\boldsymbol{\theta}=\mathbf{h}$}
\framesubtitle{Where $\mathbf{L}$ is $r \times p$ and of full row rank}

{\LARGE
\begin{displaymath}
W_n = n \left( \mathbf{LT}_n - \mathbf{h} \right)^\top \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\top \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right)
\end{displaymath}
}
Distributed approximately as chi-squared with $r$ degrees of freedom under $H_0$.
\vspace{10mm}

If $\mathbf{T}_n$ is the maximum likelihood estimator of $\boldsymbol{\theta}$, it's called a \emph{Wald test} (and $\widehat{\boldsymbol{\Sigma}}_n$ has a special form).
\end{frame}

\section{Within cases}

\begin{frame}
\frametitle{Example: The \texttt{statclass} data}
%\framesubtitle{}

Fifty-eight students in a Statistics class took 8 quizzes, a midterm test and a final exam. They also had 9 computer assignments. The instructor wants to compare average performance on the four components of the grade.
\pause
\begin{itemize}
\item How about a model?
\item Should we assume normality? \pause
\item Does it make sense to assume quiz marks independent of final exam marks? \pause
\item Does this remind you of a matched $t$-test?
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Within cases versus between cases}
%\framesubtitle{}

\begin{itemize}
\item Want to compare average performance under several conditions, which are often experimental conditions, but not always. \pause
\item When a case (person, rat, school, etc.)
appears in \emph{all} the conditions, it's called a \emph{within cases} design. \pause Think of the matched $t$-test.
\item When a case appears in \emph{only one} condition, it's called a \emph{between cases} design. \pause Think of the two-sample $t$-test.
\item Comparing performance on quizzes, midterm, final and computer assignments is within-cases.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Assume multivariate normality?}
%\framesubtitle{}
\begin{center}
\includegraphics[width=3in]{computerhistogram}
\end{center}
\end{frame}

\begin{frame}
\frametitle{A model for the \texttt{statclass} data}
%\framesubtitle{}

Fifty-eight students in a Statistics class took 8 quizzes, a midterm test and a final exam. They also had 9 computer assignments.
\vspace{10mm}

Let $\mathbf{Y}_1, \ldots, \mathbf{Y}_n$ be a random sample from an unknown distribution with mean $\boldsymbol{\mu} = (\mu_1, \mu_2, \mu_3, \mu_4)^\top$ and covariance matrix $\boldsymbol{\Sigma}$.
\vspace{10mm}

$H_0: \mu_1=\mu_2=\mu_3=\mu_4$
\end{frame}

\begin{frame}
\frametitle{Applying $W_n = n \left( \mathbf{LT}_n - \mathbf{h} \right)^\top \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\top \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right)$}
\framesubtitle{To test $H_0:\mathbf{L}\boldsymbol{\theta}=\mathbf{h}$}

\begin{itemize}
\item Test is based on $\sqrt{n}\left(\mathbf{T}_n - \boldsymbol{\theta} \right) \stackrel{d}{\rightarrow} \mathbf{T} \sim N\left(\mathbf{0},\boldsymbol{\Sigma} \right)$.
\item CLT says $\sqrt{n}\left(\overline{\mathbf{Y}}_n - \boldsymbol{\mu} \right) \stackrel{d}{\rightarrow} \mathbf{Y} \sim N\left(\mathbf{0},\boldsymbol{\Sigma} \right)$.
\item So $\mathbf{T}_n=\overline{\mathbf{Y}}_n$ and $\boldsymbol{\theta}=\boldsymbol{\mu}$.
\item Sample variance-covariance matrix is good enough for $\widehat{\boldsymbol{\Sigma}}_n \stackrel{p}{\rightarrow} \boldsymbol{\Sigma}$.
\item Write $H_0: \mu_1=\mu_2=\mu_3=\mu_4$ as $\mathbf{L}\boldsymbol{\mu}=\mathbf{h}$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{$H_0: \mathbf{L}\boldsymbol{\mu}=\mathbf{0}$}
\framesubtitle{To test equality of four means}

\begin{displaymath}
\begin{array}{cccc}
\mathbf{L} & \boldsymbol{\mu} & = & \mathbf{0} \\
\left( \begin{array}{rrrr}
1 & -1 &  0 &  0 \\
0 &  1 & -1 &  0 \\
0 &  0 &  1 & -1 \\
\end{array} \right) &
\left( \begin{array}{c} \mu_1 \\ \mu_2 \\ \mu_3 \\ \mu_4 \end{array} \right) & = &
\left( \begin{array}{c} 0 \\ 0 \\ 0 \end{array} \right)
\end{array}
\end{displaymath}
\end{frame}
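\begin{frame}[fragile]
\frametitle{The matrix $\mathbf{L}$ is not unique}
%\framesubtitle{}

Any three linearly independent contrasts will do. For example, comparing each of the first three means to $\mu_4$ gives a different $\mathbf{L}$ with the same row space, and hence exactly the same $W_n$. A sketch:
{\footnotesize
\begin{verbatim}
L2 = rbind(c(1, 0, 0,-1),
           c(0, 1, 0,-1),
           c(0, 0, 1,-1) )  # Each mean versus mu_4
# L2 = A %*% L for a nonsingular A, so W_n is unchanged.
\end{verbatim}
} % End size
\end{frame}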
\begin{frame}[fragile]
\frametitle{Read the data}
%\framesubtitle{}
\begin{center}
\includegraphics[width=4in]{statclass14}
\end{center}
{\scriptsize
\begin{verbatim}
> statclass = read.table("http://www.utstat.toronto.edu/~brunner/appliedf14/code_n_data/lecture/statclass.data",header=T)
> head(statclass)
  S E Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 C1 C2 C3 C4 C5 C6 C7 C8 C9 MT Final
1 1 2  9  1  7  8  4  3  5  2  6 10 10 10  5  0  0  0  0 55    43
2 0 2 10 10  5  9 10  8  6  8 10 10  8  9  9  9  9 10 10 66    79
3 1 2 10 10  5 10 10 10  9  8 10 10 10 10 10 10  9 10 10 94    67
4 1 2 10 10  8  9 10  7 10  9 10 10 10  9 10 10  9 10 10 81    65
5 1 1 10  6  7  9  8  8  5  7 10  9 10  9  5  6  4  8 10 57    52
6 0 1 10  9  5  8  9  8  5  6  8  7  5  6 10  6  5  9  9 77    64
> attach(statclass)
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Process the data a bit}
\framesubtitle{And take a look}

{\scriptsize
{\color{blue}
\begin{verbatim}
> quiz = 10 * (Q1+Q2+Q3+Q4+Q5+Q6+Q7+Q8)/8
> computer = 10 * (C1+C2+C3+C4+C5+C6+C7+C8+C9)/9
> midterm = MT
> final = Final
> datta = cbind(quiz,computer,midterm,final); head(round(datta))
\end{verbatim}
} % End color
\begin{verbatim}
     quiz computer midterm final
[1,]   49       46      55    43
[2,]   82       93      66    79
[3,]   90       99      94    67
[4,]   91       98      81    65
[5,]   75       79      57    52
[6,]   75       72      77    64
\end{verbatim}
{\color{blue}
\begin{verbatim}
> ybar = apply(datta,2,mean); ybar
\end{verbatim}
} % End color
\begin{verbatim}
    quiz computer  midterm    final
72.58621 83.98467 68.87931 49.44828
\end{verbatim}
} % End size
\end{frame}

\begin{frame}
\frametitle{Boxplots}
\framesubtitle{\texttt{boxplot(datta); title("Score out of 100 Percent")}}
\begin{center}
\includegraphics[width=3in]{boxplots}
\end{center}
\end{frame}

\begin{frame}[fragile]
\frametitle{Covariances and Correlations}
%\framesubtitle{}

{\footnotesize % or scriptsize
{\color{blue}
\begin{verbatim}
> sigmahat = var(datta); sigmahat
\end{verbatim}
} % End color
\begin{verbatim}
              quiz   computer   midterm      final
quiz     120.66130  62.369765  60.31760  71.736993
computer  62.36977 134.894281  27.68233   6.272098
midterm   60.31760  27.682328 223.37114  99.633999
final     71.73699   6.272098  99.63400 272.777979
\end{verbatim}
{\color{blue}
\begin{verbatim}
> cor(datta)
\end{verbatim}
} % End color
\begin{verbatim}
              quiz   computer   midterm      final
quiz     1.0000000 0.48886995 0.3674063 0.39541626
computer 0.4888700 1.00000000 0.1594749 0.03269726
midterm  0.3674063 0.15947489 1.0000000 0.40363552
final    0.3954163 0.03269726 0.4036355 1.00000000
\end{verbatim}
} % End size
\end{frame}

\begin{frame}
\frametitle{Scatterplot matrix}
%\framesubtitle{}
\begin{center}
\includegraphics[width=3in]{scatterplots}
\end{center}
\end{frame}

\begin{frame}[fragile]
\frametitle{Calculate $W_n = n \left( \mathbf{LT}_n - \mathbf{h} \right)^\top \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\top \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right)$}
\framesubtitle{To test $H_0: \mathbf{L}\boldsymbol{\mu}=\mathbf{0}$}

{\footnotesize % or scriptsize
\begin{verbatim}
> L = rbind(c(1,-1,0,0),
+           c(0,1,-1,0),
+           c(0,0,1,-1) )
> n = length(quiz); n
[1] 58
> Wn = n * t(L %*% ybar) %*% solve(L%*%sigmahat%*%t(L)) %*% L%*%ybar
> Wn
         [,1]
[1,] 176.8238
> Wn = as.numeric(Wn)
> pvalue = 1-pchisq(Wn,df=3); pvalue
[1] 0
\end{verbatim}
} % End size

Conclude that the four means are not all equal. Which ones are different from one another? Need follow-up tests.
\end{frame}
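\begin{frame}[fragile]
\frametitle{That $p$-value is not really zero}
%\framesubtitle{}

With $W_n = 176.8$, \texttt{pchisq(Wn,df=3)} is so close to one that \texttt{1-pchisq(Wn,df=3)} underflows to zero in double precision. A one-line sketch that computes the upper tail directly:
{\footnotesize
\begin{verbatim}
pchisq(Wn, df=3, lower.tail=FALSE) # A tiny positive number, not 0
\end{verbatim}
} % End size
Either way, reject $H_0$ at any reasonable significance level.
\end{frame}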
\begin{frame}[fragile]
\frametitle{The R function \texttt{Wtest}}
\framesubtitle{``Estimated" asymptotic covariance matrix $\widehat{\mathbf{V}}_n = \frac{1}{n}\widehat{\boldsymbol{\Sigma}}_n$}

\begin{eqnarray*}
W_n &=& n \left( \mathbf{LT}_n - \mathbf{h} \right)^\top \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\top \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right) \\
    &=& \left( \mathbf{LT}_n - \mathbf{h} \right)^\top \left(\mathbf{L}\widehat{\mathbf{V}}_n \mathbf{L}^\top \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right)
\end{eqnarray*}

{\scriptsize % This is the 2013 version: Used to be called WaldTest.
\begin{verbatim}
Wtest = function(L,Tn,Vn,h=0) # H0: L theta = h
# Note Vn is the estimated asymptotic covariance matrix of Tn,
# so it's Sigma-hat divided by n. For Wald tests based on numerical
# MLEs, Tn = theta-hat, and Vn is the inverse of the Hessian.
    {
    Wtest = numeric(3)
    names(Wtest) = c("W","df","p-value")
    r = dim(L)[1]
    W = t(L%*%Tn-h) %*% solve(L%*%Vn%*%t(L)) %*% (L%*%Tn-h)
    W = as.numeric(W)
    pval = 1-pchisq(W,r)
    Wtest[1] = W; Wtest[2] = r; Wtest[3] = pval
    Wtest
    } # End function Wtest
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Illustrate the \texttt{Wtest} function}

For $H_0: \mu_1=\mu_2=\mu_3=\mu_4$, got $W_n=176.8238$, $df=3$, $p \approx 0$.
{\scriptsize
\begin{verbatim}
> V = sigmahat / length(final)
> # Asymptotic covariance matrix of Y-bar is Sigma/n
> LL = rbind( c(1,-1,0,0),
+             c(0,1,-1,0),
+             c(0,0,1,-1) )
> Wtest(LL,ybar,V)
       W       df  p-value
176.8238   3.0000   0.0000
>
> ybar
    quiz computer  midterm    final
72.58621 83.98467 68.87931 49.44828
\end{verbatim}
} % End size
Is average quiz score different from midterm?
{\scriptsize
\begin{verbatim}
> L1 = rbind(c(1,0,-1,0)); n = length(final)
> Wtest(L=L1,Tn=ybar,Vn=sigmahat/n)
         W         df    p-value
3.56755878 1.00000000 0.05891887
\end{verbatim}
} % End size
Test the other pairwise differences between means.
\end{frame}

% Cut out a big section on multinomial, CLT: See 2013

\begin{frame}
\frametitle{Another application: Mean index numbers}

In a study of consumers' opinions of 5 popular TV programmes, 240 consumers who watch all the shows at least once a month completed a computerized interview. On one of the screens, they indicated how much they enjoyed each programme by mouse-clicking on a 10cm line. One end of the line was labelled ``Like very much," and the other end was labelled ``Dislike very much." So each respondent contributed 5 ratings, on a continuous scale from zero to ten.
\vspace{5mm}

The study was commissioned by the producers of one of the shows, which will be called ``Programme $E$." Ratings of Programmes $A$ through $D$ were expressed as percentages of the rating for Programme $E$, and these were described as ``Liking indexed to Programme $E$."
\end{frame}

\begin{frame}
\frametitle{In statistical language}

We have $X_{i,1}, \ldots, X_{i,5}$ for $i = 1, \ldots, n$, and we calculate
\begin{displaymath}
Y_{i,j} = 100 \frac{X_{i,j}}{X_{i,5}} \mbox{~~for~} j = 1, \ldots, 4.
\end{displaymath}
\begin{itemize}
\item We want confidence intervals for the 4 mean index numbers, and tests of differences between means.
\item Observations from the same respondent are definitely not independent.
\item What is the distribution?
\item What is a reasonable model?
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Model}
%\framesubtitle{}

Let $\mathbf{Y}_1, \ldots, \mathbf{Y}_n$ be a random sample from an unknown multivariate distribution $F$ with expected value $\boldsymbol{\mu}$ and covariance matrix $\boldsymbol{\Sigma}$.
\vspace{10mm}

One way to think about it is
\begin{itemize}
\item The parameter is the unknown distribution $F$.
\item The parameter space is a space of distribution functions.
\item $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$ are \emph{functions} of $F$.
\item We're only interested in $\boldsymbol{\mu}$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{We have the tools we need}
%\framesubtitle{}

\begin{itemize}
\item $\sqrt{n}(\overline{\mathbf{Y}}_n-\boldsymbol{\mu}) \stackrel{d}{\rightarrow} \mathbf{Y} \sim N\left(\mathbf{0}, \boldsymbol{\Sigma} \right)$ and
\item For $\widehat{\boldsymbol{\Sigma}}_n \stackrel{p}{\rightarrow} \boldsymbol{\Sigma}$, use the sample covariance matrix.
\item $H_0:\mathbf{L}\boldsymbol{\mu}=\mathbf{h}$
\end{itemize}
\begin{displaymath}
W_n = n \left( \mathbf{L}\overline{\mathbf{Y}}_n - \mathbf{h} \right)^\top \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\top \right)^{-1} \left(\mathbf{L}\overline{\mathbf{Y}}_n - \mathbf{h} \right)
\end{displaymath}
\end{frame}

\begin{frame}[fragile]
\frametitle{Read the data}
%\framesubtitle{}

{\tiny
\begin{verbatim}
> Y = read.table("http://www.utstat.toronto.edu/~brunner/appliedf14/code_n_data/lecture/TVshows.data")
\end{verbatim}
} % End size
\begin{verbatim}
> Y[1:4,]
      A     B     C     D
1 101.3  81.0 101.8  89.6
2  94.0  85.3  76.3 100.8
3 145.4 138.7 151.0 148.3
4  72.0  86.1  96.1  96.3
> n = dim(Y)[1]; n
[1] 240
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Confidence intervals: $\overline{Y} \pm z_{\alpha/2}\frac{S}{\sqrt{n}}$}
%\framesubtitle{}

{\footnotesize
\begin{verbatim}
> ave = apply(Y,2,mean); ave
        A         B         C         D
101.65958  98.50167  99.39958 103.94167
> v = apply(Y,2,var) # Sample variances with n-1
> stderr = sqrt(v/n)
> marginerror95 = 1.96*stderr
> lower95 = ave-marginerror95
> upper95 = ave+marginerror95
> Z = (ave-100)/stderr
> rbind(ave,marginerror95,lower95,upper95,Z)
                       A          B           C          D
ave           101.659583  98.501667  99.3995833 103.941667
marginerror95   1.585652   1.876299   1.7463047   1.469928
lower95       100.073931  96.625368  97.6532786 102.471739
upper95       103.245236 100.377966 101.1458880 105.411594
Z               2.051385  -1.565173  -0.6738897   5.255814
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{What if we ``assume" normality and use $t$?}
%\framesubtitle{}

{\footnotesize
\begin{verbatim}
> rbind(ave,lower95,upper95,Z)
                 A          B           C          D
ave     101.659583  98.501667  99.3995833 103.941667
lower95 100.073931  96.625368  97.6532786 102.471739
upper95 103.245236 100.377966 101.1458880 105.411594
Z         2.051385  -1.565173  -0.6738897   5.255814
> attach(Y) # So A, B, C, D are available
> t.test(A,mu=100)

        One Sample t-test

data:  A
t = 2.0514, df = 239, p-value = 0.04132
alternative hypothesis: true mean is not equal to 100
95 percent confidence interval:
 100.0659 103.2533
sample estimates:
mean of x
 101.6596
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Test equality of means}
%\framesubtitle{}

{\footnotesize
\begin{verbatim}
> S = var(Y); S
         A         B         C        D
A 157.0779 110.77831 106.56220 109.6234
B 110.7783 219.93950  95.66686 100.3585
C 106.5622  95.66686 190.51937 106.2501
D 109.6234 100.35851 106.25006 134.9867
> cor(Y)
          A         B         C         D
A 1.0000000 0.5959991 0.6159934 0.7528355
B 0.5959991 1.0000000 0.4673480 0.5824479
C 0.6159934 0.4673480 1.0000000 0.6625431
D 0.7528355 0.5824479 0.6625431 1.0000000
>
> L4 = rbind( c(1,-1, 0, 0),
+             c(0, 1,-1, 0),
+             c(0, 0, 1,-1) )
> Wtest(L=L4,Tn=ave,Vn=S/n)
           W           df      p-value
7.648689e+01 3.000000e+00 2.220446e-16
\end{verbatim}
} % End size
\end{frame}

\section{Multiple comparisons}

\begin{frame}[fragile]
\frametitle{Pairwise comparisons}
\framesubtitle{Where is the effect coming from?}

Set it up.
\vspace{3mm}

{\footnotesize % or scriptsize
{\color{blue}
\begin{verbatim}
> testmatrix = diag(1,4,4) # Start with an identity matrix.
> labelz = colnames(Y)
> rownames(testmatrix) = labelz; colnames(testmatrix) = labelz
> testmatrix
\end{verbatim}
} % End color
\begin{verbatim}
  A B C D
A 1 0 0 0
B 0 1 0 0
C 0 0 1 0
D 0 0 0 1
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Fill the matrix}
%\framesubtitle{}

{\footnotesize % or scriptsize
{\color{blue}
\begin{verbatim}
> for(i in 1:3)
+     {
+     for(j in (i+1):4)
+         {
+         LL = rbind(c(0,0,0,0))
+         LL[i]=1; LL[j]=-1
+         print(LL) # Just to check
+         W = Wtest(L=LL,Tn=ave,Vn=S/n)
+         testmatrix[i,j] = W[1]; testmatrix[j,i]=W[3]
+         } # Next j
+     } # Next i
\end{verbatim}
} % End color
} % End size
{\scriptsize
\begin{verbatim}
     [,1] [,2] [,3] [,4]
[1,]    1   -1    0    0
     [,1] [,2] [,3] [,4]
[1,]    1    0   -1    0
     [,1] [,2] [,3] [,4]
[1,]    1    0    0   -1
     [,1] [,2] [,3] [,4]
[1,]    0    1   -1    0
     [,1] [,2] [,3] [,4]
[1,]    0    1    0   -1
     [,1] [,2] [,3] [,4]
[1,]    0    0    1   -1
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Look at the $\binom{4}{2}$ pairwise comparisons}
%\framesubtitle{}

{\footnotesize % or scriptsize
{\color{blue}
\begin{verbatim}
> # Test statistics (chisq with 1 df) are in the upper triangle,
> # p-values in lower
> round(testmatrix,4)
\end{verbatim}
} % End color
\begin{verbatim}
       A       B      C       D
A 1.0000 15.3954 9.1158 17.1647
B 0.0001  1.0000 0.8831 46.0573
C 0.0025  0.3474 1.0000 43.8147
D 0.0000  0.0000 0.0000  1.0000
\end{verbatim}
{\color{blue}
\begin{verbatim}
> ave
\end{verbatim}
} % End color
\begin{verbatim}
        A         B         C         D
101.65958  98.50167  99.39958 103.94167
\end{verbatim}
} % End size
\vspace{2mm}
\pause

Average reported enjoyment was greatest for Programme $D$, followed by $A$. The results are consistent with no difference between $B$ and $C$.
%Other tests might be of interest also, like the average expected value of $A$ and $B$ versus the average of $C$ and $D$.
\end{frame}

\begin{frame}
\frametitle{Multiple Comparisons}
\framesubtitle{The problem}

\begin{itemize}
\item Most hypothesis tests are designed to be carried out in isolation. \pause
\item But if you do a lot of tests and all the null hypotheses are true, the chance of rejecting at least one of them can be a lot more than $\alpha$. This is inflation of the Type I error probability. \pause
\item Otherwise known as the curse of a thousand $t$-tests. \pause
\item Multiple comparison procedures (sometimes called follow-up tests, post hoc tests, probing) try to offer a solution.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Multiple Comparisons}
\framesubtitle{A solution}

\begin{itemize}
\item Protect a \emph{family} of tests against Type I error at some \emph{joint} significance level $\alpha$.
\item If all the null hypotheses are true, the probability of rejecting at least one is no more than $\alpha$.
\item Many methods are available; we'll consider just one for now: Bonferroni.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Bonferroni multiple comparisons}
%\framesubtitle{}

\begin{itemize}
\item Based on Bonferroni's inequality:
\begin{displaymath}
Pr\left\{ \cup_{j=1}^k A_j \right\} \leq \sum_{j=1}^k Pr\{A_j\}
\end{displaymath} \pause
\item Applies to any collection of $k$ tests.
\item Assume that all $k$ null hypotheses are true.
\item Event $A_j$ is that null hypothesis $j$ is rejected. \pause
\item Do the tests as usual. \pause
\item Adjust the significance level, and reject each $H_0$ if $p < \alpha/k$. \pause
\begin{displaymath}
Pr\left\{ \cup_{j=1}^k A_j \right\} \leq \sum_{j=1}^k Pr\{A_j\} = \sum_{j=1}^k \alpha/k = \alpha
\end{displaymath} \pause
\item Or, adjust the $p$-values. Multiply them by $k$, and reject if $pk < \alpha$.
\end{itemize}
\end{frame}
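\begin{frame}[fragile]
\frametitle{Bonferroni correction in R}
%\framesubtitle{}

R's \texttt{p.adjust} function will do the multiplication (truncating at one) for us. A sketch, using the six pairwise $p$-values from the lower triangle of \texttt{testmatrix}:
{\footnotesize
\begin{verbatim}
rawp = testmatrix[lower.tri(testmatrix)] # The six raw p-values
p.adjust(rawp, method="bonferroni")      # Multiply by 6, cap at 1
# Reject when an adjusted p-value is less than 0.05
\end{verbatim}
} % End size
\end{frame}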
\begin{frame}[fragile]
\frametitle{TV show example}
%\framesubtitle{}

{\footnotesize % or scriptsize
\begin{verbatim}
       A       B      C       D
A 1.0000 15.3954 9.1158 17.1647
B 0.0001  1.0000 0.8831 46.0573
C 0.0025  0.3474 1.0000 43.8147
D 0.0000  0.0000 0.0000  1.0000
\end{verbatim} \pause
} % End size
\begin{itemize}
\item There are $\binom{4}{2}=6=k$ tests in the family.
\item Adjusted $\alpha$ is $0.05/6 \approx 0.0083$.
\item Conclusions don't change in this case. \pause
\item What if the family includes comparisons with Programme $E$? Now there are 10 comparisons and $H_0$ is rejected if $p<\alpha/10=0.005$.
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Include $Z$ tests for comparison with Programme $E$}
\framesubtitle{Adjusted significance level is $\alpha/10=0.005$}

{\footnotesize % or scriptsize
{\color{blue}
\begin{verbatim}
> pval = 2*pnorm(-abs(Z))
> rbind(Z,pval)
\end{verbatim}
} % End color
\begin{verbatim}
              A          B          C            D
Z    2.05138485 -1.5651734 -0.6738897 5.255814e+00
pval 0.04022948  0.1175423  0.5003815 1.473709e-07
\end{verbatim}
} % End size
\pause
\vspace{3mm}

Add to the conclusions: Programme $D$ is preferred to $E$, but $E$ is in a statistical tie with $A$, $B$ and $C$.
\end{frame}

\begin{frame}
\frametitle{Advantages and disadvantages}
\framesubtitle{Of the Bonferroni method}

\begin{itemize}
\item Advantage: Flexible. It applies to any collection of hypothesis tests.
\item Advantage: Easy to do.
\item[]
\item Disadvantage: Must know what all the tests are before seeing the data. \pause So we were cheating.
\item Disadvantage: A little conservative; the true joint significance level is less than $\alpha$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Practical versus statistical significance}
\framesubtitle{\texttt{boxplot(Y)}}
\begin{center}
\includegraphics[width=3in]{TVboxplots}
\end{center}
\end{frame}

\section{Between cases}

\begin{frame}
\frametitle{Between cases: Independent groups}
\framesubtitle{Like a one-factor ANOVA}

\begin{itemize}
\item Have $n$ cases, separated into $p$ groups: Maybe experimental treatment (say, drug) or occupation of main wage earner in family.
\item $n_1 + n_2 + \cdots + n_p = n$
\item Response variable is either binary or quantity of something, like annual energy consumption.
\item No reason to believe normality.
\item No reason to believe equal variances.
\item $H_0: \mathbf{L} \boldsymbol{\mu} = \mathbf{h}$
\item For example, $H_0: \mu_1 = \ldots = \mu_p$
\item Or $\mu_2 = \mu_7$
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Basic Idea}
%\framesubtitle{}

The $p$ sample means are independent random variables. Asymptotically,
\begin{itemize}
\item $\overline{Y}_j \sim N(\mu_j,\frac{\sigma^2_j}{n_j})$
\item The $p \times 1$ random vector $\overline{\mathbf{Y}}_n \sim N(\boldsymbol{\mu},\mathbf{V}_n)$,
\item Where $\mathbf{V}_n$ is a $p \times p$ diagonal matrix with $j$th diagonal element $\frac{\sigma^2_j}{n_j}$.
\item $\mathbf{L}\overline{\mathbf{Y}}_n \sim N_r(\mathbf{L}\boldsymbol{\mu},\mathbf{LV}_n\mathbf{L}^\top)$
\item ``Estimate" $\mathbf{V}_n$ with the diagonal matrix $\widehat{\mathbf{V}}_n$, $j$th diagonal element $\frac{\widehat{\sigma}^2_j}{n_j}$
\item And if $H_0: \mathbf{L}\boldsymbol{\mu} = \mathbf{h}$ is true, then asymptotically
\end{itemize}
\begin{displaymath}
W_n = \left( \mathbf{L}\overline{\mathbf{Y}}_n - \mathbf{h} \right)^\top \left(\mathbf{L}\widehat{\mathbf{V}}_n \mathbf{L}^\top \right)^{-1} \left(\mathbf{L}\overline{\mathbf{Y}}_n - \mathbf{h} \right) \sim \chi^2(r)
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{One little technical issue}
%\framesubtitle{}

\begin{itemize}
\item More than one $n_j$ is going to infinity.
\item The rates at which they go to infinity can't be too different.
\item In particular, if $n = n_1 + n_2 + \cdots + n_p$,
\item Then each $\frac{n_j}{n}$ must converge to a non-zero constant (in probability).
\end{itemize}
\vspace{4mm}

Loose asymptotic arguments lose this kind of detail.
\end{frame}

\begin{frame}
\frametitle{Compare High School marks for students at 3 campuses}
%\framesubtitle{}
\begin{center}
\begin{tabular}{lccc}
Campus & $n$ & Mean & Standard Deviation \\ \hline
SG   & 3906 & 84.94 & 5.59 \\
UTM  & 1583 & 79.68 & 5.82 \\
UTSC & 1849 & 79.96 & 5.98 \\ \hline
\end{tabular}
\end{center}
\end{frame}

\begin{frame}[fragile]
\frametitle{Compute $W_n = \left( \mathbf{L}\overline{\mathbf{Y}}_n - \mathbf{h} \right)^\top \left(\mathbf{L}\widehat{\mathbf{V}}_n \mathbf{L}^\top \right)^{-1} \left(\mathbf{L}\overline{\mathbf{Y}}_n - \mathbf{h} \right)$}
\framesubtitle{$H_0:\mu_1=\mu_2=\mu_3$}

\begin{center}
{\tiny
\begin{tabular}{lccc}
Campus & $n$ & Mean & Standard Deviation \\ \hline
SG   & 3906 & 84.94 & 5.59 \\
UTM  & 1583 & 79.68 & 5.82 \\
UTSC & 1849 & 79.96 & 5.98 \\ \hline
\end{tabular}
} % End size
\end{center}
{\scriptsize
\begin{verbatim}
> source("http://www.utstat.utoronto.ca/~brunner/Rfunctions/Wtest.txt")
\end{verbatim}
} % End size
{\footnotesize % or scriptsize
\begin{verbatim}
> n = c(3906,1583,1849)
> ybar = c(84.94,79.68,79.96)
> Vhat = diag(c(5.59,5.82,5.98)^2/n); Vhat
            [,1]      [,2]      [,3]
[1,] 0.008000026 0.0000000 0.0000000
[2,] 0.000000000 0.0213976 0.0000000
[3,] 0.000000000 0.0000000 0.0193404
> L1 = rbind(c(1,-1,0),
+            c(0,1,-1) )
> Wtest(L1,ybar,Vhat)
      W      df p-value
1441.58    2.00    0.00
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Test difference between UTM and UTSC}
%\framesubtitle{}

{\tiny
\begin{tabular}{lccc}
Campus & $n$ & Mean & Standard Deviation \\ \hline
SG   & 3906 & 84.94 & 5.59 \\
UTM  & 1583 & 79.68 & 5.82 \\
UTSC & 1849 & 79.96 & 5.98 \\ \hline
\end{tabular}
} % End size
\vspace{10mm}

{\footnotesize % or scriptsize
\begin{verbatim}
> # UTM vs. UTSC
> Wtest(rbind(c(0,1,-1)),ybar,Vhat)
        W        df   p-value
1.9244931 1.0000000 0.1653622
\end{verbatim}
} % End size
There are two more pairwise comparisons, sketched on the next slide.
\end{frame}

% Deleted a big delta method section. See 2013. Example is multinomial, also deleted.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
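\begin{frame}[fragile]
\frametitle{The other two pairwise comparisons}
%\framesubtitle{}

A sketch, treating the $\binom{3}{2}=3$ pairwise tests as a Bonferroni family, so each $H_0$ is rejected if $p < 0.05/3 \approx 0.0167$:
{\footnotesize
\begin{verbatim}
Wtest(rbind(c(1,-1,0)), ybar, Vhat)  # SG vs. UTM
Wtest(rbind(c(1,0,-1)), ybar, Vhat)  # SG vs. UTSC
\end{verbatim}
} % End size
With mean differences of about five points and such small standard errors, both tests should reject decisively.
\end{frame}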
\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf14}{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/appliedf14}}

\end{frame}

\end{document}