\documentclass[serif]{beamer} % Get Computer Modern math font.
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
% To create handout using article mode: Comment above and uncomment below (2 places)
%\documentclass[12pt]{article}
%\usepackage{beamerarticle}
%\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=red]{hyperref} % For live Web links with href in article mode
%\usepackage{amsmath} % For \binom{n}{y}
%\usepackage{graphicx} % To include pdf files!
%\usepackage{fullpage}

\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>
% \mode{\setbeamercolor{background canvas}{bg=black!5}} % Comment this out for handout

\title{Random Vectors Part Two\footnote{See last slide for copyright information.}}
\subtitle{STA442/2101 Fall 2012}
\date{} % To suppress date

\begin{document}

% More material required for handout in article mode. Also eliminate vspace
%\title{The Multinomial Model\footnote{See last slide for copyright information.}}
%\subtitle{STA 442/2101: Fall 2012}
%\date{} % To suppress date
%\maketitle % Don't want a separate title page for handout

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Background Reading: Davison's \emph{Statistical Models}}
\framesubtitle{It may be a little bit helpful.}
\begin{itemize}
\item Section 2.2 on Convergence
\item Pages 33--35 on the Delta method include the multivariate case.
\item The multinomial distribution is introduced as a homework problem, Problem 10 on p. 37.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

\section{Large-Sample Chi-square}

\begin{frame}
\frametitle{Large-Sample Chi-square}

Let $\mathbf{X} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$. Then recall
\vspace{10mm}
{\LARGE
\begin{displaymath}
(\mathbf{X}-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(\mathbf{X}-\boldsymbol{\mu}) \sim \chi^2 (p)
\end{displaymath}}
\vspace{10mm}

It's true asymptotically too.
\end{frame}

\begin{frame}
\frametitle{Using $(\mathbf{X}-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(\mathbf{X}-\boldsymbol{\mu}) \sim \chi^2 (p)$}

Suppose
\begin{itemize}
\item $\sqrt{n}\left(\mathbf{T}_n - \boldsymbol{\theta} \right) \stackrel{d}{\rightarrow} \mathbf{T} \sim N\left(\mathbf{0},\boldsymbol{\Sigma} \right)$ and
\item $\widehat{\boldsymbol{\Sigma}}_n \stackrel{p}{\rightarrow} \boldsymbol{\Sigma}$.
\end{itemize}
\vspace{5mm}

Then approximately as $n \rightarrow \infty$, $\mathbf{T}_n \sim N\left(\boldsymbol{\theta},\frac{1}{n}\boldsymbol{\Sigma} \right)$, and
\begin{eqnarray*}
& & \left( \mathbf{T}_n - \boldsymbol{\theta} \right)^\prime \left( \frac{1}{n}\boldsymbol{\Sigma} \right)^{-1} \left(\mathbf{T}_n - \boldsymbol{\theta} \right) \sim \chi^2 (p) \\ \\
& & ~~~~~~~~~~~~~~~~~~ \rotatebox{90}{=} \\
& & n \left( \mathbf{T}_n - \boldsymbol{\theta} \right)^\prime \boldsymbol{\Sigma}^{-1} \left(\mathbf{T}_n - \boldsymbol{\theta} \right) \\
& \approx & n \left( \mathbf{T}_n - \boldsymbol{\theta} \right)^\prime \widehat{\boldsymbol{\Sigma}}_n^{-1} \left(\mathbf{T}_n - \boldsymbol{\theta} \right) \\
& \sim & \chi^2 (p)
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Or we could be more precise}

Suppose
\begin{itemize}
\item $\sqrt{n}\left(\mathbf{T}_n - \boldsymbol{\theta} \right) \stackrel{d}{\rightarrow} \mathbf{T} \sim N\left(\mathbf{0},\boldsymbol{\Sigma} \right)$ and
\item $\widehat{\boldsymbol{\Sigma}}_n \stackrel{p}{\rightarrow} \boldsymbol{\Sigma}$.
\end{itemize}
\vspace{5mm}
Then $\widehat{\boldsymbol{\Sigma}}_n^{-1} \stackrel{p}{\rightarrow} \boldsymbol{\Sigma}^{-1}$, and by a Slutsky lemma,
\begin{displaymath}
\left( \begin{array}{c} \sqrt{n}\left(\mathbf{T}_n - \boldsymbol{\theta} \right) \\ \widehat{\boldsymbol{\Sigma}}_n \end{array} \right)
\stackrel{d}{\rightarrow}
\left( \begin{array}{c} \mathbf{T} \\ \boldsymbol{\Sigma} \end{array} \right).
\end{displaymath}
By continuity,
\begin{eqnarray*}
& & \left( \sqrt{n}(\mathbf{T}_n - \boldsymbol{\theta}) \right)^\prime \widehat{\boldsymbol{\Sigma}}_n^{-1} \sqrt{n}(\mathbf{T}_n - \boldsymbol{\theta}) \\
& = & n \left( \mathbf{T}_n - \boldsymbol{\theta} \right)^\prime \widehat{\boldsymbol{\Sigma}}_n^{-1} \left(\mathbf{T}_n - \boldsymbol{\theta} \right) \\
& \stackrel{d}{\rightarrow} & \mathbf{T}^\prime \boldsymbol{\Sigma}^{-1} \mathbf{T} \\
& \sim & \chi^2 (p)
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{If $H_0:\mathbf{L}\boldsymbol{\theta}=\mathbf{h}$ is true}
\framesubtitle{Where $\mathbf{L}$ is $r \times p$ and of full row rank}

Asymptotically, $\mathbf{LT}_n \sim N\left(\mathbf{L}\boldsymbol{\theta}, \frac{1}{n}\mathbf{L}\boldsymbol{\Sigma}\mathbf{L}^\prime \right)$. So
\begin{eqnarray*}
& & \left( \mathbf{LT}_n - \mathbf{L}\boldsymbol{\theta} \right)^\prime \left( \frac{1}{n}\mathbf{L}\boldsymbol{\Sigma}\mathbf{L}^\prime \right)^{-1} \left(\mathbf{LT}_n - \mathbf{L}\boldsymbol{\theta} \right) \sim \chi^2 (r) \\ \\
& & ~~~~~~~~~~~~~~~~~~~~~~ \rotatebox{90}{=} \\
& & n \left( \mathbf{LT}_n - \mathbf{h} \right)^\prime \left(\mathbf{L}\boldsymbol{\Sigma}\mathbf{L}^\prime \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right) \\
& \approx & n \left( \mathbf{LT}_n - \mathbf{h} \right)^\prime \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\prime \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right) \\
= W_n & \sim & \chi^2 (r)
\end{eqnarray*}
\vspace{10mm}

Or we could be more precise and use Slutsky lemmas.
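\end{frame}

\begin{frame}[fragile]
\frametitle{Checking the chi-square approximation by simulation}
\framesubtitle{A small sketch; the data-generating choices below are arbitrary}

A numerical check of $n \left( \mathbf{T}_n - \boldsymbol{\theta} \right)^\prime \widehat{\boldsymbol{\Sigma}}_n^{-1} \left(\mathbf{T}_n - \boldsymbol{\theta} \right) \approx \chi^2(p)$, with $\mathbf{T}_n$ a sample mean vector and $\widehat{\boldsymbol{\Sigma}}_n$ the sample covariance matrix.
{\footnotesize
\begin{verbatim}
# Sketch: W = n (Tn-theta)' Sighat^{-1} (Tn-theta) should be
# approximately chi-squared with p = 2 degrees of freedom.
set.seed(9999)
n = 200; nsim = 10000; theta = c(1,2)
W = numeric(nsim)
for(sim in 1:nsim)
  {
  x1 = rnorm(n, mean=1)                 # E(x1) = 1
  x2 = 0.5*x1 + rexp(n, rate=1) + 0.5   # E(x2) = 2, correlated with x1
  Y = cbind(x1,x2)
  Tn = apply(Y,2,mean)                  # Sample mean vector
  Sighat = var(Y)                       # Sample covariance matrix
  W[sim] = n * t(Tn-theta) %*% solve(Sighat) %*% (Tn-theta)
  }
mean(W > qchisq(0.95,df=2))             # Should be close to 0.05
\end{verbatim}
} % End size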
\end{frame} \begin{frame} \frametitle{Test of $H_0:\mathbf{L}\boldsymbol{\theta}=\mathbf{h}$} \framesubtitle{Where $\mathbf{L}$ is $r \times p$ and of full row rank} {\LARGE \begin{displaymath} W_n = n \left( \mathbf{LT}_n - \mathbf{h} \right)^\prime \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\prime \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right) \end{displaymath} } Distributed approximately as non-central chi-squared with \begin{itemize} \item Degrees of freedom $r$ and \item Non-centrality parameter $\lambda = n \left( \mathbf{L}\boldsymbol{\theta} - \mathbf{h} \right)^\prime \left(\mathbf{L}\boldsymbol{\Sigma} \mathbf{L}^\prime \right)^{-1} \left(\mathbf{L}\boldsymbol{\theta} - \mathbf{h} \right)$ \end{itemize} \vspace{10mm} If $\mathbf{T}_n$ is the maximum likelihood estimator of $\boldsymbol{\theta}$, it's called a \emph{Wald test} (and $\widehat{\boldsymbol{\Sigma}}_n$ has a special form). \end{frame} % Need some examples. \section{Multinomial model} \begin{frame} \frametitle{The multinomial model} \begin{itemize} \item A good source of examples \item Also directly useful in applications \end{itemize} \end{frame} \begin{frame} \frametitle{Multinomial coefficient} \framesubtitle{For $c$ categories} From $n$ objects, number of ways to choose \begin{itemize} \item $n_1$ of type 1 \item $n_2$ of type 2 \\ ~~~~~~~$\vdots$ \item $n_c$ of type $c$ \end{itemize} \begin{displaymath} \binom{n}{n_1~\cdots~n_c} = \frac{n!}{n_1!~\cdots~n_c!} \end{displaymath} \end{frame} \begin{frame} \frametitle{Example of a multinomial coefficient} \framesubtitle{A counting problem} % Optional sub-title Of 30 graduating students who are seeking employment, how many ways are there for 15 to be employed in a job related to their field of study, 10 to be employed in a job unrelated to their field of study, and 5 unemployed? \begin{displaymath} \binom{30}{15~10~5} = 465,817,912,560 \end{displaymath} \end{frame} % Sage: factorial(30)/(factorial(5)*factorial(25)) %\subsection{Multinomial Distribution} %%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Multinomial Distribution} \framesubtitle{Denote by $M(n,\boldsymbol{\theta})$, where $\boldsymbol{\theta} = (\theta_1, \ldots, \theta_c)$} \begin{itemize} \item Statistical experiment with $c$ outcomes \item Repeated independently $n$ times \item $Pr(\mbox{Outcome }j) = \theta_j$, $j = 1, \ldots, c$ \item Number of times outcome $j$ occurs is $n_j$, $j = 1, \ldots, c$ \item An integer-valued \emph{multivariate} distribution \end{itemize} \begin{displaymath} P(n_1, \ldots, n_c) = \binom{n}{n_1~\cdots~n_c} \theta_1^{n_1} \cdots \theta_c^{n_c}, \end{displaymath} \vspace{5mm} where $0 \leq n_j \leq n$, $\displaystyle{\sum_{j=1}^c n_j=n}$, $0 < \theta_j < 1$, and $\displaystyle{\sum_{j=1}^c \theta_j=1}$. \end{frame} \begin{frame} \frametitle{Example} \framesubtitle{Recent college graduates looking for a job} \begin{itemize} \item Probability of job related to field of study = 0.60 \item Probability of job unrelated to field of study = 0.30 \item Probability of no job = 0.10 \end{itemize} Of 30 randomly chosen students, what is probability that 15 are employed in a job related to their field of study, 10 are employed in a job unrelated to their field of study, and 5 are unemployed? 
\begin{displaymath} \binom{30}{15~10~5} \, 0.60^{15} 0.30^{10} 0.10^5 = \frac{4933527642332542053801}{381469726562500000000000} \approx 0.0129 \end{displaymath} % Sage: factorial(30)/(factorial(15)*factorial(10)*factorial(5)) * ((6/10)^(15)*(3/10)^(10)*(1/10)^5) \end{frame} \begin{frame}[fragile] \frametitle{Calculating multinomial probabilities with R} \begin{displaymath} \binom{30}{15~10~5} \, 0.60^{15} 0.30^{10} 0.10^5 = \frac{4933527642332542053801}{381469726562500000000000} \approx 0.0129 \end{displaymath} \vspace{10mm} \begin{verbatim} > dmultinom(c(15,10,5), prob=c(.6, .3, .1)) [1] 0.01293295 \end{verbatim} \end{frame} \begin{frame} \frametitle{There are actually $c-1$ variables and $c-1$ parameters} \framesubtitle{In the multinomial with $c$ categories} \begin{eqnarray*} P(n_1, \ldots, n_{c-1}) &=& \frac{n!}{n_1!~\cdots~n_{c-1}!(n-\sum_{j=1}^{c-1}n_j)!} \\ & & ~~~~\times~ \theta_1^{n_1} \cdots \theta_{c-1}^{n_{c-1}}(1-\sum_{j=1}^{c-1}\theta_j)^{n-\sum_{j=1}^{c-1}n_j} \end{eqnarray*} \end{frame} \begin{frame} \frametitle{Marginal distributions} %\framesubtitle{} Recall \begin{displaymath} Pr\{X=x \} = \sum_y \sum_z Pr\{X=x,Y=y,Z=z \} \end{displaymath} and \begin{displaymath} Pr\{X=x,Z=z \} = \sum_y Pr\{X=x,Y=y,Z=z \} \end{displaymath} \end{frame} \begin{frame} \frametitle{Marginals of the multinomial are multinomial too} \framesubtitle{Add over $n_{c-1}$, which goes from zero to whatever is left over from the other counts.} % It would be good to shift this over to the left somehow. {\Tiny \begin{eqnarray*} & & \sum_{n_{c-1}=0}^{n-\sum_{j=1}^{c-2}n_j} \frac{n!}{n_1!~\ldots~n_{c-1}!(n-\sum_{j=1}^{c-1}n_j)!} \theta_1^{n_1} \cdots \theta_{c-1}^{n_{c-1}}(1-\sum_{j=1}^{c-1}\theta_j)^{n-\sum_{j=1}^{c-1}n_j} \times \frac{(n-\sum_{j=1}^{c-2}n_j)!}{(n-\sum_{j=1}^{c-2}n_j)!} \\ \\ \\ &=& \frac{n!}{n_1!~\ldots~n_{c-2}!(n-\sum_{j=1}^{c-2}n_j)!} \theta_1^{n_1} \cdots \theta_{c-2}^{n_{c-2}} \\ & & \times \sum_{n_{c-1}=0}^{n-\sum_{j=1}^{c-2}n_j} \frac{(n-\sum_{j=1}^{c-2}n_j)!}{n_{c-1}!(n-\sum_{j=1}^{c-2}n_j-n_{c-1})!} \theta_{c-1}^{n_{c-1}} (1-\sum_{j=1}^{c-2}\theta_j-\theta_{c-1})^{n-\sum_{j=1}^{c-2}n_j-n_{c-1}} \\ \\ \\ &=& \frac{n!}{n_1!~\ldots~n_{c-2}!(n-\sum_{j=1}^{c-2}n_j)!} \theta_1^{n_1} \cdots \theta_{c-2}^{n_{c-2}} (1-\sum_{j=1}^{c-2}\theta_j)^{n-\sum_{j=1}^{c-2}n_j}, \end{eqnarray*} } where the last equality follows from the Binomial Theorem. It's multinomial with $c-1$ categories. %This is too messy -- students are not responsible for it. \end{frame} \begin{frame} \frametitle{Observe} \framesubtitle{You are responsible for these \emph{implications} of the last slide.} \begin{itemize} \item Adding over $n_{c-1}$ throws it into the last (``leftover") category. \item Labels $1, \ldots, c$ are arbitrary, so this means you can combine any 2 categories and the result is still multinomial. \item $c$ is arbitrary, so you can keep doing it and combine any number of categories. 
\item When only two categories are left, the result is binomial.
\item $E(n_j)=n\theta_j$, $Var(n_j) = n\theta_j(1-\theta_j)$
\end{itemize}
\end{frame}

%\subsection{Estimation}
%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Hypothetical data file}
\framesubtitle{Let $Y_{i,j}$ be indicators for category membership, $i=1, \ldots, n$ and $j=1, \ldots, c$}

\begin{center}
\begin{tabular}{|c|c||c|c|c|} \hline
Case & Job & $Y_1$ & $Y_2$ & $Y_3$ \\ \hline \hline
1 & 1 & 1 & 0 & 0 \\ \hline
2 & 3 & 0 & 0 & 1 \\ \hline
3 & 2 & 0 & 1 & 0 \\ \hline
4 & 1 & 1 & 0 & 0 \\ \hline
$\vdots$ & $\vdots$ & $\vdots$ & $\vdots$ & $\vdots$ \\ \hline
$n$ & 2 & 0 & 1 & 0 \\ \hline \hline
Total &  & $\sum_{i=1}^ny_{i,1}$ & $\sum_{i=1}^ny_{i,2}$ & $\sum_{i=1}^ny_{i,3}$ \\ \hline
\end{tabular}
\end{center}
Note that
\begin{itemize}
\item A real data file will almost never have the redundant variables $Y_1$, $Y_2$ and $Y_3$.
%\item $\sum_{j=1}^cy_{i,j}=1$
\item $\sum_{i=1}^ny_{i,j}=n_j$
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Lessons from the data file}
%\framesubtitle{}
\begin{itemize}
\item Cases ($n$ of them) are independent $M(1,\boldsymbol{\theta})$, so $E(Y_{i,j}) = \theta_j$.
\item Column totals $n_j$ count the number of times each category occurs: Joint distribution is $M(n,\boldsymbol{\theta})$.
\item If you make a frequency table (frequency distribution)
\begin{itemize}
\item The $n_j$ counts are the cell frequencies!
\item They are random variables, and now we know their joint distribution.
\item Each individual (marginal) table frequency is $B(n,\theta_j)$.
\item Expected value of cell frequency $j$ is $E(n_j) = n\theta_j$.
\end{itemize}
\item Tables of two or more dimensions present no problems; form combination variables.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Example of a frequency table}
\framesubtitle{For the Jobs data}
\begin{center}
\begin{tabular}{lrr} \hline
Job Category & Frequency & Percent \\ \hline
Employed in field & 106 & 53 \\
Employed outside field & 74 & 37 \\
Unemployed & 20 & 10 \\ \hline
Total & 200 & 100.0 \\ \hline
\end{tabular}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Estimation based on $E(Y_{i,j}) = \theta_j$}
\begin{displaymath}
E(\overline{\mathbf{Y}}) = E \left( \begin{array}{c}
\frac{1}{n}\sum_{i=1}^nY_{i,1} \\ \\ \frac{1}{n}\sum_{i=1}^nY_{i,2} \\ \vdots \\ \frac{1}{n}\sum_{i=1}^nY_{i,c}
\end{array} \right)
= E \left( \begin{array}{c} \overline{Y}_1 \\ \\ \overline{Y}_2 \\ \vdots \\ \overline{Y}_c \end{array} \right)
= \left( \begin{array}{c} \theta_1 \\ \\ \theta_2 \\ \vdots \\ \theta_c \end{array} \right)
\end{displaymath}
\vspace{5mm}

The sample mean vector is
\begin{itemize}
\item A vector of sample proportions.
\item Unbiased for $\boldsymbol{\theta}$.
\item Consistent for $\boldsymbol{\theta}$ because by the Law of Large Numbers, \end{itemize} {\LARGE \begin{displaymath} \overline{\mathbf{Y}}_n \stackrel{a.s.}{\rightarrow} \boldsymbol{\theta} \end{displaymath} } \end{frame} \section{Central Limit Theorem} \begin{frame} \frametitle{Multivariate Central Limit Theorem} To avoid a singular covariance matrix, let \begin{displaymath} \boldsymbol{\theta} = \left( \begin{array}{c} \theta_1 \\ \vdots \\ \theta_{c-1} \end{array} \right) \mbox{~~~~and~~~~} \overline{\mathbf{Y}}_n = \left( \begin{array}{c} \overline{Y}_{1~~} \\ \vdots \\ \overline{Y}_{c-1} \end{array} \right) \end{displaymath} \vspace{5mm} Multivariate Central Limit Theorem says let $\mathbf{Y}_1, \ldots, \mathbf{Y}_n$ be a random sample from a distribution with expected value vector $\boldsymbol{\mu}$ and covariance matrix $\boldsymbol{\Sigma}$. Then {\LARGE \begin{displaymath} \sqrt{n}(\overline{\mathbf{Y}}_n-\boldsymbol{\mu}) \stackrel{d}{\rightarrow} \mathbf{Y} \sim N\left(\mathbf{0}, \boldsymbol{\Sigma} \right). \end{displaymath} } \end{frame} \begin{frame} \frametitle{Asymptotically speaking} \framesubtitle{That is, approximately for large $n$} $\overline{\mathbf{Y}}_n$ is normal with \begin{itemize} \item Mean $\boldsymbol{\mu}$ and \item Covariance matrix $\frac{1}{n}\boldsymbol{\Sigma}$. \end{itemize} \end{frame} \begin{frame} \frametitle{$\sqrt{n}(\overline{\mathbf{Y}}_n-\boldsymbol{\mu}) \stackrel{d}{\rightarrow} \mathbf{Y} \sim N\left(\mathbf{0}, \boldsymbol{\Sigma} \right)$} %\framesubtitle{} For multinomial data, \begin{itemize} \item Have $\boldsymbol{\mu} = \boldsymbol{\theta}$ \item Because $\mathbf{Y}_i \sim M(1,\boldsymbol{\theta})$, the diagonal elements of $\boldsymbol{\Sigma}$ are $Var(Y_{i,j}) = \theta_j(1-\theta_j)$. \item What about the off-diagonal elements? \end{itemize} \begin{eqnarray*} Cov(Y_{i,j},Y_{i,k}) & = & E(Y_{i,j}Y_{i,k}) - E(Y_{i,j}) E(Y_{i,k}) \\ & = & ~~~~~ 0 ~~~~~~~ - ~~~~\theta_j~~~~~~\theta_k \end{eqnarray*} \end{frame} \begin{frame} \frametitle{Covariance matrix for $c=4$ categories} %\framesubtitle{} {\LARGE \begin{displaymath} \boldsymbol{\Sigma}(\boldsymbol{\theta}) = \left( \begin{array}{ccc} \theta_1(1-\theta_1) & -\theta_1\theta_2 & -\theta_1\theta_3 \\ -\theta_1\theta_2 & \theta_2(1-\theta_2) & -\theta_2\theta_3 \\ -\theta_1\theta_3 & -\theta_2\theta_3 & \theta_3(1-\theta_3) \end{array} \right) \end{displaymath} } \begin{itemize} \item Last category is dropped because it's redundant. \item Consistent estimator is easy \item All you need is the frequency table \end{itemize} \end{frame} \section{Applications} \begin{frame} \frametitle{A testing problem} %\framesubtitle{} Administrators at a vocational college recognize that the percentage of students who are unemployed after graduation will vary depending upon economic conditions, but they claim that still, about twice as many students will be employed in a job related to their field of study, compared to those who get an unrelated job. To test this hypothesis, they select a random sample of 200 students from the most recent class, and observe 106 employed in a job related to their field of study, 74 employed in a job unrelated to their field of study, and 20 unemployed. Test the hypothesis using a large-sample chi-squared test, being guided by the usual 0.05 significance level. State your conclusions in symbols and words. \end{frame} \begin{frame} \frametitle{Some questions to help guide us through the problem} \begin{itemize} \item What is the model? 
\begin{displaymath}
\mathbf{Y}_1, \ldots, \mathbf{Y}_n \stackrel{i.i.d.}{\sim} M(1,(\theta_1,\theta_2,\theta_3))
\end{displaymath}
\item What is the null hypothesis, in symbols?
\begin{displaymath}
H_0: \theta_1=2\theta_2
\end{displaymath}
\item Give the null hypothesis in matrix form as $H_0:\mathbf{L}\boldsymbol{\theta}=\mathbf{h}$
\begin{displaymath}
(1, -2) \left( \begin{array}{c} \theta_1 \\ \theta_2 \end{array} \right) = (0)
\end{displaymath}
Last category is dropped because it's redundant.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Test statistic}
\framesubtitle{For the Wald-like test}

Suppose
\begin{itemize}
\item $\sqrt{n}\left(\mathbf{T}_n - \boldsymbol{\theta} \right) \stackrel{d}{\rightarrow} \mathbf{T} \sim N\left(\mathbf{0},\boldsymbol{\Sigma} \right)$ and
\item $\widehat{\boldsymbol{\Sigma}}_n \stackrel{p}{\rightarrow} \boldsymbol{\Sigma}$.
\item $H_0:\mathbf{L}\boldsymbol{\theta}=\mathbf{h}$ is true, where $\mathbf{L}$ is $r \times p$ of full row rank.
\end{itemize}
Then
\begin{displaymath}
W_n = n \left( \mathbf{LT}_n - \mathbf{h} \right)^\prime \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\prime \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right) \stackrel{d}{\rightarrow} W \sim \chi^2(r).
\end{displaymath}
If $H_0$ is false, $W_n$ is asymptotically $\chi^2(r,\lambda)$, with
\begin{displaymath}
\lambda = n \left( \mathbf{L}\boldsymbol{\theta} - \mathbf{h} \right)^\prime \left(\mathbf{L}\boldsymbol{\Sigma} \mathbf{L}^\prime \right)^{-1} \left(\mathbf{L}\boldsymbol{\theta} - \mathbf{h} \right).
\end{displaymath}
\end{frame}

\begin{frame}[fragile]
\frametitle{The R function \texttt{WaldTest}}
%\framesubtitle{}
\begin{displaymath}
W_n = n \left( \mathbf{LT}_n - \mathbf{h} \right)^\prime \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\prime \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right)
\end{displaymath}
{\footnotesize
% This is the 2012 version of the WaldTest function.
\begin{verbatim}
WaldTest = function(L,thetahat,Vn,h=0) # H0: L theta = h
# Note Vn is the asymptotic covariance matrix of thetahat, so it's
# the consistent estimator of Sigma divided by n. For true Wald tests
# based on numerical MLEs, just use the inverse of the Hessian.
     {
     WaldTest = numeric(3)
     names(WaldTest) = c("W","df","p-value")
     r = dim(L)[1]
     W = t(L%*%thetahat-h) %*% solve(L%*%Vn%*%t(L)) %*% (L%*%thetahat-h)
     W = as.numeric(W)
     pval = 1-pchisq(W,r)
     WaldTest[1] = W; WaldTest[2] = r; WaldTest[3] = pval
     WaldTest
     } # End function WaldTest
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Calculating the test statistic with R}
%\framesubtitle{Frequencies were 106,74,20}
{\footnotesize
\begin{verbatim}
> # Easy to modify for any multinomial problem. Omit one category.
> freq = c(106,74,20); n = sum(freq)
> ybar = (freq/n)[1:2] # Just the first 2
> p = length(ybar)
> Sighat = matrix(nrow=p,ncol=p) # Empty matrix
> for(i in 1:p) { for (j in 1:p) Sighat[i,j] = -ybar[i]*ybar[j] }
> for(i in 1:p) Sighat[i,i] = ybar[i]*(1-ybar[i])
> Sighat
        [,1]    [,2]
[1,]  0.2491 -0.1961
[2,] -0.1961  0.2331
> 
> LL = rbind(c(1,-2))
> WaldTest(LL,ybar,Sighat/n)
         W         df    p-value 
4.48649474 1.00000000 0.03416366 
\end{verbatim}
} % End size
\end{frame}

\begin{frame}
\frametitle{Beer study}
%\framesubtitle{}

Under carefully controlled conditions, 120 beer drinkers each tasted 6 beers and indicated which one they liked best. Here are the numbers preferring each beer.
\begin{center}
\begin{tabular}{|l|c|c|c|c|c|c|} \hline
 & \multicolumn{6}{c|}{Preferred Beer} \\ \hline
 & 1 & 2 & 3 & 4 & 5 & 6 \\ \hline
Frequency & 30 & 24 & 22 & 28 & 9 & 7 \\ \hline
\end{tabular}
\end{center}
% x2 = rmultinom(1,size=120,prob=c(2,2,2,2,1,1)); x2
\begin{itemize}
\item State a reasonable model.
\end{itemize}
\begin{displaymath}
\mathbf{Y}_1, \ldots, \mathbf{Y}_n \stackrel{i.i.d.}{\sim} M(1,(\theta_1,\theta_2, \ldots, \theta_6))
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{The first question}
%\framesubtitle{}

Is preference for the 6 beers different in the population from which this sample was taken?
\begin{itemize}
\item State the null hypothesis.
\begin{displaymath}
\theta_1=\theta_2=\theta_3=\theta_4=\theta_5=\frac{1}{6}
\end{displaymath}
\item Give the null hypothesis in matrix form as $H_0:\mathbf{L}\boldsymbol{\theta}=\mathbf{h}$
\begin{displaymath}
\left( \begin{array}{c c c c c}
1 & 0 & 0 & 0 & 0 \\
0 & 1 & 0 & 0 & 0 \\
0 & 0 & 1 & 0 & 0 \\
0 & 0 & 0 & 1 & 0 \\
0 & 0 & 0 & 0 & 1
\end{array} \right)
\left( \begin{array}{c} \theta_1 \\ \theta_2 \\ \theta_3 \\ \theta_4 \\ \theta_5 \end{array} \right)
= \left( \begin{array}{c} 1/6 \\ 1/6 \\ 1/6 \\ 1/6 \\ 1/6 \end{array} \right)
\end{displaymath}
\item What are the degrees of freedom of the test?
\begin{displaymath}
df=5
\end{displaymath}
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Test for equal probabilities}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> # Estimated covariance matrix for a multinomial:
> # Omit one category.
> freq = c(30,24,22,28,9,7); n = sum(freq)
> ybar = (freq/n)[1:5] # Omit last category
> p = length(ybar)
> Sighat = matrix(nrow=p,ncol=p) # Empty matrix
> for(i in 1:p) { for (j in 1:p) Sighat[i,j] = -ybar[i]*ybar[j] }
> for(i in 1:p) Sighat[i,i] = ybar[i]*(1-ybar[i])
> 
> # L matrix is the 5x5 identity for this hypothesis
> alleq = numeric(5) + 1/6; alleq
[1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
> test1 = WaldTest(L=diag(5),thetahat=ybar,Vn=Sighat/n,h=alleq); test1
           W           df      p-value 
4.405483e+01 5.000000e+00 2.257610e-08 
> 
> round(test1,3)
      W      df p-value 
 44.055   5.000   0.000 
\end{verbatim}
} % End size
\end{frame}

\begin{frame}
\frametitle{Take a look}
\framesubtitle{Where is the lack of fit coming from?}
\begin{center}
\begin{tabular}{|l|r|r|r|r|r|r|} \hline
 & \multicolumn{6}{c|}{Preferred Beer} \\ \hline
 & 1~ & 2~ & 3~ & 4~ & 5~ & 6~ \\ \hline \hline
Frequency    & 30   & 24   & 22   & 28   & 9     & 7     \\ \hline
Expected     & 20   & 20   & 20   & 20   & 20    & 20    \\ \hline
Residual     & 10   & 4    & 2    & 8    & -11   & -13   \\ \hline
St. Residual & 2.11 & 0.91 & 0.47 & 1.73 & -3.81 & -5.06 \\ \hline
\end{tabular}
\end{center}
To standardize a residual, divide it by its estimated standard deviation.
%# R code - Make this HW
%# Examine residuals
%expect = numeric(6) + n/6
%resid = freq-expect
%p = freq/n
%se = sqrt(n*p*(1-p))
%stresid = resid/se
%round(rbind(freq,expect,resid,stresid),2)
\end{frame}

\begin{frame}
\frametitle{A follow-up question}
%\framesubtitle{}

It seems that the first 4 beers are lagers and the last two are ales. The client says no one would expect preference for lagers and ales to be the same. So let's test whether preference for the 4 lagers is different, and at the same time, whether preference for the 2 ales is different.
\begin{itemize}
\item Why not just look at the standardized residuals, which are $Z$-tests?
\item Give the null hypothesis in symbols.
\end{itemize}
\begin{displaymath}
\begin{array}{l}
\theta_1=\theta_2=\theta_3=\theta_4 \\ \\
\theta_5 = 1-\theta_1-\theta_2-\theta_3-\theta_4-\theta_5
\end{array}
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{Null hypothesis in matrix form}
\framesubtitle{For $\theta_1=\theta_2=\theta_3=\theta_4$ and $\theta_1+\theta_2+\theta_3+\theta_4+2\theta_5 = 1$}
\begin{displaymath}
\left( \begin{array}{rrrrr}
1 & -1 &  0 &  0 & 0 \\
0 &  1 & -1 &  0 & 0 \\
0 &  0 &  1 & -1 & 0 \\
1 &  1 &  1 &  1 & 2
\end{array} \right)
\left( \begin{array}{c} \theta_1 \\ \theta_2 \\ \theta_3 \\ \theta_4 \\ \theta_5 \end{array} \right)
= \left( \begin{array}{c} 0 \\ 0 \\ 0 \\ 1 \end{array} \right)
\end{displaymath}
\vspace{10mm}

There are infinitely many right answers.
\end{frame}

\begin{frame}[fragile]
\frametitle{Test for differences among the lagers and a difference between the ales, simultaneously}
%\framesubtitle{}
\begin{verbatim}
> # Test for diff among lagers and diff between ales
> L2 = rbind( c(1,-1, 0, 0, 0),
+             c(0, 1,-1, 0, 0),
+             c(0, 0, 1,-1, 0),
+             c(1, 1, 1, 1, 2) )
> h2 = cbind(c(0,0,0,1))
> WaldTest(L2,ybar,Sighat/n,h2)
        W        df   p-value 
1.8240899 4.0000000 0.7680721 
\end{verbatim}
% Likelihood ratio test p = 0.7735209
\end{frame}

\begin{frame}[fragile]
\frametitle{Test mean preference for ales vs. mean preference for lagers, just for completeness}

The null hypothesis is
\begin{eqnarray*}
&& \frac{1}{4} \left(\theta_1+\theta_2+\theta_3+\theta_4 \right) =
   \frac{1}{2} \left(\theta_5 + 1 - \sum_{j=1}^5 \theta_j \right) \\
& \Leftrightarrow & \theta_1+\theta_2+\theta_3+\theta_4 = \frac{2}{3}
\end{eqnarray*}
\begin{verbatim}
> # Average ale vs. average lager
> L3 = rbind(c(1,1,1,1,0)); h3=2/3
> WaldTest(L3,ybar,Sighat/n,h3)
           W           df      p-value 
4.153846e+01 1.000000e+00 1.155747e-10 
\end{verbatim}
\end{frame}

\begin{frame}
\frametitle{State the conclusion in plain language}

Consumers tend to prefer the lagers over the ales. Differences among the lagers and the difference between the two ales are small enough to be attributed to chance.
\end{frame}

% herehere Lift old between groups from last year.
%%%%%%%%%%%%%%%%%%% Pick this up later

\begin{frame}
\frametitle{Another application: Mean index numbers}

In a study of consumers' opinions of 5 popular TV programmes, 240 consumers who watch all the shows at least once a month completed a computerized interview. On one of the screens, they indicated how much they enjoyed each programme by mouse-clicking on a 10cm line. One end of the line was labelled ``Like very much,'' and the other end was labelled ``Dislike very much.'' So each respondent contributed 5 ratings, on a continuous scale from zero to ten.

\vspace{5mm}

The study was commissioned by the producers of one of the shows, which will be called ``Programme $E$.'' Ratings of Programmes $A$ through $D$ were expressed as percentages of the rating for Programme $E$, and these were described as ``Liking indexed to Programme $E$.''
\end{frame}

\begin{frame}
\frametitle{In statistical language}

We have $X_{i,1}, \ldots, X_{i,5}$ for $i = 1, \ldots, n$, and we calculate
\begin{displaymath}
Y_{i,j} = 100 \frac{X_{i,j}}{X_{i,5}}
\end{displaymath}
\begin{itemize}
\item We want confidence intervals for the 4 mean index numbers, and tests of differences between means.
\item Observations from the same respondent are definitely not independent.
\item What is the distribution?
\item What is a reasonable model?
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Model}
%\framesubtitle{}

Let $\mathbf{Y}_1, \ldots, \mathbf{Y}_n$ be a random sample from an unknown multivariate distribution $F$ with expected value $\boldsymbol{\mu}$ and covariance matrix $\boldsymbol{\Sigma}$.
\vspace{10mm}

One way to think about it is
\begin{itemize}
\item The parameter is the unknown distribution $F$.
\item $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$ are \emph{functions} of $F$.
\item We're only interested in $\boldsymbol{\mu}$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{We have the tools we need}
%\framesubtitle{}
\begin{itemize}
\item $\sqrt{n}(\overline{\mathbf{Y}}_n-\boldsymbol{\mu}) \stackrel{d}{\rightarrow} \mathbf{Y} \sim N\left(\mathbf{0}, \boldsymbol{\Sigma} \right)$ and
\item For $\widehat{\boldsymbol{\Sigma}}_n \stackrel{p}{\rightarrow} \boldsymbol{\Sigma}$, use the sample covariance matrix.
\item $H_0:\mathbf{L}\boldsymbol{\mu}=\mathbf{h}$
\end{itemize}
\begin{displaymath}
W_n = n \left( \mathbf{L}\overline{\mathbf{Y}}_n - \mathbf{h} \right)^\prime \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\prime \right)^{-1} \left(\mathbf{L}\overline{\mathbf{Y}}_n - \mathbf{h} \right)
\end{displaymath}
\end{frame}

\begin{frame}[fragile]
\frametitle{Read the data}
%\framesubtitle{}
{\tiny
\begin{verbatim}
> Y = read.table("http://www.utstat.toronto.edu/~brunner/appliedf12/data/TVshows.data")
\end{verbatim}
} % End size
\begin{verbatim}
> Y[1:4,]
      A     B     C     D
1 101.3  81.0 101.8  89.6
2  94.0  85.3  76.3 100.8
3 145.4 138.7 151.0 148.3
4  72.0  86.1  96.1  96.3
> n = dim(Y)[1]; n
[1] 240
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{Confidence intervals}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> ave = apply(Y,2,mean); ave
        A         B         C         D 
101.65958  98.50167  99.39958 103.94167 
> v = apply(Y,2,var) # Sample variances with n-1
> stderr = sqrt(v/n)
> marginerror95 = 1.96*stderr
> lower95 = ave-marginerror95
> upper95 = ave+marginerror95
> Z = (ave-100)/stderr
> rbind(ave,marginerror95,lower95,upper95,Z)
                       A          B           C          D
ave           101.659583  98.501667  99.3995833 103.941667
marginerror95   1.585652   1.876299   1.7463047   1.469928
lower95       100.073931  96.625368  97.6532786 102.471739
upper95       103.245236 100.377966 101.1458880 105.411594
Z               2.051385  -1.565173  -0.6738897   5.255814
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{What if we ``assume'' normality?}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> rbind(ave,lower95,upper95,Z)
                 A          B           C          D
ave     101.659583  98.501667  99.3995833 103.941667
lower95 100.073931  96.625368  97.6532786 102.471739
upper95 103.245236 100.377966 101.1458880 105.411594
Z         2.051385  -1.565173  -0.6738897   5.255814
> attach(Y) # So A, B, C, D are available
> t.test(A,mu=100)

        One Sample t-test

data:  A 
t = 2.0514, df = 239, p-value = 0.04132
alternative hypothesis: true mean is not equal to 100 
95 percent confidence interval:
 100.0659 103.2533 
sample estimates:
mean of x 
 101.6596 
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Test equality of means}
%\framesubtitle{}
{\footnotesize
\begin{verbatim}
> S = var(Y); S
         A         B         C        D
A 157.0779 110.77831 106.56220 109.6234
B 110.7783 219.93950  95.66686 100.3585
C 106.5622  95.66686 190.51937 106.2501
D 109.6234 100.35851 106.25006 134.9867
> cor(Y)
          A         B         C         D
A 1.0000000 0.5959991 0.6159934 0.7528355
B 0.5959991 1.0000000 0.4673480 0.5824479
C 0.6159934 0.4673480 1.0000000 0.6625431
D 0.7528355 0.5824479 0.6625431 1.0000000
> 
> L4 = rbind( c(1,-1, 0, 0),
+             c(0, 1,-1, 0),
+             c(0, 0, 1,-1) )
> WaldTest(L=L4,thetahat=ave,Vn=S/n)
           W           df      p-value 
7.648689e+01 3.000000e+00 2.220446e-16 \end{verbatim} } % End size \end{frame} \section{Delta Method} \begin{frame} \frametitle{The Delta Method} If a sequence of random vectors converges to something nice in distribution, the Delta Method helps with the convergence of \emph{functions} of those random vectors. \end{frame} \begin{frame} \frametitle{The Jacobian} %\framesubtitle{} \begin{itemize} \item Univariate version of the Delta method says $\sqrt{n}\left( g(T_n)- g(\theta) \right) \stackrel{d}{\rightarrow} g^\prime(\theta) \, T $ \item In the multivariate version, the derivative is replaced by a matrix of partial derivatives. \item Say the function $g$ maps $\mathbb{R}^3$ into $\mathbb{R}^2$. Then \begin{displaymath} \mbox{\.{g}}(\boldsymbol{\theta}) = \left( \begin{array}{ccc} \frac{\partial }{\partial \theta_1}g_1(\boldsymbol{\theta}) & \frac{\partial }{\partial \theta_2}g_1(\boldsymbol{\theta}) & \frac{\partial }{\partial \theta_3}g_1(\boldsymbol{\theta}) \\ \\ \frac{\partial }{\partial \theta_1}g_2(\boldsymbol{\theta}) & \frac{\partial }{\partial \theta_2}g_2(\boldsymbol{\theta}) & \frac{\partial }{\partial \theta_3}g_2(\boldsymbol{\theta}) \end{array} \right) \end{displaymath} where $\boldsymbol{\theta}=(\theta_1,\theta_2,\theta_3)^\prime$. \item It's a Jacobian. \end{itemize} \end{frame} \begin{frame} \frametitle{The Multivariate Delta Method} Let $g: \mathbb{R}^d \rightarrow \mathbb{R}^k$ be such that the elements of \.{g}$(\mathbf{x}) = \left[ \frac{\partial g_i}{\partial x_j} \right]_{k \times d}$ are continuous in a neighborhood of $\boldsymbol{\theta} \in \mathbb{R}^d$. \begin{itemize} \item If $\mathbf{T}_n$ is a sequence of $d$-dimensional random vectors such that $\sqrt{n}(\mathbf{T}_n-\boldsymbol{\theta}) \stackrel{d}{\rightarrow} \mathbf{T}$, then $\sqrt{n}(g(\mathbf{T}_n)-g(\boldsymbol{\theta})) \stackrel{d}{\rightarrow} \mbox{\.{g}} (\boldsymbol{\theta}) \mathbf{T}$. \item In particular, if $\sqrt{n}(\mathbf{T}_n-\boldsymbol{\theta}) \stackrel{d}{\rightarrow} \mathbf{T} \sim N(\mathbf{0},\mathbf{\Sigma})$, then $\sqrt{n}(g(\mathbf{T}_n)-g(\boldsymbol{\theta})) \stackrel{d}{\rightarrow} \mathbf{Y} \sim N(\mathbf{0}, \mbox{\.{g}}(\boldsymbol{\theta})\mathbf{\Sigma}\mbox{\.{g}}(\boldsymbol{\theta}) ^\prime)$. \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Example: The Japanese car study} In a study commissioned by the Honda motor company\footnote{Not really. All the numbers are made up, based loosely on figures I found on the Internet.}, a random sample of 1200 Canadians who purchased a new Japanese car within the past 12 months gave the make of the most recent one they bought. Company executives like to look at percent differences, compared to choice of their own brand. {\scriptsize \begin{verbatim} > car = c(316, 414, 138, 254, 28, 50) > names(car) = c("Honda", "Toyota", "Nissan", "Mazda", "Mitsubishi", "Suburu") > perdiff = round(100 * (car-car[1])/car[1],1) > car Honda Toyota Nissan Mazda Mitsubishi Suburu 316 414 138 254 28 50 > perdiff Honda Toyota Nissan Mazda Mitsubishi Suburu 0.0 31.0 -56.3 -19.6 -91.1 -84.2 \end{verbatim} } % End size \end{frame} \begin{frame} \frametitle{Multinomial model with 6 categories} %\framesubtitle{} \begin{itemize} % \item It's a multinomial model with 6 categories. \item Make Honda the ``other" category, for symmetry. \item Parameters are $\boldsymbol{\theta} = (\theta_1, \ldots, \theta_5)^\prime$. \item For compactness, let $\theta_0=1-\sum_{k=1}^5 \theta_k$, the probability of buying a Honda. 
\item Estimate $\boldsymbol{\theta}$ with the vector of sample proportions $\overline{\mathbf{X}} = (\overline{X}_1, \ldots, \overline{X}_5)^\prime$. \item The \emph{functions} of $\boldsymbol{\theta}$ that interest us are the population percent differences: \begin{displaymath} g_i(\boldsymbol{\theta}) = 100\left(\frac{\theta_i-\theta_0}{\theta_0}\right) = 100\left(\frac{\theta_i}{\theta_0}-1\right) \end{displaymath} for $i=1, \ldots, 5$. \end{itemize} \end{frame} \begin{frame} \frametitle{The function $g(\boldsymbol{\theta})$} \framesubtitle{Written a better way} \begin{displaymath} g(\boldsymbol{\theta}) = \left( \begin{array}{c} g_1(\boldsymbol{\theta}) \\ \\ g_2(\boldsymbol{\theta}) \\ \\ g_3(\boldsymbol{\theta}) \\ \\ g_4(\boldsymbol{\theta}) \\ \\ g_5(\boldsymbol{\theta}) \end{array} \right) = \left( \begin{array}{c} 100\left(\frac{\theta_1}{\theta_0}-1\right) \\ \\ 100\left(\frac{\theta_2}{\theta_0}-1\right) \\ \\ 100\left(\frac{\theta_3}{\theta_0}-1\right) \\ \\ 100\left(\frac{\theta_4}{\theta_0}-1\right) \\ \\ 100\left(\frac{\theta_5}{\theta_0}-1\right) \end{array} \right) \end{displaymath} \end{frame} \begin{frame} \frametitle{The Jacobian \.{g}$(\boldsymbol{\theta})$ is a $5 \times 5$ matrix of partial derivatives.} %\framesubtitle{} That's only because $g: \mathbb{R}^5 \rightarrow \mathbb{R}^5$. Usually \.{g}$(\boldsymbol{\theta})$ has more columns than rows. \vspace{10mm} \vspace{10mm} Using $\theta_0=1-\sum_{k=1}^5 \theta_k$ and noting $\frac{\partial \theta_0}{\partial \theta_j} = -1$, \begin{displaymath} \frac{\partial g_i}{\partial \theta_j} = \frac{\partial}{\partial \theta_j} 100\left(\frac{\theta_i}{\theta_0}-1\right) = \left\{ \begin{array}{ll} 100 \frac{\theta_i+\theta_0}{\theta_0^2} & \mbox{ if } i = j \\ &\\ 100 \frac{\theta_i}{\theta_0^2} & \mbox{ if } i \neq j \end{array} \right. % Need that crazy invisible right period. 
\end{displaymath} \end{frame} \begin{frame} \frametitle{The Jacobian} \begin{displaymath} \mbox{\.{g}}(\boldsymbol{\theta}) = \frac{100}{\theta_0^2} \left( \begin{array}{ccccc} \theta_1+\theta_0 & \theta_1 & \theta_1 & \theta_1 & \theta_1 \\ \theta_2 & \theta_2+\theta_0 & \theta_2 & \theta_2 & \theta_2 \\ \theta_3 & \theta_3 & \theta_3+\theta_0 & \theta_3 & \theta_3 \\ \theta_4 & \theta_4 & \theta_4 & \theta_4+\theta_0 & \theta_4 \\ \theta_5 & \theta_5 & \theta_5 & \theta_5 & \theta_5+\theta_0 \end{array} \right) \end{displaymath} \end{frame} \begin{frame} \frametitle{The asymptotic covariance matrix} %\framesubtitle{Of the vector of percent differences} The Delta Method says \begin{displaymath} \sqrt{n}(g(\overline{\mathbf{X}}_n)-g(\boldsymbol{\theta})) \stackrel{d}{\rightarrow} \mathbf{Y} \sim N(\mathbf{0}, \mbox{\.{g}}(\boldsymbol{\theta})\mathbf{\Sigma}(\boldsymbol{\theta})\mbox{\.{g}}(\boldsymbol{\theta}) ^\prime) \end{displaymath} So the asymptotic covariance matrix of $g(\overline{\mathbf{X}}_n)$ (the percent differences from Honda) is $\frac{1}{n}$\.g$(\boldsymbol{\theta})\mathbf{\Sigma}(\boldsymbol{\theta})\mbox{\.{g}}(\boldsymbol{\theta}) ^\prime$, where {\tiny \begin{displaymath} \mbox{\.{g}}(\boldsymbol{\theta}) = \frac{100}{\theta_0^2} \left( \begin{array}{ccccc} \theta_1+\theta_0 & \theta_1 & \theta_1 & \theta_1 & \theta_1 \\ \theta_2 & \theta_2+\theta_0 & \theta_2 & \theta_2 & \theta_2 \\ \theta_3 & \theta_3 & \theta_3+\theta_0 & \theta_3 & \theta_3 \\ \theta_4 & \theta_4 & \theta_4 & \theta_4+\theta_0 & \theta_4 \\ \theta_5 & \theta_5 & \theta_5 & \theta_5 & \theta_5+\theta_0 \end{array} \right) \end{displaymath} } % End size and {\tiny \begin{displaymath} \boldsymbol{\Sigma}(\boldsymbol{\theta}) = \left( \begin{array}{ccccc} \theta_1(1-\theta_1) & -\theta_1\theta_2 & -\theta_1\theta_3 & -\theta_1\theta_4 & -\theta_1\theta_5 \\ -\theta_1\theta_2 & \theta_2(1-\theta_2) & -\theta_2\theta_3 & -\theta_2\theta_4 & -\theta_2\theta_5 \\ -\theta_1\theta_3 & -\theta_2\theta_3 & \theta_3(1-\theta_3) & -\theta_3\theta_4 & -\theta_3\theta_5 \\ -\theta_1\theta_4 & -\theta_2\theta_4 & -\theta_3\theta_4 & \theta_4(1-\theta_4) & -\theta_4\theta_5 \\ -\theta_1\theta_5 & -\theta_2\theta_5 & -\theta_3\theta_5 & -\theta_4\theta_5 & \theta_5(1-\theta_5) \\ \end{array} \right) \end{displaymath} } % End size Estimate $\theta_j$ with $\overline{X}_j$. \end{frame} \begin{frame}[fragile] \frametitle{Calculate $\boldsymbol{\Sigma}(\widehat{\boldsymbol{\theta}})$} {\color{blue} {\footnotesize \begin{verbatim} > # Calculate estimated Sigma(theta). Did this kind of thing earlier. 
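> # First set up n and xbar (not shown earlier for this example):
> n = sum(car)        # n = 1200 purchasers
> xbar = (car/n)[2:6] # Sample proportions, omitting Honda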
> p = length(xbar)
> Sighat = matrix(nrow=p,ncol=p) # Empty matrix
> for(i in 1:p) { for (j in 1:p) Sighat[i,j] = -xbar[i]*xbar[j] }
> for(i in 1:p) Sighat[i,i] = xbar[i]*(1-xbar[i])
> Sighat
\end{verbatim}
} % End size
} % End color
{\footnotesize
\begin{verbatim}
          [,1]         [,2]         [,3]          [,4]          [,5]
[1,]  0.225975 -0.039675000 -0.073025000 -0.0080500000 -0.0143750000
[2,] -0.039675  0.101775000 -0.024341667 -0.0026833333 -0.0047916667
[3,] -0.073025 -0.024341667  0.166863889 -0.0049388889 -0.0088194444
[4,] -0.008050 -0.002683333 -0.004938889  0.0227888889 -0.0009722222
[5,] -0.014375 -0.004791667 -0.008819444 -0.0009722222  0.0399305556
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Calculate \.g$(\widehat{\boldsymbol{\theta}})$}
{\color{blue}
{\footnotesize
\begin{verbatim}
> # Calculate estimated gdot
> xbar0 = car[1]/n # Proportion buying Honda
> gdot = matrix(nrow=p,ncol=p) # Empty matrix
> for(i in 1:p) gdot[i,] = numeric(p)+xbar[i] # Replace each row
> gdot = gdot + diag(numeric(p)+xbar0)
> gdot = 100/xbar0^2 * gdot
> gdot
\end{verbatim}
} % End size
} % End color
{\footnotesize
\begin{verbatim}
          [,1]      [,2]      [,3]      [,4]      [,5]
[1,] 877.26326 497.51642 497.51642 497.51642 497.51642
[2,] 165.83881 545.58564 165.83881 165.83881 165.83881
[3,] 305.23954 305.23954 684.98638 305.23954 305.23954
[4,]  33.64845  33.64845  33.64845 413.39529  33.64845
[5,]  60.08652  60.08652  60.08652  60.08652 439.83336
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Asymptotic covariance matrix of percent differences}
{\color{blue}
{\footnotesize
\begin{verbatim}
> # The approximate asymptotic covariance matrix of
> # percent differences will be called V
> 
> V = (1/n) * gdot %*% Sighat %*% t(gdot)
> carnames = c("Toyota", "Nissan", "Mazda", "Mitsubishi", "Suburu")
> rownames(V) = carnames; colnames(V) = carnames
> V
\end{verbatim}
} % End size
} % End color
{\footnotesize
\begin{verbatim}
              Toyota    Nissan     Mazda Mitsubishi    Suburu
Toyota     95.777160 18.105819 33.325203  3.6736445 6.5600794
Nissan     18.105819 19.855174 11.108401  1.2245482 2.1866931
Mazda      33.325203 11.108401 45.882527  2.2538785 4.0247830
Mitsubishi  3.673644  1.224548  2.253878  3.0524969 0.4436769
Suburu      6.560079  2.186693  4.024783  0.4436769 5.7994905
\end{verbatim}
} % End size
\end{frame}

\begin{frame}[fragile]
\frametitle{Confidence intervals}
{\color{blue}
{\footnotesize
\begin{verbatim}
> StdError = sqrt(diag(V))
> MarginError95 = 1.96*StdError
> PercentDiff = perdiff[2:6]
> Lower95 = PercentDiff - MarginError95
> Upper95 = PercentDiff + MarginError95
> 
> # Report
> rbind(car,Percent); rbind(PercentDiff,MarginError95,Lower95,Upper95)
\end{verbatim}
} % End size
} % End color
{\footnotesize
\begin{verbatim}
         Honda Toyota Nissan  Mazda Mitsubishi Suburu
car     316.00  414.0  138.0 254.00      28.00  50.00
Percent  26.33   34.5   11.5  21.17       2.33   4.17
                Toyota     Nissan      Mazda Mitsubishi     Suburu
PercentDiff   31.00000 -56.300000 -19.600000 -91.100000 -84.200000
MarginError95 19.18170   8.733592  13.276382   3.424394   4.720098
Lower95       11.81830 -65.033592 -32.876382 -94.524394 -88.920098
Upper95       50.18170 -47.566408  -6.323618 -87.675606 -79.479902
\end{verbatim}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto.
It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely.

The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf12}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/appliedf12}}
\end{frame}

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Univariate Central Limit Theorem}
\begin{itemize}
\item Because $n_j \sim B(n,\theta_j)$, $\frac{n_j}{n} = \overline{Y}_j$ is approximately $N\left(\theta_j,\frac{\theta_j(1-\theta_j)}{n}\right)$.
\item Approximate $\theta_j$ with $\overline{Y}_j$ in the variance if necessary.
\item Can be used in confidence intervals and tests about a single parameter.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Confidence interval for a single parameter $\theta_j$}
\framesubtitle{95\% confidence interval for true proportion unemployed}
\begin{center}
{\tiny
\begin{tabular}{lrr} \hline
Job Category & Frequency & Percent \\ \hline
Employed in field & 106 & 53 \\
Employed outside field & 74 & 37 \\
Unemployed & 20 & 10 \\ \hline
Total & 200 & 100.0 \\ \hline
\end{tabular}
}
\end{center}
\begin{displaymath}
\overline{Y}_3 \stackrel{approx}{\sim} N\left(\theta_3,\frac{\theta_3(1-\theta_3)}{n}\right)
\end{displaymath}
So a confidence interval for $\theta_3$ is
\begin{eqnarray*}
\overline{Y}_3 \pm 1.96\sqrt{\frac{\overline{Y}_3(1-\overline{Y}_3)}{n}}
& = & 0.10 \pm 1.96\sqrt{\frac{0.10(1-0.10)}{200}} \\
& = & 0.10 \pm 0.042 \\
& = & (0.058, 0.142)
\end{eqnarray*}
% me = 1.96*sqrt(.1*.9/200); me
% .1-me; .1+me
\end{frame}