\documentclass[12pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{fullpage}
%\pagestyle{empty} % No page numbers
\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{STA 2101/442 Assignment Four}}\footnote{Copyright information is at the end of the last page.}
\vspace{1 mm}
\end{center}
\noindent Except for Questions \ref{SATmean} and \ref{distraction}, these problems are practice for the quiz, and are not to be handed in. Please do the homework using the formula sheet, which is posted on the course home page. \textbf{Please bring the R printouts for Questions \ref{SATmean} and \ref{distraction} to the quiz.}
\begin{enumerate}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% MGF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item The joint moment-generating function of a $p$-dimensional random vector $\mathbf{X}$ is defined as $M_{\mathbf{X}}(\mathbf{t}) = E\left(e^{\mathbf{t}^\top \mathbf{X}} \right)$.
\begin{enumerate}
\item Let $\mathbf{Y} = \mathbf{AX}$, where $\mathbf{A}$ is a matrix of constants. Find the moment-generating function of $\mathbf{Y}$.
\item Let $\mathbf{Y} = \mathbf{X} + \mathbf{c}$, where $\mathbf{c}$ is a $p \times 1$ vector of constants. Find the moment-generating function of $\mathbf{Y}$.
\end{enumerate}
\item Let $Z_1, \ldots, Z_p \stackrel{i.i.d.}{\sim}N(0,1)$, and
\begin{displaymath}
\mathbf{Z} = \left( \begin{array}{c} Z_1 \\ \vdots \\ Z_p \end{array} \right).
\end{displaymath}
\begin{enumerate}
\item What is the joint moment-generating function of $\mathbf{Z}$? Show some work.
\item Let $\mathbf{Y} = \boldsymbol{\Sigma}^{1/2}\mathbf{Z} + \boldsymbol{\mu}$, where $\boldsymbol{\Sigma}$ is a $p \times p$ symmetric \emph{non-negative definite} matrix and $\boldsymbol{\mu} \in \mathbb{R}^p$. \begin{enumerate} \item What is $E(\mathbf{Y})$? \item What is the variance-covariance matrix of $\mathbf{Y}$? Show some work. \item What is the moment-generating function of $\mathbf{Y}$? Show your work. \end{enumerate} \end{enumerate} \item We say the $p$-dimensional random vector $\mathbf{Y}$ is multivariate normal with expected value $\boldsymbol{\mu}$ and variance-covariance matrix $\boldsymbol{\Sigma}$, and write $\mathbf{Y} \sim N_p(\boldsymbol{\mu}, \boldsymbol{\Sigma})$, when $\mathbf{Y}$ has moment-generating function $ M_{_\mathbf{Y}}(\mathbf{t}) = e^{\mathbf{t}^\top\boldsymbol{\mu} + \frac{1}{2} \mathbf{t}^\top\boldsymbol{\Sigma}\mathbf{t}}$. \begin{enumerate} \item Let $\mathbf{Y} \sim N_p(\boldsymbol{\mu}, \boldsymbol{\Sigma})$ and $\mathbf{W}=\mathbf{AY}$, where $\mathbf{A}$ is an $r \times p$ matrix of constants. What is the distribution of $\mathbf{W}$? Show your work. \item Let $\mathbf{Y} \sim N_p(\boldsymbol{\mu}, \boldsymbol{\Sigma})$ and $\mathbf{W}=\mathbf{Y}+\mathbf{c}$, where $\mathbf{c}$ is a $p \times 1$ vector of constants. What is the distribution of $\mathbf{W}$? Show your work. \end{enumerate} \item Let $\mathbf{Y} \sim N_2(\boldsymbol{\mu}, \boldsymbol{\Sigma})$, with \begin{displaymath} \mathbf{Y} = \left(\begin{array}{c} Y_1 \\ Y_2 \end{array}\right) ~~~~~ \boldsymbol{\mu} = \left(\begin{array}{c} \mu_1 \\ \mu_2 \end{array}\right) ~~~~~ \boldsymbol{\Sigma} = \left(\begin{array}{cc} \sigma^2_1 & 0 \\ 0 & \sigma^2_2 \end{array}\right) \end{displaymath} Using moment-generating functions, show $Y_1$ and $Y_2$ are independent. 
\item Let $\mathbf{X}= (X_1,X_2,X_3)^\prime$ be multivariate normal with \begin{displaymath} \boldsymbol{\mu} = \left[ \begin{array}{c} 1 \\ 0 \\ 6 \end{array} \right] \mbox{ and } \boldsymbol{\Sigma} = \left[ \begin{array}{c c c} 1 & 0 & 0 \\ 0 & 2 & 0 \\ 0 & 0 & 1 \end{array} \right] . \end{displaymath} Let $Y_1=X_1+X_2$ and $Y_2=X_2+X_3$. Find the joint distribution of $Y_1$ and $Y_2$. \item Let $X_1$ be Normal$(\mu_1, \sigma^2_1)$, and $X_2$ be Normal$(\mu_2, \sigma^2_2)$, independent of $X_1$. What is the joint distribution of $Y_1=X_1+X_2$ and $Y_2=X_1-X_2$? What is required for $Y_1$ and $Y_2$ to be independent? Hint: Use matrices. \item Let $\mathbf{Y}~=~\mathbf{X} \boldsymbol{\beta}~+~\boldsymbol{\epsilon}$, where $\mathbf{X}$ is an $n \times p$ matrix of known constants, $\boldsymbol{\beta}$ is a $p \times 1$ vector of unknown constants, and $\boldsymbol{\epsilon}$ is multivariate normal with mean zero and covariance matrix $\sigma^2 \mathbf{I}_n$, where $\sigma^2 > 0$ is a constant. In the following, it may be helpful to recall that $(\mathbf{A}^{-1})^\prime=(\mathbf{A}^\prime)^{-1}$. \begin{enumerate} \item What is the distribution of $\mathbf{Y}$? \item The maximum likelihood estimate (MLE) of $\boldsymbol{\beta}$ is $\hat{\boldsymbol{\beta}} = (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime \mathbf{Y}$. What is the distribution of $\hat{\boldsymbol{\beta}}$? Show the calculations. \item Let $\widehat{\mathbf{Y}}=\mathbf{X}\hat{\boldsymbol{\beta}}$. What is the distribution of $\widehat{\mathbf{Y}}$? Show the calculations. \item Let the vector of residuals $\mathbf{e}= (\mathbf{Y}-\widehat{\mathbf{Y}})$. What is the distribution of $\mathbf{e}$? Show the calculations. Simplify both the expected value (which is zero) and the covariance matrix. 
\end{enumerate} \item Show that if $\mathbf{X} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$, with $\boldsymbol{\Sigma}$ positive definite, then $Y = (\mathbf{X}-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(\mathbf{X}-\boldsymbol{\mu})$ has a chi-square distribution with $p$ degrees of freedom. \item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. \begin{enumerate} \item Show $Cov(\overline{X},(X_j-\overline{X}))=0$ for $j=1, \ldots, n$. \item Show that $\overline{X}$ and $S^2$ are independent. \end{enumerate} \newpage
%%%%%%%%%% Wald-like Test %%%%%%%%%%
\item The statistic \begin{displaymath} W_n = n \left( \mathbf{LT}_n - \mathbf{h} \right)^\prime \left(\mathbf{L}\widehat{\boldsymbol{\Sigma}}_n \mathbf{L}^\prime \right)^{-1} \left(\mathbf{LT}_n - \mathbf{h} \right) \end{displaymath} provides a large-sample test of $H_0:\mathbf{L}\boldsymbol{\theta}=\mathbf{h}$. Let $X_1, \ldots, X_n$ be a random sample from a $B(1,\theta)$ distribution. \begin{enumerate} \item Write down and simplify the $W_n$ statistic for testing $H_0: \theta = \theta_0$ versus $H_1: \theta \neq \theta_0$. \item Your answer is related to a $Z$-test of this same null hypothesis. Write down the formula for the $Z$ statistic. \end{enumerate} \item \label{SATmean} In Assignment One, you tested the difference between the means of the Verbal SAT and Math SAT scores. A link to the SAT data is available from the course home page. Using R, please calculate the $W_n$ statistic to test this hypothesis. Feel free to use my code directly. Note that the statistic $\mathbf{T}_n$ is of dimension \emph{two}. Guided by the usual $\alpha=0.05$ significance level, what do you conclude? Be able to state your conclusion in plain, non-statistical language. Bring your R printout to the quiz. \item A team of botanists grew fungus in a nutrient solution in test tubes. 
Each day for seven days, one of their graduate students carefully measured the length of the fungus in each of $n$ tubes. The scientists were interested in lots of things, including whether average growth was linear or not. Denote the expected amount of fungus at day $j$ by $\mu_j$. \begin{enumerate} \item What is the null hypothesis, in symbols? \item Assuming that the scientists wish to make as few assumptions as possible and $n$ is large, the $W_n$ statistic is natural for this problem. What is $\mathbf{T}_n$? \item What is $\mathbf{L}$? \item What is $\mathbf{h}$? \item What is a convenient choice for $\widehat{\boldsymbol{\Sigma}}_n$? How many rows and columns? \end{enumerate} \newpage \item \label{distraction} In a study of the psychology of attention, subjects attempted to solve word problems while listening to distracting background noise. The distracting material was either music, or spoken words related to the problem they were trying to solve. The distracting material was presented at three different levels of loudness. Each subject attempted 10 problems at each combination of loudness and type of distraction, for a total of 60 problems. Order of presentation was randomized. Data for each subject are number correct in each of the six treatment combinations. The data are available in the file \texttt{distract.data}, available at \noindent \href{http://www.utstat.toronto.edu/~brunner/appliedf14/code_n_data/hw/distract.data} {\texttt{http://www.utstat.toronto.edu/$\sim$brunner/appliedf14/code\_n\_data/hw/distract.data}}. There is a link from the course website under \texttt{Data sets} in case the one in this document does not work. See \texttt{help(read.table)} if necessary. \begin{enumerate} \item Produce a table showing the sample mean for each of the six treatment conditions. \item Give a large-sample 95\% confidence interval for each treatment mean. \item Now test whether the six treatment means (expected values) are equal; as usual, $\alpha=0.05$. 
You may use my \href{http://www.utstat.utoronto.ca/~brunner/Rfunctions/Wtest.txt} {\texttt{Wtest}} function. There is a link from the course website under \texttt{Data sets} in case the one in this document does not work. Just to make sure we are doing things the same way, my test statistic value is $W_n = 757.293$. In plain, non-statistical language, what do you conclude? \item Now we will compare \emph{averages} of expected values. Those who have had a course in experimental design will recognize that we are testing differences between marginal means. Test the difference between the average expected test performance for Voice distraction and the average expected test performance for Music distraction. Be able to state a \emph{directional} conclusion in plain, non-statistical language, if a conclusion is justified by the test. \item Now just for Voice distraction, is there any effect of volume? Do the test and state a conclusion in plain language. Don't bother with follow-up tests yet; we'll do that next week. \item Now just for Music distraction, is there any effect of volume? Do the test and state a conclusion in plain language. Don't bother with follow-up tests yet; we'll do that next week. \end{enumerate} Please bring your R printout for this question to the quiz. \end{enumerate}
% http://www.utstat.toronto.edu/~brunner/appliedf14/code_n_data/hw/distract.data
\vspace{10mm} \noindent \begin{center}\begin{tabular}{l} \hspace{6in} \\ \hline \end{tabular}\end{center} This assignment was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. 
The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf14} {\texttt{http://www.utstat.toronto.edu/$\sim$brunner/oldclass/appliedf14}} \end{document}