\documentclass[11pt]{article}
\usepackage{graphicx}  % To include pdf files
\usepackage{amsmath}
\usepackage{amsbsy}    % For \boldsymbol and \pmb
\usepackage{amsfonts}  % For \mathbb{R}, the set of reals
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue,
citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{fullpage}
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{STA 302f16 Assignment Six}}\footnote{Copyright information is at the end of the last page.}
\vspace{1 mm}
\end{center}

\noindent These problems are preparation for the quiz in tutorial on Thursday, October 27th, and are not to be handed in.

\begin{enumerate}

\item Let the continuous random vectors $\mathbf{y_1}$ and $\mathbf{y_2}$ be independent. Show that their joint moment-generating function is the product of their individual moment-generating functions. Since $\mathbf{y_1}$ and $\mathbf{y_2}$ are continuous, you will integrate. It is okay to represent a multiple integral with a single integral sign. Start with the partitioned random vector
\begin{displaymath}
\mathbf{y} = \left( \begin{array}{c} \mathbf{y_1} \\ \hline \mathbf{y_2} \end{array} \right)
\mbox{ and corresponding }
\mathbf{t} = \left( \begin{array}{c} \mathbf{t_1} \\ \hline \mathbf{t_2} \end{array} \right).
\end{displaymath}

\item Recall that $\mathbf{e} \sim N(\mathbf{0},\sigma^2(I-H))$. What is the distribution of $\mathbf{w} = \mathbf{X}^\prime \, \mathbf{e}$?
\begin{enumerate}
\item Answer the question.
\item Show the calculation of the expected value and the variance-covariance matrix.
\item Is this a surprise? Answer Yes or No.
\item What is the probability that $\mathbf{w}=\mathbf{0}$? The answer is a single number.
\end{enumerate}

\item In the multiple linear regression model, let the columns of the $X$ matrix be linearly independent, so that the columns of $X^\prime X$ are linearly independent as well (no need for a proof, because you did this in an earlier assignment). Either (a) show that $(X^\prime X)^{-1/2}$ is symmetric, or (b) show by a simple numerical example that $(X^\prime X)^{-1/2}$ need not be symmetric.

\item Assume the general linear regression model with normal errors. Label each of the following statements True (meaning always true) or False (meaning not always true). You should be able to justify your answers. It may help to know that four of the statements are true.
\begin{enumerate}
\item $\widehat{\mathbf{y}} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$
\item $\mathbf{y} = \mathbf{X} \mathbf{b} + \mathbf{e}$
\item $\widehat{\mathbf{y}} = \mathbf{X} \mathbf{b} + \mathbf{e}$
\item $\mathbf{y} = \mathbf{X} \boldsymbol{\beta}$
\item $\mathbf{X}^\prime\boldsymbol{\epsilon} = \mathbf{0}$
\item $(\mathbf{y}-\mathbf{X}\boldsymbol{\beta})^\prime (\mathbf{y}-\mathbf{X}\boldsymbol{\beta}) = \boldsymbol{\epsilon}^\prime\boldsymbol{\epsilon}$
\item $\mathbf{e}^\prime \, \mathbf{e} = \mathbf{0}$
\item $\mathbf{e}^\prime \, \mathbf{e} = \mathbf{y}^\prime \, \mathbf{e}$
\item $w = \frac{\boldsymbol{\epsilon}^\prime\boldsymbol{\epsilon}}{\sigma^2}$ has a chi-squared distribution.
\item $E(\boldsymbol{\epsilon}^\prime\boldsymbol{\epsilon})=0$
\item $E(\mathbf{e}^\prime \, \mathbf{e})=0$
\end{enumerate}
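For reference in the True/False items above and throughout the assignment, here is a summary of the usual notation, assuming the standard definitions from lecture and the formula sheet:
\begin{displaymath}
\mathbf{b} = (\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime\mathbf{y}, \qquad
\widehat{\mathbf{y}} = \mathbf{X}\mathbf{b} = H\mathbf{y}, \qquad
\mathbf{e} = \mathbf{y} - \widehat{\mathbf{y}} = (I-H)\mathbf{y},
\end{displaymath}
where $H = \mathbf{X}(\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime$. The quantities $\boldsymbol{\beta}$ and $\boldsymbol{\epsilon}$ belong to the model equation $\mathbf{y} = \mathbf{X}\boldsymbol{\beta} + \boldsymbol{\epsilon}$; they are unknown parameters and unobservable error terms, not sample quantities.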
\pagebreak
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\item This is a repeat from last week. Just write down the answers.
\begin{enumerate}
\item Let $x\sim N(\mu,\sigma^2)$ and $y=ax+b$, where $a$ and $b$ are constants. What is the distribution of $y$?
\item Let $x\sim N(\mu,\sigma^2)$ and $z = \frac{x-\mu}{\sigma}$. What is the distribution of $z$?
\item Let $x_1, \ldots, x_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. What is the distribution of $y = \sum_{i=1}^n x_i$?
\item Let $x_1, \ldots, x_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. What is the distribution of the sample mean $\overline{x}$?
\item Let $x_1, \ldots, x_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. What is the distribution of $z = \frac{\sqrt{n}(\overline{x}-\mu)}{\sigma}$?
\item Let $x_1, \ldots, x_n$ be independent random variables, with $x_i \sim N(\mu_i,\sigma_i^2)$. Let $a_1, \ldots, a_n$ be constants. What is the distribution of $y = \sum_{i=1}^n a_ix_i$?
\item Let $x_1, \ldots, x_n$ be independent random variables with $x_i \sim \chi^2(\nu_i)$ for $i=1, \ldots, n$. What is the distribution of $y = \sum_{i=1}^n x_i$?
\item Let $z \sim N(0,1)$. What is the distribution of $y=z^2$?
\item Let $x_1, \ldots, x_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. What is the distribution of $y = \frac{1}{\sigma^2} \sum_{i=1}^n\left(x_i-\mu \right)^2$?
\item Let $y=x_1+x_2$, where $x_1$ and $x_2$ are independent, $x_1\sim\chi^2(\nu_1)$ and $y\sim\chi^2(\nu_1+\nu_2)$, where $\nu_1$ and $\nu_2$ are both positive. What is the distribution of $x_2$?
\end{enumerate}

\item Show that if $\mathbf{w} \sim N_p(\boldsymbol{\mu},\Sigma)$, with $\Sigma$ positive definite, then $y = (\mathbf{w}-\boldsymbol{\mu})^\prime \Sigma^{-1}(\mathbf{w}-\boldsymbol{\mu})$ has a chi-squared distribution with $p$ degrees of freedom.

\item \label{normalsample} Let $y_1, \ldots, y_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. The sample variance is $s^2 = \frac{\sum_{i=1}^n\left(y_i-\overline{y} \right)^2}{n-1}$.
\begin{enumerate}
\item Show $Cov(\overline{y}, \, y_j-\overline{y})=0$ for any $j=1, \ldots, n$.
\item How do you know that $\overline{y}$ and $s^2$ are independent?
\item Show that
\begin{displaymath}
\frac{(n-1)s^2}{\sigma^2} \sim \chi^2(n-1).
\end{displaymath}
Hint: $\sum_{i=1}^n\left(y_i-\mu \right)^2 = \sum_{i=1}^n\left(y_i-\overline{y} + \overline{y} - \mu \right)^2 = \ldots$
\end{enumerate}

\item Recall the definition of the $t$ distribution. If $z \sim N(0,1)$, $w \sim \chi^2(\nu)$, and $z$ and $w$ are independent, then $t = \frac{z}{\sqrt{w/\nu}}$ is said to have a $t$ distribution with $\nu$ degrees of freedom, and we write $t \sim t(\nu)$. As in the last question, let $y_1, \ldots, y_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Show that $t = \frac{\sqrt{n}(\overline{y}-\mu)}{s} \sim t(n-1)$.

\item For the general linear regression model with normal error terms, show that the $n \times (k+1)$ matrix of covariances $C(\mathbf{e},\mathbf{b}) = \mathbf{0}$. Why does this show that $SSE = \mathbf{e}^\prime\mathbf{e}$ and $\mathbf{b}$ are independent?

\item Calculate $C(\mathbf{e},\widehat{\mathbf{y}})$; show your work. Why should you have known this answer without doing the calculation, assuming normal error terms? Why does the assumption of normality matter?

\item In an earlier assignment, you proved that
\begin{displaymath}
(\mathbf{y}-X\boldsymbol{\beta})^\prime (\mathbf{y}-X\boldsymbol{\beta}) = \mathbf{e}^\prime \, \mathbf{e} + (\mathbf{b}-\boldsymbol{\beta})^\prime X^\prime X(\mathbf{b}-\boldsymbol{\beta}).
\end{displaymath}
Starting with this expression and assuming normality, show that $\mathbf{e}^\prime \, \mathbf{e}/\sigma^2 \sim \chi^2(n-k-1)$. Use the formula sheet.
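A chi-squared fact that may help here, stated as a reminder (it should match what is on the formula sheet): if $w = w_1 + w_2$ with $w_1$ and $w_2$ independent, $w \sim \chi^2(a)$ and $w_2 \sim \chi^2(b)$ with $a > b$, then $w_1 \sim \chi^2(a-b)$, because the moment-generating functions satisfy
\begin{displaymath}
M_{w_1}(t) = \frac{M_w(t)}{M_{w_2}(t)} = \frac{(1-2t)^{-a/2}}{(1-2t)^{-b/2}} = (1-2t)^{-(a-b)/2}
\quad \mbox{for } t < 1/2.
\end{displaymath}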
\item The $t$ distribution is defined as follows. Let $Z\sim N(0,1)$ and $W \sim \chi^2(\nu)$, with $Z$ and $W$ independent. Then $T = \frac{Z}{\sqrt{W/\nu}}$ is said to have a $t$ distribution with $\nu$ degrees of freedom, and we write $T \sim t(\nu)$. For the general fixed effects linear regression model, tests and confidence intervals for linear combinations of the regression coefficients are very useful. Derive the appropriate $t$ distribution and some applications by following these steps. Let $\boldsymbol{\ell}$ be a $(k+1) \times 1$ vector of constants.
\begin{enumerate}
\item What is the distribution of $\boldsymbol{\ell}^\prime \mathbf{b}$? Show a little work. Your answer includes both the expected value and the variance.
\item Now standardize the difference (subtract off the mean and divide by the standard deviation) to obtain a standard normal.
\item Divide by the square root of a well-chosen chi-squared random variable, divided by its degrees of freedom, and simplify. Call the result $t$.
\item How do you know that the numerator and denominator are independent?
\item Suppose you wanted to test $H_0: \boldsymbol{\ell}^\prime\boldsymbol{\beta} = c$. Write down a formula for the test statistic. A statistic is a function of the sample data that is \emph{not} a function of any unknown parameters.
\item For a regression model with four independent variables, suppose you wanted to test $H_0: \beta_2=0$. Give the vector $\boldsymbol{\ell}$.
\item For a regression model with four independent variables, suppose you wanted to test $H_0: \beta_1=\beta_2$. Give the vector $\boldsymbol{\ell}$.
\item Letting $t_{\alpha/2}$ denote the point cutting off the top $\alpha/2$ of the $t$ distribution with $n-k-1$ degrees of freedom, derive the $(1-\alpha) \times 100\%$ confidence interval for $\boldsymbol{\ell}^\prime\boldsymbol{\beta}$. ``Derive'' means show the High School algebra.
\end{enumerate}

\item For a multiple regression model with an intercept, let $SST = \sum_{i=1}^n(y_i-\overline{y})^2$, $SSE = \sum_{i=1}^n(y_i-\widehat{y}_i)^2$ and $SSR = \sum_{i=1}^n(\widehat{y}_i-\overline{y})^2$. Show $SST=SSR+SSE$. Hint: Add and subtract $\widehat{y}_i$. Switch to matrix notation partway through the calculation.

\item For the general linear regression model with normal error terms, show that if the model has an intercept, then $\mathbf{e}$ and $\overline{y}$ are independent. Here are some ingredients to start you out. For the model with intercept,
\begin{enumerate}
\item What does $X^\prime\mathbf{e} = \mathbf{0}$ tell you about $\sum_{i=1}^n e_i$?
\item Therefore, what do you know about $\sum_{i=1}^n y_i$ and $\sum_{i=1}^n \widehat{y}_i$?
\item Show that the least squares plane must pass through the point $(\overline{x}_1, \ldots, \overline{x}_k, \overline{y})$. Start with a scalar expression for $\widehat{y}_i$.
\item Now show that $\mathbf{e}$ and $\overline{y}$ are independent.
\end{enumerate}

\item Continue assuming that the regression model has an intercept. Many statistical programs automatically provide an \emph{overall} test that says none of the independent variables makes any difference. If you can't reject that, you're in trouble. If $H_0: \beta_1 = \cdots = \beta_k = 0$ is true,
\begin{enumerate}
\item What is the distribution of $y_i$?
\item What is the distribution of $\frac{SST}{\sigma^2}$? Just write down the answer. Check Problem~\ref{normalsample}.
\end{enumerate}

\item Still assuming $H_0: \beta_1 = \cdots = \beta_k = 0$ is true, what is the distribution of $SSR/\sigma^2$? Use the formula sheet and show your work.
\item \label{Fstat} Recall the definition of the $F$ distribution. If $W_1 \sim \chi^2(\nu_1)$ and $W_2 \sim \chi^2(\nu_2)$ are independent, then $F = \frac{W_1/\nu_1}{W_2/\nu_2} \sim F(\nu_1,\nu_2)$. Show that $F = \frac{SSR/k}{SSE/(n-k-1)}$ has an $F$ distribution under $H_0: \beta_1 = \cdots = \beta_k = 0$. Refer to the results of the questions above as you use them.

\item The null hypothesis $H_0: \beta_1 = \cdots = \beta_k = 0$ is less and less believable as $R^2$ becomes larger. Show that the $F$ statistic of Question~\ref{Fstat} is an increasing function of $R^2$ for fixed $n$ and $k$. This means it makes sense to reject $H_0$ for large values of $F$.

\end{enumerate}

% \vspace{90mm}
\noindent
\begin{center}\begin{tabular}{l}
\hspace{6in} \\
\hline
\end{tabular}\end{center}
This assignment was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f16}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f16}}

\end{document}