\documentclass[12pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts} % for \mathbb{R} The set of reals
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{fullpage}
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{STA 302f13 Assignment Five}}\footnote{Copyright information is at the end of the last page.}
\vspace{1 mm}
\end{center}

\noindent These problems are preparation for the quiz in tutorial on Friday October 18th, and are not to be handed in. For reference, the general linear model in matrix form is $\mathbf{Y} = \mathbf{X}\boldsymbol{\beta} + \boldsymbol{\epsilon}$, where $\mathbf{X}$ is an $n \times (k+1)$ matrix of observable constants whose columns are linearly independent, $\boldsymbol{\beta}$ is a $(k+1) \times 1$ vector of unknown constants (parameters), and $\boldsymbol{\epsilon}$ is an $n \times 1$ vector of unobservable random variables with $E(\boldsymbol{\epsilon})=\mathbf{0}$ and $cov(\boldsymbol{\epsilon})=\sigma^2\mathbf{I}_n$, where $\sigma^2>0$ is an unknown constant parameter. The least squares estimator of $\boldsymbol{\beta}$ is $\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\prime\mathbf{X})^{-1}\mathbf{X}^\prime\mathbf{Y}$.

\begin{enumerate}

\item \label{regthruorigin} Let $Y_i = \beta x_i + \epsilon_i$ for $i=1, \ldots, n$, where $\epsilon_1, \ldots, \epsilon_n$ are a random sample from a distribution with expected value zero and variance $\sigma^2$, and $\beta$ and $\sigma^2$ are unknown constants. The numbers $x_1, \ldots, x_n$ are known, observed constants. This is a special case of the general linear model, which is given above in matrix form.
\begin{enumerate}
\item What is $\mathbf{X^\prime X}$?
\item What is $\mathbf{X^\prime Y}$?
\item What is $(\mathbf{X^\prime X})^{-1}$?
\item What is $\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\prime\mathbf{X})^{-1} \mathbf{X}^\prime\mathbf{Y}$?
\item You already know that $\widehat{\beta}$ is unbiased for $\beta$. It is also a \emph{linear} estimator of the form $\mathbf{c}^\prime \mathbf{Y}= \sum_{i=1}^n c_i Y_i$, so it is a \emph{linear unbiased estimator}. What is $c_i$?
\end{enumerate}

\item \label{GM} If we want to estimate $\mathbf{a}^\prime\boldsymbol{\beta}$ based on sample data, the Gauss-Markov Theorem tells us that the most natural choice is also (in a sense) the best choice. This question leads you through the proof of the Gauss-Markov Theorem. Your class notes should help.
\begin{enumerate}
\item What is the most natural choice for estimating $\mathbf{a}^\prime\boldsymbol{\beta}$?
\item Show that it's unbiased.
\item The natural estimator is a \emph{linear} unbiased estimator of the form $\mathbf{c}_0^\prime \mathbf{Y}$. What is the $n \times 1$ vector $\mathbf{c}_0$?
\item Of course there are lots of other possible linear unbiased estimators of $\mathbf{a}^\prime\boldsymbol{\beta}$. They are all of the form $\mathbf{c}^\prime \mathbf{Y}$; the natural estimator $\mathbf{c}_0^\prime \mathbf{Y}$ is just one of these. The best one is the one with the smallest variance, because its distribution is the most concentrated around the right answer. What is $Var(\mathbf{c}^\prime \mathbf{Y})$? Show your work.
\item We insist that $\mathbf{c}^\prime \mathbf{Y}$ be unbiased.
Show that if $E(\mathbf{c}^\prime \mathbf{Y}) = \mathbf{a}^\prime\boldsymbol{\beta}$ for \emph{all} $\boldsymbol{\beta} \in \mathbb{R}^{k+1}$, we must have $\mathbf{X}^\prime\mathbf{c} = \mathbf{a}$.
\item So, the task is to minimize $Var(\mathbf{c}^\prime \mathbf{Y})$ by minimizing $\mathbf{c}^\prime\mathbf{c}$ over all $\mathbf{c}$ subject to the constraint $\mathbf{X}^\prime\mathbf{c} = \mathbf{a}$. As preparation for this, show $(\mathbf{c}-\mathbf{c}_0)^\prime\mathbf{c}_0 = 0$.
\item Using the result of the preceding question, show
\begin{displaymath}
\mathbf{c}^\prime\mathbf{c} = (\mathbf{c}-\mathbf{c}_0)^\prime(\mathbf{c}-\mathbf{c}_0) + \mathbf{c}_0^\prime\mathbf{c}_0.
\end{displaymath}
\item Since the formula for $\mathbf{c}_0$ has no $\mathbf{c}$ in it, what choice of $\mathbf{c}$ minimizes the preceding expression? How do you know that the minimum is unique?
\end{enumerate}
The conclusion is that $\mathbf{c}_0^\prime \mathbf{Y} = \mathbf{a}^\prime\widehat{\boldsymbol{\beta}}$ is the Best Linear Unbiased Estimator (BLUE) of $\mathbf{a}^\prime\boldsymbol{\beta}$.

\item For the model of Question~\ref{regthruorigin}, let $\widehat{\beta}_2 = \frac{\overline{Y}_n}{\overline{x}_n}$.
\begin{enumerate}
\item Show that $\widehat{\beta}_2$ is unbiased for $\beta$.
\item Which has the smaller variance, $\widehat{\beta}_2$ or your estimator from Question~\ref{regthruorigin}? How do you know? This is quick if you see it.
\end{enumerate}

\item For the model of Question~\ref{regthruorigin}, let $\widehat{\beta}_3 = \frac{1}{n}\sum_{i=1}^n \frac{Y_i}{x_i} $.
\begin{enumerate}
\item Show that $\widehat{\beta}_3$ is unbiased for $\beta$.
\item Which has the smaller variance, $\widehat{\beta}_3$ or your estimator from Question~\ref{regthruorigin}? How do you know? Again, this is quick.
\end{enumerate}

\item \label{nox} The first parts of this question were in Assignment One. Let $Y_1, \ldots, Y_n$ be independent random variables with $E(Y_i)=\mu$ and $Var(Y_i)=\sigma^2$ for $i=1, \ldots, n$.
\begin{enumerate}
\item Write down $E(\overline{Y})$ and $Var(\overline{Y})$.
\item Let $c_1, \ldots, c_n$ be constants and define the linear combination $L$ by $L = \sum_{i=1}^n c_i Y_i$. What condition on the $c_i$ values makes $L$ an unbiased estimator of $\mu$?
\item Is $\overline{Y}$ a special case of $L$? If so, what are the $c_i$ values?
\item What is $Var(L)$?
\item Now show that $Var(\overline{Y}) < Var(L)$ for every unbiased $L \neq \overline{Y}$. Hint: Add and subtract $\frac{1}{n}$ as in Question~\ref{GM}.
\end{enumerate}

\item Another way to express the model of Question~\ref{nox} is to say $Y_i = \mu + \epsilon_i$ for $i=1, \ldots, n$, where $\epsilon_1, \ldots, \epsilon_n$ are independent random variables from a distribution with expected value zero and variance $\sigma^2$. This is a regression with \emph{no independent variables} (weird), and $\beta_0=\mu$.
\begin{enumerate}
\item What is the $\mathbf{X}$ matrix?
\item What is $(\mathbf{X}^\prime\mathbf{X})^{-1} \mathbf{X}^\prime\mathbf{Y}$?
\item Now how do you know $Var(\overline{Y}) < Var(L)$ in Question~\ref{nox}, without any calculations?
\end{enumerate}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% MVN via MGF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\pagebreak

\item For this question, you may use the following, as well as standard properties of moment-generating functions.
\begin{itemize}
\item[] $Y \sim N(\mu,\sigma^2)$ means $M_Y(t) = e^{\mu t + \frac{1}{2}\sigma^2 t^2}$
% \hspace{5mm} % $Y \sim \chi^2(\nu)$ means $M_Y(t) = (1-2t)^{-\nu/2}$
\item[] $M_{\mathbf{Y}}(\mathbf{t}) = E(e^{\mathbf{t}^\prime\mathbf{Y}})$ \hspace{10mm} $M_{\mathbf{AY}}(\mathbf{t}) = M_{\mathbf{Y}}(\mathbf{A}^\prime\mathbf{t})$ \hspace{10mm} $M_{\mathbf{Y}+\mathbf{c}}(\mathbf{t}) = e^{\mathbf{t}^\prime\mathbf{c}} M_{\mathbf{Y}}(\mathbf{t})$
\end{itemize}
\begin{enumerate}
\item Let $Z_1, \ldots, Z_p$ be independent standard normal random variables, and $\mathbf{Z} = (Z_1, \ldots, Z_p)^\prime$.
\begin{enumerate}
\item What is $E(\mathbf{Z})$?
\item What is $cov(\mathbf{Z})$?
\item What is $M_{\mathbf{Z}}(\mathbf{t})$?
\item Let $\boldsymbol{\Sigma}$ be a $p \times p$ symmetric non-negative definite (real) matrix and $\boldsymbol{\mu} \in \mathbb{R}^p$. Letting $\mathbf{Y} = \boldsymbol{\Sigma}^{\frac{1}{2}}\mathbf{Z} + \boldsymbol{\mu}$, show that $M_{\mathbf{Y}}(\mathbf{t}) = e^{\mathbf{t}^\prime\boldsymbol{\mu} + \frac{1}{2} \mathbf{t}^\prime \boldsymbol{\Sigma} \mathbf{t}}$. A random vector with this moment-generating function will be called \emph{multivariate normal} with parameters $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$, and we will write $\mathbf{Y} \sim N_p(\boldsymbol{\mu}, \boldsymbol{\Sigma})$.
\end{enumerate}

\item Let $\mathbf{Y} \sim N_p(\boldsymbol{\mu}, \boldsymbol{\Sigma})$, and let $\mathbf{A}$ be an $r \times p$ matrix of real constants. Show that the random vector $\mathbf{AY}$ has a multivariate normal distribution. Give the mean vector and covariance matrix.

\item Let $\mathbf{X}= (X_1,X_2,X_3)^\prime$ be multivariate normal with
\begin{displaymath}
\boldsymbol{\mu} = \left[ \begin{array}{c} 1 \\ 0 \\ 6 \end{array} \right]
\mbox{ and }
\boldsymbol{\Sigma} = \left[ \begin{array}{c c c}
1 & 0 & 0 \\
0 & 2 & 0 \\
0 & 0 & 1
\end{array} \right] .
\end{displaymath}
Let $Y_1=X_1+X_2$ and $Y_2=X_2+X_3$. Find the joint distribution of $Y_1$ and $Y_2$.

\item Let $X_1$ be Normal$(\mu_1, \sigma^2_1)$, and $X_2$ be Normal$(\mu_2, \sigma^2_2)$, independent of $X_1$. What is the joint distribution of $Y_1=X_1+X_2$ and $Y_2=X_1-X_2$? What is required for $Y_1$ and $Y_2$ to be independent? Hint: Use matrices.

\item Show that if $\mathbf{Y} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$ where the covariance matrix $\boldsymbol{\Sigma}$ is strictly positive definite, $W = (\mathbf{Y}-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(\mathbf{Y}-\boldsymbol{\mu})$ has a chi-squared distribution with $p$ degrees of freedom.
\end{enumerate}

\end{enumerate}

\vspace{20mm}

\noindent
\begin{center}\begin{tabular}{l} \hspace{6in} \\ \hline \end{tabular}\end{center}
This assignment was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f13}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f13}}

\end{document}

R work for simple regression

set.seed(444)
x = c(1,8,3,6,4,7)
y = 10 - 2*x + rpois(6,10)
plot(x,y)
cbind(x,y)

> x; y
[1] 1 8 3 6 4 7
[1] 14  2 14 10  9  9

\begin{tabular}{crrrrrr} \hline
$x$ & 1 & 8 & 3 & 6 & 4 & 7 \\
$y$ & 14 & 2 & 14 & 10 & 9 & 9 \\ \hline
\end{tabular}
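
Possible additional scratch (not part of the compiled handout): a minimal numerical check of the
least squares formula (X'X)^{-1} X'Y against lm(), using the x and y generated above. The object
names X and X0 are illustrative only.

X = cbind(1, x)                           # n x (k+1) design matrix with an intercept column
solve(t(X) %*% X) %*% t(X) %*% y          # (X'X)^{-1} X'Y computed directly
coef(lm(y ~ x))                           # should agree with the line above

X0 = matrix(x, ncol = 1)                  # design matrix for Y_i = beta x_i + epsilon_i (Question 1)
solve(t(X0) %*% X0) %*% t(X0) %*% y       # same formula with the one-column X
coef(lm(y ~ 0 + x))                       # regression through the origin; should agree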
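
Possibly also worth a quick scratch check for the regression with no independent variables: the
same matrix formula with an intercept-only X matrix, compared against the sample mean and an
intercept-only lm() fit. The name X1s is illustrative only.

X1s = matrix(1, nrow = length(y), ncol = 1)   # X matrix that is just a column of ones
solve(t(X1s) %*% X1s) %*% t(X1s) %*% y        # (X'X)^{-1} X'Y with no independent variables
mean(y)                                       # compare with the sample mean
coef(lm(y ~ 1))                               # intercept-only fit; should agree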
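
A possible simulation sketch for the multivariate normal question (scratch only, not compiled):
simulate Y = Sigma^(1/2) Z + mu with the mu and Sigma from that question, compare sample moments
with the theoretical ones, and check the mean of the quadratic form against the chi-squared
degrees of freedom. The names Xsim, Ysim, Sroot, A, W and nsim are illustrative only.

set.seed(444)                              # same seed as above, for reproducibility
mu  = c(1, 0, 6)
Sig = diag(c(1, 2, 1))
Sroot = diag(sqrt(c(1, 2, 1)))             # Sigma^{1/2}; easy here because Sigma is diagonal
nsim = 10000
Z = matrix(rnorm(3 * nsim), nrow = 3)      # each column is a standard normal vector Z
Xsim = Sroot %*% Z + mu                    # each column is Sigma^{1/2} Z + mu
rowMeans(Xsim)                             # should be close to mu
var(t(Xsim))                               # should be close to Sig
A = rbind(c(1, 1, 0),                      # Y1 = X1 + X2
          c(0, 1, 1))                      # Y2 = X2 + X3
Ysim = A %*% Xsim
A %*% mu                                   # theoretical mean of (Y1, Y2)'
A %*% Sig %*% t(A)                         # theoretical covariance matrix of (Y1, Y2)'
var(t(Ysim))                               # sample covariance; should be close
W = colSums(solve(Sig) %*% (Xsim - mu) * (Xsim - mu))  # (X - mu)' Sigma^{-1} (X - mu), column by column
mean(W)                                    # should be close to 3, the chi-squared degrees of freedom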