\documentclass[12pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts} % for \mathbb{R} The set of reals
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{fullpage}
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}

\begin{center}
{\Large \textbf{STA 302f17 Assignment Five}}\footnote{Copyright information is at the end of the last page.}
\vspace{1 mm}
\end{center}

\noindent
Except for Problem~\ref{computer}, these problems are preparation for the quiz in tutorial on Thursday October 26th, and are not to be handed in. As usual, \textbf{at times you may be asked to prove something that is not true}. In this case you should say why the statement is not always true. Please bring your printout for Problem~\ref{computer} to the quiz. Do not write anything on the printout in advance of the quiz, except possibly your name and student number.

\begin{enumerate}

\item \label{nox} Let $y_1, \ldots, y_n$ be independent random variables with $E(y_i)=\mu$ and $Var(y_i)=\sigma^2$ for $i=1, \ldots, n$.
\begin{enumerate}
\item Write down $E(\overline{y})$ and $Var(\overline{y})$.
\item Let $c_1, \ldots, c_n$ be constants and define the linear combination $L$ by $L = \sum_{i=1}^n c_i y_i$. What condition on the $c_i$ values makes $L$ an unbiased estimator of $\mu$? Recall that $L$ unbiased means that $E(L)=\mu$ for \emph{all} real $\mu$. Treat the cases $\mu=0$ and $\mu \neq 0$ separately.
\item Is $\overline{y}$ a special case of $L$? If so, what are the $c_i$ values?
\item What is $Var(L)$ for general $L$?
\item Now show that $Var(\overline{y}) < Var(L)$ for every unbiased $L \neq \overline{y}$. Hint: $\sum_{i=1}^n(c_i-\overline{c})^2 = \sum_{i=1}^n c_i^2 - \frac{(\sum_{i=1}^n c_i)^2}{n}$.
\end{enumerate}
This is the simplest case of the Gauss-Markov Theorem.

\item \label{GM} For the general linear model $\mathbf{y} = X\boldsymbol{\beta} + \boldsymbol{\epsilon}$, suppose we want to estimate the linear combination $\boldsymbol{\ell}^\prime\boldsymbol{\beta}$ based on sample data. The Gauss-Markov Theorem tells us that the most natural choice is also (in a sense) the best choice. This question leads you through the proof of the Gauss-Markov Theorem. Your class notes should help. Also see your answer to Question~\ref{nox}.
\begin{enumerate}
\item What is the most natural choice for estimating $\boldsymbol{\ell}^\prime\boldsymbol{\beta}$?
\item Show that it's unbiased.
\item The natural estimator is a \emph{linear} unbiased estimator of the form $\mathbf{c}_0^\prime \mathbf{y}$. What is the $n \times 1$ vector $\mathbf{c}_0$?
\item Of course there are lots of other possible linear unbiased estimators of $\boldsymbol{\ell}^\prime\boldsymbol{\beta}$. They are all of the form $\mathbf{c}^\prime \mathbf{y}$; the natural estimator $\mathbf{c}_0^\prime \mathbf{y}$ is just one of these. The best one is the one with the smallest variance, because its distribution is the most concentrated around the right answer.
\begin{enumerate}
\item What is $Var(\mathbf{c}^\prime \mathbf{y})$? Show your work.
\item What is $Var(\mathbf{c}_0^\prime \mathbf{y})$? Show your work.
\end{enumerate}
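As an optional aside (nothing to hand in), the scalar case from Problem~\ref{nox} can be explored numerically. The R sketch below uses made-up values of $n$, $\mu$ and $\sigma$, and one arbitrary set of weights summing to one; it compares the simulated variance of $\overline{y}$ to that of another unbiased linear combination.
{\scriptsize
\begin{verbatim}
# Simulation sketch with made-up numbers: compare Var(ybar) to Var(L)
set.seed(302)
n = 5; mu = 10; sigma = 2
cc = c(0.4, 0.3, 0.1, 0.1, 0.1)  # weights summing to one, not all equal to 1/n
ybar = replicate(10000, mean(rnorm(n, mu, sigma)))
L    = replicate(10000, sum(cc * rnorm(n, mu, sigma)))
c(mean(ybar), mean(L))  # both should be close to mu
c(var(ybar), var(L))    # compare the two simulated variances
\end{verbatim}
} % End size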
\item We insist that $\mathbf{c}^\prime \mathbf{y}$ be unbiased. Show that if $E(\mathbf{c}^\prime \mathbf{y}) = \boldsymbol{\ell}^\prime\boldsymbol{\beta}$ for \emph{all} $\boldsymbol{\beta} \in \mathbb{R}^{k+1}$, we must have $X^\prime\mathbf{c} = \boldsymbol{\ell}$.
\item Show that if $\mathbf{c}$ satisfies $E(\mathbf{c}^\prime \mathbf{y}) = \boldsymbol{\ell}^\prime\boldsymbol{\beta}$ for all $\boldsymbol{\beta} \in \mathbb{R}^{k+1}$, then $H\mathbf{c} = \mathbf{c}_0$. That is, $\mathbf{c}_0$ is the projection of $\mathbf{c}$ onto the space spanned by the columns of the $X$ matrix.
\item By direct calculation, show $Var(\mathbf{c}^\prime \mathbf{y}) \geq Var(\mathbf{c}_0^\prime \mathbf{y})$. This means that no linear unbiased estimator of $\boldsymbol{\ell}^\prime\boldsymbol{\beta}$ has a smaller variance than $\mathbf{c}_0^\prime \mathbf{y}$.
\item Show that if $Var(\mathbf{c}^\prime \mathbf{y}) = Var(\mathbf{c}_0^\prime \mathbf{y})$, then $\mathbf{c} = \mathbf{c}_0$. This means that no other linear unbiased estimator of $\boldsymbol{\ell}^\prime\boldsymbol{\beta}$ can even tie the variance of $\mathbf{c}_0^\prime \mathbf{y}$.
\end{enumerate}
The conclusion is that $\mathbf{c}_0^\prime \mathbf{y} = \boldsymbol{\ell}^\prime\mathbf{b}$ is the Best Linear Unbiased Estimator (BLUE) of $\boldsymbol{\ell}^\prime\boldsymbol{\beta}$.

\item The model for simple regression through the origin is $y_i = \beta x_i + \epsilon_i$, where $\epsilon_1, \ldots, \epsilon_n$ are independent with expected value $0$ and variance $\sigma^2$. In previous homework, you found the least squares estimate of $\beta$ to be $b = \frac{\sum_{i=1}^n x_iy_i}{\sum_{i=1}^n x_i^2}$.
\begin{enumerate}
\item What is $Var(b)$?
\item Let $b_2 = \frac{\overline{y}_n}{\overline{x}_n}$.
\begin{enumerate}
\item Is $b_2$ an unbiased estimator of $\beta$? Answer Yes or No and show your work.
\item Is $b_2$ a linear combination of the $y_i$ variables, of the form $L = \sum_{i=1}^n c_i y_i$? If so, what is $c_i$?
\item What is $Var(b_2)$?
\item How do you know $Var(b) < Var(b_2)$? No calculations are necessary.
\end{enumerate}
\item Let $b_3 = \frac{1}{n}\sum_{i=1}^n \frac{y_i}{x_i} $.
\begin{enumerate}
\item Is $b_3$ an unbiased estimator of $\beta$? Answer Yes or No and show your work.
\item Is $b_3$ a linear combination of the $y_i$ variables, of the form $L = \sum_{i=1}^n c_i y_i$? If so, what is $c_i$?
\item What is $Var(b_3)$?
\item How do you know $Var(b) < Var(b_3)$? No calculations are necessary.
\end{enumerate}
\end{enumerate}

%\item For the general linear regression model, assume that the columns of $X$ are linearly independent, so that $(X^\prime X)^{-1}$ exists and $\mathbf{b}$ is well defined. Starting from the definition on the formula sheet, prove that $\mathbf{e} = \mathbf{0}$.

\item In practice, $\mathbf{e}$ will never be zero. Why? It may help to think of the least-squares line on a two-dimensional scatterplot.

\item Show that \emph{if} the hat matrix $H$ has an inverse, then $\mathbf{e} = \mathbf{0}$. Start by calculating $H\mathbf{e}$.

\item Recall that the rank of a product is at most the minimum of the ranks of the factors. Why does this imply that the hat matrix has no inverse if $n>k+1$?

\item True or False: The sum of residuals is always equal to zero.

\item True or False: The sum of \emph{expected} residuals is always equal to zero.

\item True or False: The sum of residuals is always equal to zero if the model has an intercept.
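As an optional way to play around with the last three items (nothing to hand in), the R sketch below fits two regressions to the built-in \texttt{trees} data mentioned in Problem~\ref{computer}, one with an intercept and one without, and adds up the residuals from each fit.
{\scriptsize
\begin{verbatim}
# Sum of residuals, with and without an intercept, using the built-in trees data
mod1 = lm(Volume ~ Girth + Height, data = trees)      # model with intercept
sum(residuals(mod1))
mod2 = lm(Volume ~ Girth + Height - 1, data = trees)  # model without intercept
sum(residuals(mod2))
\end{verbatim}
} % End size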
\item Sometimes one can learn by just playing around. Suppose we fit a regression model, obtaining $\mathbf{b}$, $\widehat{\mathbf{y}}$, $\mathbf{e}$ and so on. Then we fit another regression model with the same independent variables, but this time using $\widehat{\mathbf{y}}$ as the dependent variable instead of $\mathbf{y}$.
\begin{enumerate}
\item Denote the vector of estimated regression coefficients from the new model by $\mathbf{b}_2$. Calculate $\mathbf{b}_2$ and simplify. Should you be surprised at this answer?
\item Calculate $\widehat{\widehat{\mathbf{y}}}= X\mathbf{b}_2$. Why is this not surprising if you think in terms of projections?
\end{enumerate}

\item Now consider another regression model with the same independent variables but with $\mathbf{e}$ as the dependent variable. What is $\mathbf{b}_3$? What is $\widehat{\widehat{\mathbf{y}}}= X\mathbf{b}_3$?

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% MVN via MGF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Need new formula sheet!
% \pagebreak

\item The joint moment-generating function of a $p$-dimensional random vector $\mathbf{x}$ is defined as $M_{\mathbf{x}}(\mathbf{t}) = E\left(e^{\mathbf{t}^\prime \mathbf{x}} \right)$.
\begin{enumerate}
\item Let $\mathbf{y} = A\mathbf{x}$, where $A$ is a matrix of constants. Find the moment-generating function of $\mathbf{y}$.
\item Let $\mathbf{y} = \mathbf{x} + \mathbf{c}$, where $\mathbf{c}$ is a $p \times 1$ vector of constants. Find the moment-generating function of $\mathbf{y}$.
\end{enumerate}

\item Let $z_1, \ldots, z_p \stackrel{i.i.d.}{\sim}N(0,1)$, and
\begin{displaymath}
\mathbf{z} = \left( \begin{array}{c} z_1 \\ \vdots \\ z_p \end{array} \right).
\end{displaymath}
\begin{enumerate}
\item What is the joint moment-generating function of $\mathbf{z}$? Show some work.
\item Let $\mathbf{y} = \Sigma^{1/2}\mathbf{z} + \boldsymbol{\mu}$, where $\Sigma$ is a $p \times p$ symmetric \emph{non-negative definite} matrix and $\boldsymbol{\mu} \in \mathbb{R}^p$.
\begin{enumerate}
\item What is $E(\mathbf{y})$?
\item What is the variance-covariance matrix of $\mathbf{y}$? Show some work.
\item What is the moment-generating function of $\mathbf{y}$? Show your work.
\end{enumerate}
\end{enumerate}

\item We say the $p$-dimensional random vector $\mathbf{y}$ is multivariate normal with expected value $\boldsymbol{\mu}$ and variance-covariance matrix $\Sigma$, and write $\mathbf{y} \sim N_p(\boldsymbol{\mu}, \Sigma)$, when $\mathbf{y}$ has moment-generating function $ M_{_\mathbf{y}}(\mathbf{t}) = e^{\mathbf{t}^\prime\boldsymbol{\mu} + \frac{1}{2} \mathbf{t}^\prime\Sigma\mathbf{t}}$.
\begin{enumerate}
\item Let $\mathbf{y} \sim N_p(\boldsymbol{\mu}, \Sigma)$ and $\mathbf{w}=A\mathbf{y}$, where $A$ is an $r \times p$ matrix of constants. What is the distribution of $\mathbf{w}$? Use moment-generating functions to prove your answer.
\item Let $\mathbf{y} \sim N_p(\boldsymbol{\mu}, \Sigma)$ and $\mathbf{w}=\mathbf{y}+\mathbf{c}$, where $\mathbf{c}$ is a $p \times 1$ vector of constants. What is the distribution of $\mathbf{w}$? Use moment-generating functions to prove your answer.
\end{enumerate}

\item Let $\mathbf{y} \sim N_2(\boldsymbol{\mu}, \Sigma)$, with
\begin{displaymath}
\mathbf{y} = \left(\begin{array}{c} y_1 \\ y_2 \end{array}\right) ~~~~~
\boldsymbol{\mu} = \left(\begin{array}{c} \mu_1 \\ \mu_2 \end{array}\right) ~~~~~
\Sigma = \left(\begin{array}{cc} \sigma^2_1 & 0 \\ 0 & \sigma^2_2 \end{array}\right)
\end{displaymath}
Using moment-generating functions, show that $y_1$ and $y_2$ are independent.
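As an optional numerical companion to the last few items (nothing to hand in), the R sketch below builds $\mathbf{y} = \Sigma^{1/2}\mathbf{z} + \boldsymbol{\mu}$ for a made-up diagonal $\Sigma$ and $\boldsymbol{\mu}$, and looks at the sample correlation of $y_1$ and $y_2$.
{\scriptsize
\begin{verbatim}
# Simulate y = Sigma^{1/2} z + mu with a made-up diagonal Sigma
set.seed(302)
mu = c(1, -2)
Sigma = diag(c(4, 9))                   # sigma1^2 = 4, sigma2^2 = 9
sqrtSigma = diag(sqrt(diag(Sigma)))     # Sigma^{1/2} is easy when Sigma is diagonal
z = matrix(rnorm(2 * 10000), nrow = 2)  # each column is a pair of independent N(0,1)
y = sqrtSigma %*% z + mu                # mu is recycled down each column
cor(y[1, ], y[2, ])                     # sample correlation of y1 and y2
\end{verbatim}
} % End size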
\item Let $x= (x_1,x_2,x_3)^\prime$ be multivariate normal with
\begin{displaymath}
\boldsymbol{\mu} = \left[ \begin{array}{c} 1 \\ 0 \\ 6 \end{array} \right] \mbox{ and }
\Sigma = \left[ \begin{array}{c c c} 1 & 0 & 0 \\ 0 & 2 & 0 \\ 0 & 0 & 1 \end{array} \right] .
\end{displaymath}
Let $y_1=x_1+x_2$ and $y_2=x_2+x_3$. Find the joint distribution of $y_1$ and $y_2$.

\item Let $x_1$ be Normal$(\mu_1, \sigma^2_1)$, and $x_2$ be Normal$(\mu_2, \sigma^2_2)$, independent of $x_1$. What is the joint distribution of $y_1=x_1+x_2$ and $y_2=x_1-x_2$? What is required for $y_1$ and $y_2$ to be independent? Hint: Use matrices.

\item Here are some distribution facts that you will need to know without looking at a formula sheet in order to follow the proofs. You are responsible for the proofs of these facts too, but here you are just supposed to write down the answers.
\begin{enumerate}
\item Let $x\sim N(\mu,\sigma^2)$ and $y=ax+b$, where $a$ and $b$ are constants. What is the distribution of $y$?
\item Let $x\sim N(\mu,\sigma^2)$ and $z = \frac{x-\mu}{\sigma}$. What is the distribution of $z$?
\item Let $x_1, \ldots, x_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. What is the distribution of $y = \sum_{i=1}^nx_i$?
\item Let $x_1, \ldots, x_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. What is the distribution of the sample mean $\overline{x}$?
\item Let $x_1, \ldots, x_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. What is the distribution of $z = \frac{\sqrt{n}(\overline{x}-\mu)}{\sigma}$?
\item Let $x_1, \ldots, x_n$ be independent random variables, with $x_i \sim N(\mu_i,\sigma_i^2)$. Let $a_1, \ldots, a_n$ be constants. What is the distribution of $y = \sum_{i=1}^n a_ix_i$?
\item Let $x_1, \ldots, x_n$ be independent random variables with $x_i \sim \chi^2(\nu_i)$ for $i=1, \ldots, n$. What is the distribution of $y = \sum_{i=1}^n x_i$?
\item Let $z \sim N(0,1)$. What is the distribution of $y=z^2$?
\item Let $x_1, \ldots, x_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. What is the distribution of $y = \frac{1}{\sigma^2} \sum_{i=1}^n\left(x_i-\mu \right)^2$?
\item Let $y=x_1+x_2$, where $x_1$ and $x_2$ are independent, $x_1\sim\chi^2(\nu_1)$ and $y\sim\chi^2(\nu_1+\nu_2)$, where $\nu_1$ and $\nu_2$ are both positive. What is the distribution of $x_2$?
\end{enumerate}

\item \label{computer} The \texttt{statclass} data consist of Quiz average, Computer assignment average, Midterm score and Final Exam score from a statistics class, long ago. At the R prompt, type
{\scriptsize
\begin{verbatim}
statclass = read.table("http://www.utstat.utoronto.ca/~brunner/data/legal/LittleStatclassdata.txt")
\end{verbatim}
} % End size
You now have access to the \texttt{statclass} data, just as you have access to the \texttt{trees} data set used in lecture, or any other R data set.
\begin{enumerate}
\item Calculate $\mathbf{b}$ with the \texttt{lm} function. What is $b_2$? The answer is a number on your printout.
\item What is the predicted Final Exam score for a student with a Quiz average of 8.5, a Computer average of 5, and a Midterm mark of 60\%? The answer is a number. Be able to do this kind of thing on the quiz with a calculator. My answer is 63.84144.
\item For any fixed Quiz Average and Computer Average, a score one point higher on the Midterm yields a predicted mark on the Final Exam that is \underline{\hspace{10mm}} higher.
\item For any fixed Quiz Average and Midterm score, a Computer average that is one point higher yields a predicted mark on the Final Exam that is \underline{\hspace{10mm}} higher. Or is it lower?
\end{enumerate}
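If you get stuck starting the R work, the sketch below shows one possible approach. It assumes the data frame columns are named \texttt{QuizAve}, \texttt{CompAve}, \texttt{MidTerm} and \texttt{FinalExam}; check this with \texttt{names(statclass)} before relying on it.
{\scriptsize
\begin{verbatim}
# One possible way to start; check the column names first with names(statclass)
statclass = read.table("http://www.utstat.utoronto.ca/~brunner/data/legal/LittleStatclassdata.txt")
mod = lm(FinalExam ~ QuizAve + CompAve + MidTerm, data = statclass)
coef(mod)                                          # b0, b1, b2, b3
newstudent = data.frame(QuizAve = 8.5, CompAve = 5, MidTerm = 60)
predict(mod, newdata = newstudent)                 # predicted Final Exam score
\end{verbatim}
} % End size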
\end{enumerate}

% \vspace{90mm}
\noindent
\begin{center}\begin{tabular}{l} \hspace{6in} \\ \hline \end{tabular}\end{center}
This assignment was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/302f17} {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f17}}

\end{document}

R work for simple regression

set.seed(444)
x = c(1,8,3,6,4,7)
y = 10 - 2*x + rpois(6,10)
plot(x,y)
cbind(x,y)

> x; y
[1] 1 8 3 6 4 7
[1] 14 2 14 10 9 9

\begin{tabular}{crrrrrr} \hline
$x$ & 1 & 8 & 3 & 6 & 4 & 7 \\
$y$ & 14 & 2 & 14 & 10 & 9 & 9 \\ \hline
\end{tabular}

\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. You may use the independence of $\overline{X}$ and $S^2$ without proof, for now.

statclass = read.table("http://www.utstat.utoronto.ca/~brunner/data/legal/LittleStatclassdata.txt")
attach(statclass) # statclass
mod = lm(FinalExam ~ QuizAve + CompAve + MidTerm)
mod$coefficients #$
joe = data.frame(QuizAve=8.5, CompAve=5, MidTerm=60)
predict(mod,joe) # QuizAve=8.5, CompAve=5, MidTerm=60

% Next time

\item Show that if $X \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$, with $\boldsymbol{\Sigma}$ positive definite, then $Y = (X-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(X-\boldsymbol{\mu})$ has a chi-square distribution with $p$ degrees of freedom.

\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution.
\begin{enumerate}
\item Show $Cov(\overline{X},(X_j-\overline{X}))=0$ for $j=1, \ldots, n$.
\item Show that $\overline{X}$ and $S^2$ are independent.
\item Show that
\begin{displaymath}
\frac{(n-1)S^2}{\sigma^2} \sim \chi^2(n-1),
\end{displaymath}
where $S^2 = \frac{\sum_{i=1}^n\left(X_i-\overline{X} \right)^2 }{n-1}$. Hint: $\sum_{i=1}^n\left(X_i-\mu \right)^2 = \sum_{i=1}^n\left(X_i-\overline{X} + \overline{X} - \mu \right)^2 = \ldots$
\end{enumerate}

\item Recall the definition of the $t$ distribution. If $Z\sim N(0,1)$, $W \sim \chi^2(\nu)$ and $Z$ and $W$ are independent, then $T = \frac{Z}{\sqrt{W/\nu}}$ is said to have a $t$ distribution with $\nu$ degrees of freedom, and we write $T \sim t(\nu)$. As in the last question, let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Show that $T = \frac{\sqrt{n}(\overline{X}-\mu)}{S} \sim t(n-1)$.
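Possible simulation check for the chi-squared fact above; the values of n, mu and sigma below are made up.

# Rough check that (n-1)S^2/sigma^2 behaves like chi-squared(n-1)
set.seed(302)
n = 10; mu = 5; sigma = 2
w = replicate(10000, (n-1) * var(rnorm(n, mu, sigma)) / sigma^2)
c(mean(w), n-1)      # mean of chi-squared(n-1) is n-1
c(var(w), 2*(n-1))   # variance of chi-squared(n-1) is 2(n-1)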