% 302f16Assignment1.tex REVIEW
\documentclass[12pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{fullpage}
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}

\begin{center}
{\Large \textbf{STA 302f16 Assignment One}}\footnote{Copyright information is at the end of the last page.}
\vspace{1 mm}
\end{center}

\noindent Please do these review questions in preparation for Quiz One; they are not to be handed in. This material will be on the final exam only indirectly. The following formulas will be supplied with Quiz One. You may use them without proof.

\begin{center}
\renewcommand{\arraystretch}{1.5}
\begin{tabular}{ll}
$E(x) = \sum_x \, x \, p_{_x}(x)$ &
$E(x) = \int_{-\infty}^\infty x f_{_x}(x) \, dx$ \\
$E(g(x)) = \sum_x g(x) \, p_{_x}(x)$ &
$E(g(\mathbf{x})) = \sum_{x_1} \cdots \sum_{x_p} g(x_1, \ldots, x_p) \, p_{_\mathbf{x}}(x_1, \ldots, x_p) $ \\
$E(g(x)) = \int_{-\infty}^\infty g(x) \, f_{_x}(x) \, dx$ &
$E(g(\mathbf{x})) = \int_{-\infty}^\infty \cdots \int_{-\infty}^\infty g(x_1, \ldots, x_p) \, f_{_\mathbf{x}}(x_1, \ldots, x_p) \, dx_1 \ldots dx_p $ \\
$E(\sum_{i=1}^n a_ix_i) = \sum_{i=1}^n a_iE(x_i)$ &
$Var(x) = E\left( \, (x-\mu_{_x})^2 \, \right)$ \\
$Cov(x,y) = E\left( \, (x-\mu_{_x})(y-\mu_{_y}) \, \right)$ &
$Corr(x,y) = \frac{Cov(x,y)}{\sqrt{Var(x)Var(y)}} $
\end{tabular}
\renewcommand{\arraystretch}{1.0}
\end{center}

\vspace{3mm}

\noindent The first three problems are very elementary, but they may help to clarify some basic concepts. Please recall the following. Suppose $y = g(x)$, where $x$ is a discrete random variable. To find the probability distribution of $y$, list the possible values of $y$ and then add up the $x$ probabilities corresponding to each value.
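For example, suppose $x$ has the small made-up distribution $P(x=-1) = P(x=0) = P(x=1) = \frac{1}{3}$ (this distribution is for illustration only; it does not appear in the questions below), and let $y = x^2$. The possible values of $y$ are $0$ and $1$, with $P(y=0) = P(x=0) = \frac{1}{3}$ and $P(y=1) = P(x=-1) + P(x=1) = \frac{2}{3}$.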
\begin{enumerate}
\item The random variable $x$ is uniformly distributed on the integers $\{-3, -2, -1, 0, 1, 2, 3\}$, meaning $P(x=-3) = P(x=-2) = \cdots = P(x=3) = \frac{1}{7}$. Let $y=x^2$.
\begin{enumerate}
\item What is $E(x)$? The answer is a number. Show your work. % zero
\item Calculate the variance of $x$. The answer is a number. Show your work. % 4
\item What is $P(y=-1)$?
\item What is $P(y=9)$?
\item What is the probability distribution of $y$? Give the $y$ values with their probabilities.
\item What is $E(y)$? The answer is a number. Did you already do this question? % 4
\end{enumerate}

\item The discrete random variables $x$ and $y$ have joint distribution
\begin{center}
\begin{tabular}{c|ccc}
      & $x=1$  & $x=2$  & $x=3$ \\ \hline
$y=1$ & $2/12$ & $3/12$ & $1/12$ \\
$y=2$ & $2/12$ & $1/12$ & $3/12$ \\
\end{tabular}
\end{center}
\begin{enumerate}
\item What is the marginal distribution of $x$? List the values with their probabilities.
\item What is the marginal distribution of $y$? List the values with their probabilities.
\item Are $x$ and $y$ independent? Answer Yes or No and show some work.
\item Calculate $E(x)$. Show your work.
\item Denote a ``centered'' version of $x$ by $x_c = x - E(x) = x-\mu_{_x}$.
\begin{enumerate}
\item What is the probability distribution of $x_c$? Give the values with their probabilities.
\item What is $E(x_c)$? Show your work.
\item What is the probability distribution of $x_c^2$? Give the values with their probabilities.
\item What is $E(x_c^2)$? Show your work.
\end{enumerate}
\item What is $Var(x)$? If you have been paying attention, you don't have to show any work.
\item Calculate $E(y)$. Show your work.
\item Calculate $Var(y)$. Show your work. You may use Question~\ref{handyA} if you wish.
\item Calculate $Cov(x,y)$. Show your work. You may use Question~\ref{handyB} if you wish.
\item Let $Z_1 = g_1(x,y) = x+y$. What is the probability distribution of $Z_1$? Show some work.
\item Calculate $E(Z_1)$. Show your work.
\item Do we have $E(x+y) = E(x)+E(y)$? Answer Yes or No. Note that the answer \emph{does not require independence}.
\item Let $Z_2 = g_2(x,y) = xy$. What is the probability distribution of $Z_2$? List the values with their probabilities. Show some work.
\item Calculate $E(Z_2)$. Show your work.
\item Do we have $E(xy) = E(x)E(y)$? Answer Yes or No. The connection to independence is established in Question~\ref{prod}.
\end{enumerate}

\item \label{notsofast} Here is another joint distribution. The point of this question is that you can have zero covariance without independence.
\begin{center}
\begin{tabular}{c|ccc}
      & $x=1$  & $x=2$  & $x=3$ \\ \hline
$y=1$ & $3/12$ & $1/12$ & $3/12$ \\
$y=2$ & $1/12$ & $3/12$ & $1/12$ \\
\end{tabular}
\end{center}
\begin{enumerate}
\item Calculate $Cov(x,y)$. Show your work. You may use Question~\ref{handyB} if you wish. % 17/6 - (2)(17/12)
\item Are $x$ and $y$ independent? Answer Yes or No and show some work.
\end{enumerate}

\item Let $x$ be a discrete random variable and let $a$ be a constant. Using the expression for $E(g(x))$ at the beginning of this assignment, show $E(a)=a$. Is the result still true if $x$ is continuous?

\item Let $a$ be a constant and $Pr\{y=a\}=1$. Find $Var(y)$. Show your work.

\item \label{prod} Let $x_1$ and $x_2$ be continuous random variables that are \emph{independent}. Using the expression for $E(g(\mathbf{x}))$ at the beginning of this assignment, show $E(x_1 x_2) = E(x_1)E(x_2)$. Draw an arrow to the place in your answer where you use independence, and write ``This is where I use independence.'' Because $x_1$ and $x_2$ are continuous, you will need to integrate. Does your proof still apply if $x_1$ and $x_2$ are discrete?

\item Using the definitions of variance and covariance along with the linear property $E(\sum_{i=1}^na_iy_i) = \sum_{i=1}^na_iE(y_i)$ (no integrals), show the following:
\begin{enumerate}
\item \label{handyA} $Var(y) = E(y^2)-\mu_y^2$
\item \label{handyB} $Cov(x,y)=E(xy)-E(x)E(y)$
\item If $x$ and $y$ are independent, $Cov(x,y) = 0$. Of course you may use Problem~\ref{prod}. Important: Does this contradict Question~\ref{notsofast}?
\end{enumerate}

\item Let $x$ be a random variable and $a$ be a constant. Show
\begin{enumerate}
\item $Var(ax) = a^2Var(x)$.
\item $Var(x+a) = Var(x)$.
\end{enumerate}

\item Show $Var(x+y) = Var(x) + Var(y) + 2Cov(x,y)$.

\item Let $x$ and $y$ be random variables, and let $a$ and $b$ be constants. Show $Cov(x+a,y+b) = Cov(x,y)$.

\item Let $x$ and $y$ be random variables, with $E(x)=\mu_x$, $E(y)=\mu_y$, $Var(x)=\sigma^2_x$, $Var(y)=\sigma^2_y$, $Cov(x,y) = \sigma_{xy}$ and $Corr(x,y) = \rho_{xy}$. Let $a$ and $b$ be non-zero constants.
\begin{enumerate}
\item Find $Cov(ax,y)$.
\item Find $Corr(ax,y)$. Do not forget that $a$ could be negative.
\end{enumerate}

\item Let $y_1, \ldots, y_n$ be numbers, and $\overline{y}=\frac{1}{n}\sum_{i=1}^ny_i$. Show
\begin{enumerate}
\item $\sum_{i=1}^n(y_i-\overline{y})=0$
\item $\sum_{i=1}^n(y_i-\overline{y})^2=\sum_{i=1}^ny_i^2 \,-\, n\overline{y}^2$
\item The sum of squares $Q_m = \sum_{i=1}^n(y_i-m)^2$ is minimized when $m = \overline{y}$.
\end{enumerate}
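It may help to try parts (a) and (b) on a tiny made-up data set first. For example, with $n=3$ and $y_1=1$, $y_2=2$, $y_3=3$ (numbers chosen only for illustration), $\overline{y}=2$, so $\sum_{i=1}^3(y_i-\overline{y}) = -1+0+1 = 0$ and $\sum_{i=1}^3(y_i-\overline{y})^2 = 1+0+1 = 2$, while $\sum_{i=1}^3 y_i^2 - n\overline{y}^2 = 14 - 3(2^2) = 2$ as well.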
% \pagebreak
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item Let $x_1, \ldots, x_n$ and $y_1, \ldots, y_n$ be numbers, with $\overline{x}=\frac{1}{n}\sum_{i=1}^nx_i$ and $\overline{y}=\frac{1}{n}\sum_{i=1}^ny_i$. Show $\sum_{i=1}^n(x_i-\overline{x})(y_i-\overline{y}) = \sum_{i=1}^n x_iy_i \,-\, n\overline{x} \, \overline{y}$.

\item Let $y_1, \ldots, y_n$ be independent random variables with $E(y_i)=\mu$ and $Var(y_i)=\sigma^2$ for $i=1, \ldots, n$. For this question, please use definitions and familiar properties of expected value, not integrals.
\begin{enumerate}
\item Find $E(\sum_{i=1}^ny_i)$. Are you using independence?
\item Find $Var\left(\sum_{i=1}^n y_i\right)$. What earlier questions are you using in connection with independence?
\item Using your answer to the last question, find $Var(\overline{y})$.
\item A statistic $T$ is an \emph{unbiased estimator} of a parameter $\theta$ if $E(T)=\theta$. Show that $\overline{y}$ is an unbiased estimator of $\mu$. This is very quick.
\item Let $a_1, \ldots, a_n$ be constants and define the linear combination $L$ by $L = \sum_{i=1}^n a_i y_i$. Show that if $\sum_{i=1}^n a_i = 1$, then $L$ is an unbiased estimator of $\mu$.
\item Is $\overline{y}$ a special case of $L$? If so, what are the $a_i$ values?
\item What is $Var(L)$?
\end{enumerate}

\item Here is a simple linear regression model. Let $y = \beta_0 + \beta_1 x + \epsilon$, where $\beta_0$ and $\beta_1$ are constants (typically unknown), $x$ is a known, observable constant, and $\epsilon$ is a random variable with expected value zero and variance $\sigma^2$.
\begin{enumerate}
\item What is $E(y)?$
\item What is $Var(y)$?
\item Suppose that the distribution of $\epsilon$ is normal, so that it has density $f(\epsilon) = \frac{1}{\sigma\sqrt{2\pi}} e^{-\frac{\epsilon^2}{2\sigma^2}}$. Find the distribution of $y$. Show your work. Hint: differentiate the cumulative distribution function of $y$.
\item Suppose there are two equations:
\begin{eqnarray*}
y_1 & = & \beta_0 + \beta_1 x_1 + \epsilon_1 \\
y_2 & = & \beta_0 + \beta_1 x_2 + \epsilon_2
\end{eqnarray*}
with $E(\epsilon_1) = E(\epsilon_2) = 0$, $Var(\epsilon_1) = Var(\epsilon_2) = \sigma^2$ and $Cov(\epsilon_1, \epsilon_2)=0$. Note that $x_1$ and $x_2$ are constants, not random variables. What is $Cov(y_1,y_2)$? You don't need to show any work. Just refer to a problem you solved earlier in this assignment.
\end{enumerate}

\pagebreak
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item \label{numbers} Let $\mathbf{A} = \left( \begin{array}{rr} 2 & 5 \\ 1 & -4 \\ 0 & 3 \end{array} \right)$ and $\mathbf{B} = \left( \begin{array}{rr} 1 & 0 \\ 2 & 3 \\ -1 & 3 \end{array} \right)$. Which of the following are possible to compute? Don't do the calculations. Just answer each one Possible or Impossible.
\begin{enumerate}
\item $\mathbf{A}+\mathbf{B}$
\item $\mathbf{A}-\mathbf{B}$
\item $\mathbf{AB}$
\item $\mathbf{A}^\prime\mathbf{B}$
\item $\mathbf{B}^\prime\mathbf{A}$
\item $\mathbf{A}/\mathbf{B}$
\item $\mathbf{B}/\mathbf{A}$
\end{enumerate}

\item For the matrices of Question~\ref{numbers}, verify that
\begin{displaymath}
\mathbf{A}^\prime\mathbf{B} = \left( \begin{array}{rr} 4 & 3 \\ -6 & -3 \end{array} \right)
\mbox{ ~~~~~and~~~~~ }
\mathbf{A}\mathbf{B}^\prime = \left( \begin{array}{rrr} 2 & 19 & 13 \\ 1 & -10 & -13 \\ 0 & 9 & 9 \end{array} \right).
\end{displaymath}
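Hint: $\mathbf{A}^\prime$ is $2 \times 3$ and $\mathbf{B}$ is $3 \times 2$, so $\mathbf{A}^\prime\mathbf{B}$ is $2 \times 2$. As a sample calculation, the $(1,1)$ entry of $\mathbf{A}^\prime\mathbf{B}$ is $(2)(1) + (1)(2) + (0)(-1) = 4$.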
\item Let $\mathbf{c} = \left( \begin{array}{r} 2 \\ 1 \\ 0 \end{array} \right)$ and $\mathbf{d} = \left( \begin{array}{r} 1 \\ 2 \\ -1 \end{array} \right)$. Verify that
\begin{displaymath}
\mathbf{c}^\prime\mathbf{d} = 4
\mbox{ ~~~~~and~~~~~ }
\mathbf{c}\mathbf{d}^\prime = \left( \begin{array}{rrr} 2 & 4 & -2 \\ 1 & 2 & -1 \\ 0 & 0 & 0 \end{array} \right).
\end{displaymath}

\item Matrix multiplication does not commute. That is, if $\mathbf{A}$ and $\mathbf{B}$ are matrices, in general it is \emph{not} true that $\mathbf{AB} = \mathbf{BA}$ unless both matrices are $1 \times 1$. Establish this important fact by making up a simple numerical example in which $\mathbf{A}$ and $\mathbf{B}$ are both $2 \times 2$ matrices. Carry out the multiplication, showing $\mathbf{AB} \neq \mathbf{BA}$.

% \pagebreak
\item Let $\mathbf{A}$ be a square matrix with the determinant of $\mathbf{A}$ (denoted $|\mathbf{A}|$) equal to zero. What does this tell you about $\mathbf{A}^{-1}$? No proof is required here.

\item Recall that $\mathbf{A}$ symmetric means $\mathbf{A=A^\prime}$. Let $\mathbf{X}$ be an $n$ by $p$ matrix. Prove that $\mathbf{X^\prime X}$ is symmetric.

\item Let $\mathbf{X}$ be an $n$ by $p$ matrix with $n \neq p$. Why is it incorrect to say that $(\mathbf{X^\prime X})^{-1}= \mathbf{X}^{-1}\mathbf{X}^{\prime -1}$?

\end{enumerate}

% \vspace{130mm}

\noindent
\begin{center}\begin{tabular}{l} \hspace{6in} \\ \hline \end{tabular}\end{center}
This assignment was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/302f16} {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f16}}

\end{document}

# R scratch work: the matrices A and B from the matrix questions above.
# (LaTeX ignores everything after \end{document}.)
A = rbind(c(2, 5),
          c(1, -4),
          c(0, 3))
B = rbind(c(1, 0),
          c(2, 3),
          c(-1, 3))
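# A brief check of the matrix products, added as a sketch; it assumes only
# the A and B defined above.
t(A) %*% B    # should match the 2 x 2 matrix in the verification question
A %*% t(B)    # should match the 3 x 3 matrix in the verification question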