% 305s14regular1.tex REVIEW
\documentclass[10pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{fullpage}
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{STA 305s14 Regular Assignment One}}\footnote{Copyright information is at the end of the last page.}
\vspace{1 mm}
\end{center}

\noindent This assignment is strictly review. Problems are preparation for Term Test One on Feb.~3rd, and will not be handed in. Use the formula sheet, which is posted on the course home page.

\vspace{3mm}

\begin{enumerate}
%%%%%%%%%% T-F, mostly about p-values
\item Label each statement below True or False. Write ``T'' or ``F'' beside each statement. Assume the $\alpha=0.05$ significance level. If there are True-False questions on the term test or final exam, you will need to get most of them right (say 8 out of 10) in order to get any credit.
\begin{enumerate}
\item \underline{\hspace{10mm}} The $p$-value is the probability that the null hypothesis is true. % F
\item \underline{\hspace{10mm}} The $p$-value is the probability that the null hypothesis is false. % F
\item \underline{\hspace{10mm}} In a study comparing a new drug to the current standard treatment, the null hypothesis is rejected. This means the new drug is ineffective. % F
\item \underline{\hspace{10mm}} We observe $r = -0.70$, $p = .009$. We conclude that high values of $X$ tend to go with low values of $Y$ and low values of $X$ tend to go with high values of $Y$. % T
\item \underline{\hspace{10mm}} The $p$-value is the probability of failing to replicate significant results in a second independent random sample of the same size.
%F
\item \underline{\hspace{10mm}} The greater the $p$-value, the stronger the evidence that the independent and dependent variables are related. %F
\item \underline{\hspace{10mm}} The greater the $p$-value, the stronger the evidence against the null hypothesis. % F (smaller p-values, not greater, mean stronger evidence against the null hypothesis)
\item \underline{\hspace{10mm}} If $p > .05$ we reject the null hypothesis at the .05 level. %F
\item \underline{\hspace{10mm}} If $p < .05$ we reject the null hypothesis at the .05 level. % T
\item \underline{\hspace{10mm}} In a study comparing a new drug to the current standard treatment, $p > .05$. We conclude that the new drug and the existing treatment are not equally effective. %F
\item \underline{\hspace{10mm}} The 95\% confidence interval for $\beta_3$ is from $-0.26$ to $3.12$. This means $P\{-0.26 < \beta_3 < 3.12\} = 0.95$. % F
\item \underline{\hspace{10mm}} When you add another independent variable in multiple regression, $R^2$ cannot go down. %T
\item \underline{\hspace{10mm}} We observe $r = 0.50$, $p = .002$. This means that 50\% of the variation in the dependent variable is explained by a linear relationship with the independent variable. %F
\item \underline{\hspace{10mm}} The $p$-value is the maximum significance level $\alpha$ such that the null hypothesis is rejected. % F
\item \underline{\hspace{10mm}} The $p$-value is the minimum significance level $\alpha$ such that the null hypothesis is rejected. % T
\end{enumerate}

%%%%%%%%%% Scalar variance and covariance
\item In the following, $X$ and $Y$ are random variables, while $a$ and $b$ are fixed constants.
Using the definitions of variance and covariance (see formula sheet) along with familiar properties of expected value, show the following:
\begin{enumerate}
\item $Var(Y) = E(Y^2)-E(Y)^2$
\item $Cov(X,Y)=E(XY)-E(X)E(Y)$
\item $Var(aX+b) = a^2 Var(X)$ % Important
\item $Var(a)=0$
\item $Cov(X+a,Y+b)=Cov(X,Y)$ % Important
\item $Var(aX+bY)=a^2Var(X)+b^2Var(Y)+2abCov(X,Y)$
\end{enumerate}

\pagebreak

\item Let $y_1, \ldots, y_n$ be numbers, and $\overline{y}=\frac{1}{n}\sum_{i=1}^ny_i$. Show
\begin{enumerate}
\item $\sum_{i=1}^n(y_i-\overline{y})=0$
\item $\sum_{i=1}^n(y_i-\overline{y})^2=\sum_{i=1}^ny_i^2 \,-\, n\overline{y}^2$
\item The sum of squares $Q_m = \sum_{i=1}^n(y_i-m)^2$ is minimized when $m = \overline{y}$.
\end{enumerate}

\item Let $Y_1, \ldots, Y_n$ be independent random variables with $E(Y_i)=\mu$ and $Var(Y_i)=\sigma^2$ for $i=1, \ldots, n$. Let $a_1, \ldots, a_n$ be constants and define the linear combination $L$ by $L = \sum_{i=1}^n a_i Y_i$.
\begin{enumerate}
\item What is $E(L)$? Show your work. Do you use independence? Answer Yes or No. If the answer is Yes, indicate where you use it by drawing an arrow to one of the equals signs, and writing ``I use independence here.''
\item What is $Var(L)$? Show your work. Do you use independence? Answer Yes or No. If the answer is Yes, indicate where you use it by drawing an arrow to one of the equals signs, and writing ``I use independence here.''
\item A statistic $T$ is an \emph{unbiased estimator} of a parameter $\theta$ if $E(T)=\theta$. Suppose that $L$ is an unbiased estimator of $\mu$. Does this mean that $a_i=\frac{1}{n}$ for $i=1, \ldots, n$? Answer Yes or No. If the answer is Yes, prove it. If the answer is No, give another set of constants $a_1, \ldots, a_n$ that make $L$ unbiased.
\end{enumerate}

% \pagebreak

%%%%%%%%%% Matrices and random vectors
\item Let $\mathbf{X}$ be an $n$ by $p$ matrix with $n \neq p$.
Why is it incorrect to say that $(\mathbf{X^\prime X})^{-1}= \mathbf{X}^{-1}\mathbf{X}^{\prime -1}$?

\item Let $\mathbf{X}$ be a random vector with expected value $\boldsymbol{\mu}_x$, and let $\mathbf{Y}$ be a random vector with expected value $\boldsymbol{\mu}_y$. Using the definitions on the formula sheet,
\begin{enumerate}
\item Show $cov(\mathbf{Y}) = E\{\mathbf{YY}^\prime\} - \boldsymbol{\mu}_y\boldsymbol{\mu}_y^\prime$. Why is it incorrect (and worth zero marks) to say $E\{\mathbf{Y}^2\} - \boldsymbol{\mu}_y^2$?
\item Let $\mathbf{A}$ be a matrix of constants (of the right dimensions). Show $cov(\mathbf{AY}) = \mathbf{A}cov(\mathbf{Y}) \mathbf{A}^\prime$. Why is it incorrect (and worth zero marks) to say $\mathbf{A}^2cov(\mathbf{Y})$?
\end{enumerate}

%%%%%%%%%% Distributions
\item In this course, you will not need to prove any distribution facts using moment-generating functions. But you have to know some standard results that are not directly on the formula sheet. Just write down the answers to the following.
\begin{enumerate}
\item Let $Y_1, \ldots, Y_k$ be independent chi-squared random variables with respective parameters $\nu_1, \ldots, \nu_k$. What is the distribution of $Y = \sum_{j=1}^k Y_j$?
\item Let $Y \sim N(\mu,\sigma^2)$. What is the distribution of $\frac{Y-\mu}{\sigma}$?
\item $Z \sim N(0,1)$. What is the distribution of $Z^2$?
\item Let $Y_1, \ldots, Y_n$ be a random sample (meaning they are independent and identically distributed) from a normal distribution with expected value $\mu$ and variance $\sigma^2$.
\begin{enumerate}
\item What is the distribution of $Y = \sum_{i=1}^n Y_i$?
\item What is the distribution of $\overline{Y}$?
\end{enumerate}
\end{enumerate}

\item Again, let $Y_1, \ldots, Y_n$ be a random sample from a univariate normal distribution with expected value $\mu$ and variance $\sigma^2$. Using material from the formula sheet (not moment-generating functions), \emph{prove} the distribution of $Y = \sum_{i=1}^n Y_i$.
Let $\mathbf{1}$ denote an $n \times 1$ vector of ones.

\item Show that if $\mathbf{Y} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$ where the covariance matrix $\boldsymbol{\Sigma}$ is strictly positive definite, then $W = (\mathbf{Y}-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(\mathbf{Y}-\boldsymbol{\mu})$ has a chi-squared distribution with $p$ degrees of freedom. It will help to start by finding the distribution of $\boldsymbol{\Sigma}^{-1/2}(\mathbf{Y}-\boldsymbol{\mu})$.

\pagebreak

%%%%%%%%%% Regression
\item For the general linear regression model in which $\mathbf{X}$ is an $n$ by $(k+1)$ matrix of constants,
\begin{enumerate}
\item Give $E(\mathbf{Y})$. Show the calculation.
\item Give $cov(\mathbf{Y})$. Show the calculation.
\item What is the distribution of $\mathbf{Y}$? Just write down the answer. What fact on the formula sheet lets you do this?
\item Give $E(\widehat{\boldsymbol{\beta}})$. Show the calculations. Simplify.
\item Give $cov(\widehat{\boldsymbol{\beta}})$. Show the calculations. Simplify.
\item What is the distribution of $\widehat{\boldsymbol{\beta}}$? Just write down the answer. What fact on the formula sheet lets you do this?
\end{enumerate}

\item \label{regthruorigin} Let $Y_i = \beta x_i + \epsilon_i$ for $i=1, \ldots, n$, where $\epsilon_1, \ldots, \epsilon_n$ are a random sample from a distribution with expected value zero and variance $\sigma^2$, and $\beta$ and $\sigma^2$ are unknown constants. The numbers $x_1, \ldots, x_n$ are known, observed constants. This is a special case of the general linear model given on the formula sheet.
\begin{enumerate}
\item What is $\mathbf{X^\prime X}$?
\item What is $\mathbf{X^\prime Y}$?
\item What is $(\mathbf{X^\prime X})^{-1}$?
\item What is $\widehat{\boldsymbol{\beta}}$?
\end{enumerate}

\item \label{simplereg} Let $Y_i = \beta_0 + \beta_1 x_i + \epsilon_i$ for $i=1, \ldots, n$, where $\epsilon_1, \ldots, \epsilon_n$ are a random sample from a distribution with expected value zero and variance $\sigma^2$, and $\beta_0$, $\beta_1$ and $\sigma^2$ are unknown constants. The numbers $x_1, \ldots, x_n$ are known, observed constants. This is a special case of the general linear model given on the formula sheet.
\begin{enumerate}
\item What is $\boldsymbol{\beta}$?
\item What is $\mathbf{X^\prime X}$?
\item What is $\mathbf{X^\prime Y}$?
\end{enumerate}

%%%%%%%%%%
%%%%%%%%%%
%%%%%%%%%%
\end{enumerate}

\vspace{55mm}

\noindent
\begin{center}\begin{tabular}{l} \hspace{6in} \\ \hline \end{tabular}\end{center}
This assignment was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/305s14} {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/305s14}}

\end{document}

% NOTE(review): leftover editing scraps below were found after \end{document}.
% They are ignored by LaTeX; kept here commented out rather than deleted.
% \item \begin{enumerate} \item \item \item \end{enumerate}
% \item \underline{\hspace{10mm}} The $p$-value is the maximum significance level $\alpha$ such that the null hypothesis is rejected.
% \item \underline{\hspace{10mm}}
% \item \underline{\hspace{10mm}}