\documentclass[10pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue,
citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{fullpage}
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{STA 2101/442 Assignment Three}}\footnote{Copyright information is at the end of the last page.}
\vspace{1 mm}
\end{center}

\noindent
The questions are just practice for the quiz, and are not to be handed in. Use R as necessary for Question~\ref{screws}, and \textbf{bring your printout to the quiz.}

\vspace{1mm}

\begin{enumerate}

\item \label{inverseCDF} This is about how to simulate from a continuous univariate distribution. Let the random variable $X$ have a continuous distribution with density $f_X(x)$ and cumulative distribution function $F_X(x)$. Suppose the cumulative distribution function is strictly increasing over the set of $x$ values where $0 < F_X(x) < 1$, so that $F_X(x)$ has an inverse. Let $U$ have a uniform distribution over the interval $(0,1)$. Show that the random variable $Y = F_X^{-1}(U)$ has the same distribution as $X$. Hint: You will need an expression for $F_U(u) = Pr\{U \leq u\}$, where $0 \leq u \leq 1$.
%
\item Suppose $\sqrt{n}(T_n-\theta) \stackrel{d}{\rightarrow} T$. Show $T_n \stackrel{p}{\rightarrow} \theta$. Please use Slutsky lemmas rather than definitions.

\item Let $X_1 , \ldots, X_n$ be a random sample from a Binomial distribution with parameters $3$ and $\theta$. That is,
\begin{displaymath}
P(X_i = x_i) = \binom{3}{x_i} \theta^{x_i} (1-\theta)^{3-x_i},
\end{displaymath}
for $x_i=0,1,2,3$. Find the maximum likelihood estimator of $\theta$, and show that it is strongly consistent.

\item Let $X_1 , \ldots, X_n$ be a random sample from a continuous distribution with density
\begin{displaymath}
f(x;\tau) = \frac{\tau^{1/2}}{\sqrt{2\pi}} \, e^{-\frac{\tau x^2}{2}},
\end{displaymath}
where the parameter $\tau>0$. Let
\begin{displaymath}
\widehat{\tau} = \frac{n}{\sum_{i=1}^n X_i^2}.
\end{displaymath}
Is $\widehat{\tau}$ a consistent estimator of $\tau$? Answer Yes or No and prove your answer. Hint: You can just write down $E(X^2)$ by inspection. This is a very familiar distribution.

\item Let $X_1, \ldots, X_n$ be a random sample from a distribution with mean $\mu$. Show that $T_n = \frac{1}{n+400}\sum_{i=1}^n X_i$ is a strongly consistent estimator of $\mu$. % That could be a quiz Q

\item Let $X_1, \ldots, X_n$ be a random sample from a distribution with mean $\mu$ and variance $\sigma^2$. Prove that the sample variance $S^2=\frac{\sum_{i=1}^n(X_i-\overline{X})^2}{n-1}$ is a strongly consistent estimator of $\sigma^2$.

\item \label{randiv} Independently for $i = 1 , \ldots, n$, let
\begin{displaymath}
Y_i = \beta X_i + \epsilon_i,
\end{displaymath}
where $E(X_i)=E(\epsilon_i)=0$, $Var(X_i)=\sigma^2_X$, $Var(\epsilon_i)=\sigma^2_\epsilon$, and $\epsilon_i$ is independent of $X_i$. Let
\begin{displaymath}
\widehat{\beta} = \frac{\sum_{i=1}^n X_i Y_i}{\sum_{i=1}^n X_i^2}.
\end{displaymath}
Is $\widehat{\beta}$ a consistent estimator of $\beta$? Answer Yes or No and prove your answer.
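
For intuition about Problem~\ref{randiv}, here is a small R simulation of the model; the sample size and parameter values below are arbitrary choices for illustration, and the output only suggests (it does not prove) what the answer should be.
\begin{verbatim}
# Simulate the model Y_i = beta*X_i + epsilon_i with X and epsilon independent.
# beta = 2, sd(X) = 3 and sd(epsilon) = 1 are arbitrary illustrative values.
set.seed(123)
n <- 100000
beta <- 2
x <- rnorm(n, mean = 0, sd = 3)        # E(X) = 0, Var(X) = 9
epsilon <- rnorm(n, mean = 0, sd = 1)  # independent of x, mean zero
y <- beta * x + epsilon
betahat <- sum(x * y) / sum(x^2)
betahat                                # compare to beta = 2
\end{verbatim}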
\item In this problem, you'll use (without proof) the \emph{variance rule}, which says that if $\theta$ is a real constant and $T_1, T_2, \ldots$ is a sequence of random variables with
\begin{displaymath}
\lim_{n \rightarrow \infty} E(T_n) = \theta \mbox{ and } \lim_{n \rightarrow \infty} Var(T_n) = 0,
\end{displaymath}
then $T_n \stackrel{P}{\rightarrow} \theta$. In Problem~\ref{randiv}, the independent variables are random. Here they are fixed constants, which is more standard (though a little strange if you think about it). Accordingly, let
\begin{displaymath}
Y_i = \beta x_i + \epsilon_i
\end{displaymath}
for $i=1, \ldots, n$, where $\epsilon_1, \ldots, \epsilon_n$ are a random sample from a distribution with expected value zero and variance $\sigma^2$, and $\beta$ and $\sigma^2$ are unknown constants.
\begin{enumerate}
\item What is $E(Y_i)$?
\item What is $Var(Y_i)$?
\item Find the Least Squares estimate of $\beta$ by minimizing $Q=\sum_{i=1}^n(Y_i-\beta x_i)^2$ over all values of $\beta$. Let $\widehat{\beta}_n$ denote the point at which $Q$ is minimal.
\item Is $\widehat{\beta}_n$ unbiased? Answer Yes or No and show your work.
\item Give a nice simple condition on the $x_i$ values that guarantees $\widehat{\beta}_n$ will be consistent. Show your work. Remember, in this model the $x_i$ are fixed constants, not random variables.
\item Let $\widehat{\beta}_{2,n} = \frac{\overline{Y}_n}{\overline{x}_n}$. Is $\widehat{\beta}_{2,n}$ unbiased? Consistent? Answer Yes or No to each question and show your work. Do you need a condition on the $x_i$ values?
\item Prove that $\widehat{\beta}_n$ is a more accurate estimator than $\widehat{\beta}_{2,n}$ in the sense that it has smaller variance. Hint: The sample variance of the independent variable values cannot be negative.
\end{enumerate}

\item Let $X$ be a random variable with expected value $\mu$ and variance $\sigma^2$. Show $\frac{X}{n} \stackrel{p}{\rightarrow} 0$.

\item Let $X_1 , \ldots, X_n$ be a random sample from a Gamma distribution with $\alpha=\beta=\theta>0$. That is, the density is
\begin{displaymath}
f(x;\theta) = \frac{1}{\theta^\theta \Gamma(\theta)} e^{-x/\theta} x^{\theta-1},
\end{displaymath}
for $x>0$. Let $\widehat{\theta} = \overline{X}_n$. Is $\widehat{\theta}$ a consistent estimator of $\theta$? Answer Yes or No and prove your answer.

\item The ordinary univariate Central Limit Theorem says that if $X_1, \ldots, X_n$ are a random sample (independent and identically distributed) from a distribution with expected value $\mu$ and variance $\sigma^2$, then
\begin{displaymath}
Z_n^{(1)} = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma} \stackrel{d}{\rightarrow} Z \sim N(0,1).
\end{displaymath}
An application of some Slutsky theorems (see lecture slides) shows that also,
\begin{displaymath}
Z_n^{(2)} = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\widehat{\sigma}_n} \stackrel{d}{\rightarrow} Z \sim N(0,1),
\end{displaymath}
where $\widehat{\sigma}_n$ is any consistent estimator of $\sigma$. For this problem, suppose that $X_1, \ldots, X_n$ are Bernoulli($\theta$).
\begin{enumerate}
\item What is $\mu$?
\item What is $\sigma^2$?
\item Re-write $Z_n^{(1)}$ for the Bernoulli example.
\item What about $Z_n = \frac{\sqrt{n}(\overline{X}_n-\theta)} {\sqrt{\overline{X}_n(1-\overline{X}_n)}}$? Does $Z_n$ converge in distribution to a standard normal? Why or why not?
\item What about the $t$ statistic $T_n = \frac{\sqrt{n}(\overline{X}_n-\mu)}{S_n}$, where $S_n$ is the sample standard deviation? Does $T_n$ converge in distribution to a standard normal? Why or why not?
\end{enumerate}
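
As a quick numerical illustration of part (d) above (the values of $\theta$, $n$ and the number of simulated samples are arbitrary), one can simulate many Bernoulli samples and look at the behaviour of $Z_n$:
\begin{verbatim}
# Simulate nsim values of Z_n for Bernoulli(theta) data; all settings arbitrary.
set.seed(456)
theta <- 0.3; n <- 200; nsim <- 10000
xbar <- rbinom(nsim, size = n, prob = theta) / n     # nsim sample proportions
Z <- sqrt(n) * (xbar - theta) / sqrt(xbar * (1 - xbar))
c(mean(Z), var(Z))       # roughly 0 and 1 if Z_n is close to standard normal
\end{verbatim}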
%%%%%%%%%%%% Univariate Delta Method problems removed - see 2013 %%%%%%%%%%

%%%%%%%%% Random Vectors %%%%%%%%%%%%%%

\item If the $p \times 1$ random vector $\mathbf{X}$ has variance-covariance matrix $\mathbf{\Sigma}$ and $\mathbf{A}$ is an $m \times p$ matrix of constants, prove that the variance-covariance matrix of $\mathbf{AX}$ is $\mathbf{A \Sigma A}^\prime$. Start with the definition of a variance-covariance matrix:
\begin{displaymath}
V(\mathbf{Z})=E(\mathbf{Z}-\boldsymbol{\mu}_z)(\mathbf{Z}-\boldsymbol{\mu}_z)^\prime.
\end{displaymath}

\item If the $p \times 1$ random vector $\mathbf{X}$ has mean $\boldsymbol{\mu}$ and variance-covariance matrix $\mathbf{\Sigma}$, show $\mathbf{\Sigma} = E(\mathbf{XX}^\prime) - \boldsymbol{\mu \mu}^\prime$.

\item Let the $p \times 1$ random vector $\mathbf{X}$ have mean $\boldsymbol{\mu}$ and variance-covariance matrix $\mathbf{\Sigma}$, and let $\mathbf{c}$ be a $p \times 1$ vector of constants. Find $V(\mathbf{X}+\mathbf{c})$. Show your work.

\item Let $\mathbf{X}$ be a $p \times 1$ random vector with mean $\boldsymbol{\mu}_x$ and variance-covariance matrix $\mathbf{\Sigma}_x$, and let $\mathbf{Y}$ be a $q \times 1$ random vector with mean $\boldsymbol{\mu}_y$ and variance-covariance matrix $\mathbf{\Sigma}_y$. Recall that $C(\mathbf{X},\mathbf{Y})$ is the $p \times q$ matrix $C(\mathbf{X},\mathbf{Y}) = E\left((\mathbf{X}-\boldsymbol{\mu}_x)(\mathbf{Y}-\boldsymbol{\mu}_y)^\prime\right)$.
\begin{enumerate}
\item What is the $(i,j)$ element of $C(\mathbf{X},\mathbf{Y})$?
\item For this item, $p=q$. Find an expression for $V(\mathbf{X}+\mathbf{Y})$ in terms of $\mathbf{\Sigma}_x$, $\mathbf{\Sigma}_y$ and $C(\mathbf{X},\mathbf{Y})$. Show your work.
\item Simplify further for the special case where $Cov(X_i,Y_j)=0$ for all $i$ and $j$.
\item Let $\mathbf{c}$ be a $p \times 1$ vector of constants and $\mathbf{d}$ be a $q \times 1$ vector of constants. Find $C(\mathbf{X}+\mathbf{c}, \mathbf{Y}+\mathbf{d})$. Show your work.
\end{enumerate}

%%%%%%%%% End of Random Matrices %%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%% MGF %%%%%%%%%%%%%%%%%%%%%%%%%

\item \label{mgfstart} Denote the moment-generating function of a random variable $Y$ by $M_Y(t)$. The moment-generating function is defined by $M_Y(t) = E(e^{Yt})$. Recall that the moment-generating function corresponds uniquely to the probability distribution.
\begin{enumerate}
\item Let $a$ be a constant. Prove that $M_{aX}(t) = M_X(at)$.
\item Prove that $M_{X+a}(t) = e^{at}M_X(t)$.
\item Let $X_1$ and $X_2$ be \emph{independent} random variables. Prove that
\begin{displaymath}
M_{X_1+X_2}(t) = M_{X_1}(t) \, M_{X_2}(t).
\end{displaymath}
For convenience, you may assume that $X_1$ and $X_2$ are continuous, so you will integrate. This result extends to $M_{\sum_{i=1}^n X_i}(t) = \prod_{i=1}^n M_{X_i}(t)$, but you don't have to show it. (You could use induction.)
\end{enumerate}

\item Recall that if $X\sim N(\mu,\sigma^2)$, it has moment-generating function $M_X(t) = e^{\mu t + \frac{1}{2}\sigma^2t^2}$.
\begin{enumerate}
\item Let $X\sim N(\mu,\sigma^2)$ and $Y=aX+b$, where $a$ and $b$ are constants. Find the distribution of $Y$. Show your work.
\item Let $X\sim N(\mu,\sigma^2)$ and $Z = \frac{X-\mu}{\sigma}$. Find the distribution of $Z$.
\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Find the distribution of $Y = \sum_{i=1}^nX_i$.
\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Find the distribution of the sample mean $\overline{X}$.
\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Find the distribution of $Z = \frac{\sqrt{n}(\overline{X}-\mu)}{\sigma}$.
\end{enumerate}
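
As an optional numerical check of part (d) above (the distribution of the sample mean), the following R sketch uses arbitrary values of $\mu$, $\sigma$ and $n$; the simulated standard deviation of $\overline{X}$ should be close to $\sigma/\sqrt{n}$.
\begin{verbatim}
# Simulate many sample means from N(mu, sigma^2); all settings are arbitrary.
set.seed(789)
mu <- 5; sigma <- 2; n <- 25; nsim <- 10000
xbar <- replicate(nsim, mean(rnorm(n, mean = mu, sd = sigma)))
c(mean(xbar), sd(xbar), sigma / sqrt(n))   # compare sd(xbar) to sigma/sqrt(n)
\end{verbatim}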
\newpage

\item A Chi-squared random variable $X$ with parameter $\nu>0$ has moment-generating function $M_X(t) = (1-2t)^{-\nu/2}$.
\begin{enumerate}
\item Let $X_1, \ldots, X_n$ be independent random variables with $X_i \sim \chi^2(\nu_i)$ for $i=1, \ldots, n$. Find the distribution of $Y = \sum_{i=1}^n X_i$.
\item Let $Z \sim N(0,1)$. Find the distribution of $Y=Z^2$. For this one, you need to integrate. Recall that the density of a normal random variable is $f(x) = \frac{1}{\sigma\sqrt{2\pi}}e^{-\frac{(x-\mu)^2}{2\sigma^2}}$.
\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Find the distribution of $Y = \frac{1}{\sigma^2} \sum_{i=1}^n\left(X_i-\mu \right)^2$.
\item Let $Y=X_1+X_2$, where $X_1$ and $X_2$ are independent, $X_1\sim\chi^2(\nu_1)$ and $Y\sim\chi^2(\nu_1+\nu_2)$, where $\nu_1$ and $\nu_2$ are both positive. Show $X_2\sim\chi^2(\nu_2)$.
\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Show
\begin{displaymath}
\frac{(n-1)S^2}{\sigma^2} \sim \chi^2(n-1),
\end{displaymath}
where $S^2 = \frac{\sum_{i=1}^n\left(X_i-\overline{X} \right)^2 }{n-1}$. Hint: $\sum_{i=1}^n\left(X_i-\mu \right)^2 = \sum_{i=1}^n\left(X_i-\overline{X} + \overline{X} - \mu \right)^2 = \ldots$ You may use the independence of $\overline{X}$ and $S^2$ without proof, for now.
\end{enumerate}

\item Recall the definition of the $t$ distribution. If $Z\sim N(0,1)$, $W \sim \chi^2(\nu)$ and $Z$ and $W$ are independent, then $T = \frac{Z}{\sqrt{W/\nu}}$ is said to have a $t$ distribution with $\nu$ degrees of freedom, and we write $T \sim t(\nu)$. As in the last question, let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Show that $T = \frac{\sqrt{n}(\overline{X}-\mu)}{S} \sim t(n-1)$. Once again, you may use the independence of $\overline{X}$ and $S^2$ without proof for now.
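
For intuition about the last two problems, here is a short R simulation of the claim that $(n-1)S^2/\sigma^2$ behaves like a chi-squared random variable with $n-1$ degrees of freedom; the numerical settings are arbitrary and the output is only a sanity check, not a proof.
\begin{verbatim}
# Simulate (n-1)S^2/sigma^2 for normal samples; mu, sigma, n, nsim arbitrary.
set.seed(321)
mu <- 10; sigma <- 3; n <- 8; nsim <- 10000
w <- replicate(nsim, (n - 1) * var(rnorm(n, mean = mu, sd = sigma)) / sigma^2)
c(mean(w), var(w))   # chi-squared(n-1) has mean n-1 = 7 and variance 2(n-1) = 14
\end{verbatim}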
% \newpage
\item \label{screws} Fine machine screws are manufactured so as to have a diameter of one millimetre, but of course nothing is perfect. The screws have an \emph{expected} diameter of one millimetre, and if the manufacturing process is running properly, they also have a very small standard deviation. As long as the standard deviation is three micrometres (thousandths of a millimetre) or less, virtually all the screws will fit properly. The industrial quality control process involves taking repeated samples of screws, measuring them, and determining whether the standard deviation is greater than three.

Three features of this application are a bit unusual. First, the data really \emph{are} normal. Variation from screw to screw is driven by a large number of separate tiny influences that more or less add up, and so the Central Limit Theorem applies. Second, nobody pays much attention to the mean; it's virtually always about one millimetre. When the manufacturing process starts to go wrong, what happens is that the variance goes up. Third, nobody cares if the standard deviation is \emph{less} than three micrometres. They only worry if it's too big, because then they have to stop the assembly line and service the machines. So, a one-tailed test really is appropriate.

It's a pain to measure those screws, so the engineers take samples of size ten. The most recent sample yields a sample mean of $1002.687$ and a sample standard deviation of $4.51$, both in micrometres. (A generic illustration of the R functions for the chi-squared distribution appears after the problem list.)
\begin{enumerate}
\item What is the model?
\item What is the null hypothesis, in symbols? What is the alternative hypothesis?
\item An earlier problem suggests a test statistic. Write down the formula.
\item Use R to calculate the $p$-value. The answer is a number. Do you reject $H_0$ at $\alpha=0.05$? Do you stop the assembly line?
\item Derive a $(1-\alpha)100\%$ confidence interval for $\sigma$ (not $\sigma^2$). Show your work.
\item Calculate your confidence interval for the numerical data given above, using $\alpha=0.05$. Your answer is a set of two numbers.
\item Don't you think a \emph{one-sided} confidence interval would be better here? Derive the formula for a statistic (say $L$, for upper limit) such that $Pr\{\sigma < L \} = 1-\alpha$.
\item Calculate your one-sided confidence interval for the numerical data given above, using $\alpha=0.05$. Your answer is a single number.
\item I hope that you are at least a little uncomfortable with that sample size of $n=10$. Is it enough? The answer to such questions is always another question: ``Enough for what?" Suppose that if the true value of $\sigma$ is 4 or more, the quality control engineers want to be able to detect it with probability at least 0.90, using the usual $\alpha=0.05$ significance level. What's the smallest sample size they can get away with? Please approach the problem this way.
\begin{enumerate}
\item First, derive a formula for the power of the test, for general $n$, $\alpha$, $\sigma_0$ and true $\sigma$.
\item What is the power for an $n$ of exactly 10 (the engineers' intuitive choice) when the true value of $\sigma$ is 4? The answer is a number.
\item Then, plug in all the numbers except $n$. Starting with a nice small sample size (one lower than 10), increase $n$, calculating the power each time, until the power exceeds 0.90. Your final answer is a number.
\end{enumerate}
\end{enumerate}

\end{enumerate}
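
\noindent For Question~\ref{screws}, the chi-squared distribution functions in R are \texttt{pchisq} and \texttt{qchisq}. The lines below only illustrate how they are called; the numbers in them are arbitrary and are not the ones you need for the problem.
\begin{verbatim}
# Generic examples of R's chi-squared functions; 12.3 and df = 7 are arbitrary.
pchisq(12.3, df = 7)             # P(W <= 12.3) for W ~ chi-squared(7)
1 - pchisq(12.3, df = 7)         # upper-tail probability, as in a one-sided test
qchisq(0.95, df = 7)             # 95th percentile of chi-squared(7)
qchisq(c(0.025, 0.975), df = 7)  # quantiles for a two-sided 95% interval
\end{verbatim}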
\vspace{20mm}

\noindent
\begin{center}\begin{tabular}{l} \hspace{6in} \\ \hline \end{tabular}\end{center}
This assignment was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf14}
{\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/appliedf14}}

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% MVN for next time

\item Let $\mathbf{X}= (X_1,X_2,X_3)^\prime$ be multivariate normal with
\begin{displaymath}
\boldsymbol{\mu} = \left[ \begin{array}{c} 1 \\ 0 \\ 6 \end{array} \right]
\mbox{ and }
\boldsymbol{\Sigma} = \left[ \begin{array}{c c c} 1 & 0 & 0 \\ 0 & 2 & 0 \\ 0 & 0 & 1 \end{array} \right] .
\end{displaymath}
Let $Y_1=X_1+X_2$ and $Y_2=X_2+X_3$. Find the joint distribution of $Y_1$ and $Y_2$.

\item Let $X_1$ be Normal$(\mu_1, \sigma^2_1)$, and $X_2$ be Normal$(\mu_2, \sigma^2_2)$, independent of $X_1$. What is the joint distribution of $Y_1=X_1+X_2$ and $Y_2=X_1-X_2$? What is required for $Y_1$ and $Y_2$ to be independent? Hint: Use matrices.

\item Let $\mathbf{Y}~=~\mathbf{X} \boldsymbol{\beta}~+~\boldsymbol{\epsilon}$, where $\mathbf{X}$ is an $n \times p$ matrix of known constants, $\boldsymbol{\beta}$ is a $p \times 1$ vector of unknown constants, and $\boldsymbol{\epsilon}$ is multivariate normal with mean zero and covariance matrix $\sigma^2 \mathbf{I}_n$, where $\sigma^2 > 0$ is a constant. In the following, it may be helpful to recall that $(\mathbf{A}^{-1})^\prime=(\mathbf{A}^\prime)^{-1}$.
\begin{enumerate}
\item What is the distribution of $\mathbf{Y}$?
\item The maximum likelihood estimate (MLE) of $\boldsymbol{\beta}$ is $\hat{\boldsymbol{\beta}} = (\mathbf{X}^\prime \mathbf{X})^{-1} \mathbf{X}^\prime \mathbf{Y}$. What is the distribution of $\hat{\boldsymbol{\beta}}$? Show the calculations.
\item Let $\widehat{\mathbf{Y}}=\mathbf{X}\hat{\boldsymbol{\beta}}$. What is the distribution of $\widehat{\mathbf{Y}}$? Show the calculations.
\item Let the vector of residuals $\mathbf{e}= (\mathbf{Y}-\widehat{\mathbf{Y}})$. What is the distribution of $\mathbf{e}$? Show the calculations. Simplify both the expected value (which is zero) and the covariance matrix.
\end{enumerate}

\pagebreak

\item Show that if $\mathbf{X} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$, $Y = (\mathbf{X}-\boldsymbol{\mu})^\prime \boldsymbol{\Sigma}^{-1}(\mathbf{X}-\boldsymbol{\mu})$ has a chi-square distribution with $p$ degrees of freedom.

\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Show $Cov(\overline{X},(X_j-\overline{X}))=0$ for $j=1, \ldots, n$. This is the key to showing $\overline{X}$ and $S^2$ independent, a fact you may use without proof in the next problem.

\item Recall that the chi-squared distribution with $\nu$ degrees of freedom is just Gamma with $\alpha=\frac{\nu}{2}$ and $\beta=2$. So if $X\sim\chi^2(\nu)$, it has moment-generating function $M_X(t) = (1-2t)^{-\nu/2}$.
\begin{enumerate}
\item Let $Y=X_1+X_2$, where $X_1$ and $X_2$ are independent, $X_1\sim\chi^2(\nu_1)$ and $Y\sim\chi^2(\nu_1+\nu_2)$, where $\nu_1$ and $\nu_2$ are both positive. Show $X_2\sim\chi^2(\nu_2)$.
\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Show
\begin{displaymath}
\frac{(n-1)S^2}{\sigma^2} \sim \chi^2(n-1).
\end{displaymath}
Hint: $\sum_{i=1}^n\left(X_i-\mu \right)^2 = \sum_{i=1}^n\left(X_i-\overline{X} + \overline{X} - \mu \right)^2 = \ldots$
\end{enumerate}

% chisq work
set.seed(9999); n = 10; sigma=4
x = rnorm(n,1000,sigma)
xbar = mean(x); xbar
S = round(sqrt(var(x)),2); S
Y = (n-1)*S^2/9; Y
pval = 1-pchisq(Y,n-1); pval

> set.seed(9999); n = 10; sigma=4
> x = rnorm(n,1000,sigma)
> xbar = mean(x); xbar
[1] 1002.687
> S = round(sqrt(var(x)),2); S
[1] 4.51
> Y = (n-1)*S^2/9; Y
[1] 20.3401
> pval = 1-pchisq(Y,n-1); pval
[1] 0.01592569

% My q19 solution
# 19d)
n=10; S = 4.5; sigsq0 = 3^2
W = (n-1)*S^2/sigsq0; W
pval = 1-pchisq(W,n-1); pval

# 19f)
alpha = 0.05; n=10; S = 4.5
a = qchisq(alpha/2,n-1); a # 1-alpha/2 above it
b = qchisq(1-alpha/2,n-1); b # alpha/2 above it
lower = sqrt((n-1)*S^2/b); lower
upper = sqrt((n-1)*S^2/a); upper