\documentclass[12pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{fullpage}
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}

\begin{center}
{\Large \textbf{STA 2101/442 Assignment Three}}\footnote{Copyright information is at the end of the last page.}
\vspace{1 mm}
\end{center}

\noindent The questions are just practice for the quiz, and are not to be handed in. Use R as necessary for Question~\ref{integration}, and \textbf{bring your printout to the quiz.}
\vspace{1mm}

\begin{enumerate}

\item \label{inverseCDF} This is about how to simulate from a continuous univariate distribution. Let the random variable $X$ have a continuous distribution with density $f_X(x)$ and cumulative distribution function $F_X(x)$. Suppose the cumulative distribution function is strictly increasing over the set of $x$ values where $0 < F_X(x) < 1$, so that $F_X(x)$ has an inverse. Let $U$ have a uniform distribution over the interval $(0,1)$. Show that the random variable $Y = F_X^{-1}(U)$ has the same distribution as $X$. Hint: You will need an expression for $F_U(u) = Pr\{U \leq u\}$, where $0 \leq u \leq 1$.

% \item Suppose $\sqrt{n}(T_n-\theta) \stackrel{d}{\rightarrow} T$. Show $T_n \stackrel{p}{\rightarrow} \theta$. Please use Slutsky lemmas rather than definitions.

\item Let $X_1, \ldots, X_n$ be a random sample from a Binomial distribution with parameters $3$ and $\theta$. That is,
\begin{displaymath}
P(X_i = x_i) = \binom{3}{x_i} \theta^{x_i} (1-\theta)^{3-x_i},
\end{displaymath}
for $x_i=0,1,2,3$. Find the maximum likelihood estimator of $\theta$, and show that it is strongly consistent.

\item Let $X_1, \ldots, X_n$ be a random sample from a continuous distribution with density
\begin{displaymath}
f(x;\tau) = \frac{\tau^{1/2}}{\sqrt{2\pi}} \, e^{-\frac{\tau x^2}{2}},
\end{displaymath}
where the parameter $\tau>0$. Let
\begin{displaymath}
\widehat{\tau} = \frac{n}{\sum_{i=1}^n X_i^2}.
\end{displaymath}
Is $\widehat{\tau}$ a consistent estimator of $\tau$? Answer Yes or No and prove your answer. Hint: You can just write down $E(X^2)$ by inspection. This is a very familiar distribution.

\item Let $X_1, \ldots, X_n$ be a random sample from a distribution with mean $\mu$. Show that $T_n = \frac{1}{n+400}\sum_{i=1}^n X_i$ is a strongly consistent estimator of $\mu$. % That could be a quiz Q

\item Let $X_1, \ldots, X_n$ be a random sample from a distribution with mean $\mu$ and variance $\sigma^2$. Prove that the sample variance $S^2=\frac{\sum_{i=1}^n(X_i-\overline{X})^2}{n-1}$ is a strongly consistent estimator of $\sigma^2$.

\item \label{randiv} Independently for $i = 1, \ldots, n$, let
\begin{displaymath}
Y_i = \beta X_i + \epsilon_i,
\end{displaymath}
where $E(X_i)=E(\epsilon_i)=0$, $Var(X_i)=\sigma^2_X$, $Var(\epsilon_i)=\sigma^2_\epsilon$, and $\epsilon_i$ is independent of $X_i$. Let
\begin{displaymath}
\widehat{\beta} = \frac{\sum_{i=1}^n X_i Y_i}{\sum_{i=1}^n X_i^2}.
\end{displaymath}
Is $\widehat{\beta}$ a consistent estimator of $\beta$? Answer Yes or No and prove your answer.
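As a numerical sanity check (not a proof), here is a minimal R sketch that simulates this model and tracks $\widehat{\beta}$ as $n$ grows. The values $\beta = 1$ and $\sigma^2_X = \sigma^2_\epsilon = 1$, and the use of normal distributions, are illustrative assumptions only; the problem does not specify them.
\begin{verbatim}
# Illustrative simulation: watch betahat as n increases.
# Assumed for illustration only: beta = 1, X_i and epsilon_i standard normal.
set.seed(9999)
beta <- 1
for(n in c(10, 100, 1000, 10000))
    {
    x <- rnorm(n)                  # X_i: mean 0, variance sigma^2_X = 1
    epsilon <- rnorm(n)            # epsilon_i: mean 0, variance sigma^2_epsilon = 1
    y <- beta*x + epsilon
    betahat <- sum(x*y) / sum(x^2) # The estimator defined in this problem
    cat("n =", n, "  betahat =", betahat, "\n")
    }
\end{verbatim}
Whatever the output suggests, it is not a substitute for the proof requested above.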
\item In this problem, you'll use (without proof) the \emph{variance rule}, which says that if $\theta$ is a real constant and $T_1, T_2, \ldots$ is a sequence of random variables with
\begin{displaymath}
\lim_{n \rightarrow \infty} E(T_n) = \theta \mbox{ and } \lim_{n \rightarrow \infty} Var(T_n) = 0,
\end{displaymath}
then $T_n \stackrel{P}{\rightarrow} \theta$. In Problem~\ref{randiv}, the independent variables are random. Here they are fixed constants, which is more standard (though a little strange if you think about it). Accordingly, let
\begin{displaymath}
Y_i = \beta x_i + \epsilon_i
\end{displaymath}
for $i=1, \ldots, n$, where $\epsilon_1, \ldots, \epsilon_n$ are a random sample from a distribution with expected value zero and variance $\sigma^2$, and $\beta$ and $\sigma^2$ are unknown constants.
\begin{enumerate}
\item What is $E(Y_i)$?
\item What is $Var(Y_i)$?
\item Find the Least Squares estimate of $\beta$ by minimizing $Q=\sum_{i=1}^n(Y_i-\beta x_i)^2$ over all values of $\beta$. Let $\widehat{\beta}_n$ denote the point at which $Q$ is minimal.
\item Is $\widehat{\beta}_n$ unbiased? Answer Yes or No and show your work.
\item Give a nice simple condition on the $x_i$ values that guarantees $\widehat{\beta}_n$ will be consistent. Show your work. Remember, in this model the $x_i$ are fixed constants, not random variables.
\item Let $\widehat{\beta}_{2,n} = \frac{\overline{Y}_n}{\overline{x}_n}$. Is $\widehat{\beta}_{2,n}$ unbiased? Consistent? Answer Yes or No to each question and show your work. Do you need a condition on the $x_i$ values?
\item Prove that $\widehat{\beta}_n$ is a more accurate estimator than $\widehat{\beta}_{2,n}$ in the sense that it has smaller variance. Hint: The sample variance of the independent variable values cannot be negative.
\end{enumerate}

% \item Let $X$ be a random variable with expected value $\mu$ and variance $\sigma^2$. Show $\frac{X}{n} \stackrel{p}{\rightarrow} 0$.

\item Let $X_1, \ldots, X_n$ be a random sample from a Gamma distribution with $\alpha=\beta=\theta>0$. That is, the density is
\begin{displaymath}
f(x;\theta) = \frac{1}{\theta^\theta \Gamma(\theta)} e^{-x/\theta} x^{\theta-1},
\end{displaymath}
for $x>0$. Let $\widehat{\theta} = \overline{X}_n$. Is $\widehat{\theta}$ a consistent estimator of $\theta$? Answer Yes or No and prove your answer.

\item The ordinary univariate Central Limit Theorem says that if $X_1, \ldots, X_n$ are a random sample (independent and identically distributed) from a distribution with expected value $\mu$ and variance $\sigma^2$, then
\begin{displaymath}
Z_n^{(1)} = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma} \stackrel{d}{\rightarrow} Z \sim N(0,1).
\end{displaymath}
An application of some Slutsky theorems (see lecture slides) shows that also,
\begin{displaymath}
Z_n^{(2)} = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\widehat{\sigma}_n} \stackrel{d}{\rightarrow} Z \sim N(0,1),
\end{displaymath}
where $\widehat{\sigma}_n$ is any consistent estimator of $\sigma$. For this problem, suppose that $X_1, \ldots, X_n$ are Bernoulli($\theta$).
\begin{enumerate}
\item What is $\mu$?
\item What is $\sigma^2$?
\item Re-write $Z_n^{(1)}$ for the Bernoulli example.
\item What about $Z_n = \frac{\sqrt{n}(\overline{X}_n-\theta)}{\sqrt{\overline{X}_n(1-\overline{X}_n)}}$? Does $Z_n$ converge in distribution to a standard normal? Why or why not?
\item What about the $t$ statistic $T_n = \frac{\sqrt{n}(\overline{X}_n-\mu)}{S_n}$, where $S_n$ is the sample standard deviation?
Does $T_n$ converge in distribution to a standard normal? Why or why not?
\end{enumerate}

%%%%%%%%%%%% Univariate Delta Method problems removed - see 2013 %%%%%%%%%%

%%%%%%%%% Random Vectors %%%%%%%%%%%%%%

\item If the $p \times 1$ random vector $\mathbf{X}$ has variance-covariance matrix $\mathbf{\Sigma}$ and $\mathbf{A}$ is an $m \times p$ matrix of constants, prove that the variance-covariance matrix of $\mathbf{AX}$ is $\mathbf{A \Sigma A}^\prime$. Start with the definition of a variance-covariance matrix:
\begin{displaymath}
V(\mathbf{Z})=E(\mathbf{Z}-\boldsymbol{\mu}_z)(\mathbf{Z}-\boldsymbol{\mu}_z)^\prime.
\end{displaymath}

\item If the $p \times 1$ random vector $\mathbf{X}$ has mean $\boldsymbol{\mu}$ and variance-covariance matrix $\mathbf{\Sigma}$, show $\mathbf{\Sigma} = E(\mathbf{XX}^\prime) - \boldsymbol{\mu \mu}^\prime$.

\item Let the $p \times 1$ random vector $\mathbf{X}$ have mean $\boldsymbol{\mu}$ and variance-covariance matrix $\mathbf{\Sigma}$, and let $\mathbf{c}$ be a $p \times 1$ vector of constants. Find $V(\mathbf{X}+\mathbf{c})$. Show your work.

\item Let $\mathbf{X}$ be a $p \times 1$ random vector with mean $\boldsymbol{\mu}_x$ and variance-covariance matrix $\mathbf{\Sigma}_x$, and let $\mathbf{Y}$ be a $q \times 1$ random vector with mean $\boldsymbol{\mu}_y$ and variance-covariance matrix $\mathbf{\Sigma}_y$. Recall that $C(\mathbf{X},\mathbf{Y})$ is the $p \times q$ matrix $C(\mathbf{X},\mathbf{Y}) = E\left((\mathbf{X}-\boldsymbol{\mu}_x)(\mathbf{Y}-\boldsymbol{\mu}_y)^\prime\right)$.
\begin{enumerate}
\item What is the $(i,j)$ element of $C(\mathbf{X},\mathbf{Y})$?
\item For this item, $p=q$. Find an expression for $V(\mathbf{X}+\mathbf{Y})$ in terms of $\mathbf{\Sigma}_x$, $\mathbf{\Sigma}_y$ and $C(\mathbf{X},\mathbf{Y})$. Show your work.
\item Simplify further for the special case where $Cov(X_i,Y_j)=0$ for all $i$ and $j$.
\item Let $\mathbf{c}$ be a $p \times 1$ vector of constants and $\mathbf{d}$ be a $q \times 1$ vector of constants. Find $C(\mathbf{X}+\mathbf{c}, \mathbf{Y}+\mathbf{d})$. Show your work.
\end{enumerate}
%%%%%%%%% End of Random Vectors %%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%% MGF %%%%%%%%%%%%%%%%%%%%%%%%%

\item \label{mgfstart} Denote the moment-generating function of a random variable $Y$ by $M_Y(t)$. The moment-generating function is defined by $M_Y(t) = E(e^{Yt})$. Recall that the moment-generating function corresponds uniquely to the probability distribution.
\begin{enumerate}
\item Let $a$ be a constant. Prove that $M_{aX}(t) = M_X(at)$.
\item Prove that $M_{X+a}(t) = e^{at}M_X(t)$.
\item Let $X_1$ and $X_2$ be \emph{independent} random variables. Prove that
\begin{displaymath}
M_{X_1+X_2}(t) = M_{X_1}(t) \, M_{X_2}(t).
\end{displaymath}
For convenience, you may assume that $X_1$ and $X_2$ are continuous, so you will integrate. This result extends to $M_{\sum_{i=1}^n X_i}(t) = \prod_{i=1}^n M_{X_i}(t)$, but you don't have to show it. (You could use induction.)
\end{enumerate}

\item Recall that if $X\sim N(\mu,\sigma^2)$, it has moment-generating function $M_X(t) = e^{\mu t + \frac{1}{2}\sigma^2t^2}$.
\begin{enumerate}
\item Let $X\sim N(\mu,\sigma^2)$ and $Y=aX+b$, where $a$ and $b$ are constants. Find the distribution of $Y$. Show your work.
\item Let $X\sim N(\mu,\sigma^2)$ and $Z = \frac{X-\mu}{\sigma}$. Find the distribution of $Z$.
\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Find the distribution of $Y = \sum_{i=1}^n X_i$.
\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Find the distribution of the sample mean $\overline{X}$.
\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Find the distribution of $Z = \frac{\sqrt{n}(\overline{X}-\mu)}{\sigma}$.
\end{enumerate}

% \newpage
\item A Chi-squared random variable $X$ with parameter $\nu>0$ has moment-generating function $M_X(t) = (1-2t)^{-\nu/2}$.
\begin{enumerate}
\item Let $X_1, \ldots, X_n$ be independent random variables with $X_i \sim \chi^2(\nu_i)$ for $i=1, \ldots, n$. Find the distribution of $Y = \sum_{i=1}^n X_i$.
\item Let $Z \sim N(0,1)$. Find the distribution of $Y=Z^2$. For this one, you need to integrate. Recall that the density of a normal random variable is $f(x) = \frac{1}{\sigma\sqrt{2\pi}}e^{-\frac{(x-\mu)^2}{2\sigma^2}}$.
\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Find the distribution of $Y = \frac{1}{\sigma^2} \sum_{i=1}^n\left(X_i-\mu \right)^2$.
\item Let $Y=X_1+X_2$, where $X_1$ and $X_2$ are independent, $X_1\sim\chi^2(\nu_1)$ and $Y\sim\chi^2(\nu_1+\nu_2)$, where $\nu_1$ and $\nu_2$ are both positive. Show $X_2\sim\chi^2(\nu_2)$.
\item Let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Show
\begin{displaymath}
\frac{(n-1)S^2}{\sigma^2} \sim \chi^2(n-1),
\end{displaymath}
where $S^2 = \frac{\sum_{i=1}^n\left(X_i-\overline{X} \right)^2 }{n-1}$. Hint: $\sum_{i=1}^n\left(X_i-\mu \right)^2 = \sum_{i=1}^n\left(X_i-\overline{X} + \overline{X} - \mu \right)^2 = \ldots$ You may use the independence of $\overline{X}$ and $S^2$ without proof, for now.
\end{enumerate}

\item Recall the definition of the $t$ distribution. If $Z\sim N(0,1)$, $W \sim \chi^2(\nu)$ and $Z$ and $W$ are independent, then $T = \frac{Z}{\sqrt{W/\nu}}$ is said to have a $t$ distribution with $\nu$ degrees of freedom, and we write $T \sim t(\nu)$. As in the last question, let $X_1, \ldots, X_n$ be a random sample from a $N(\mu,\sigma^2)$ distribution. Show that $T = \frac{\sqrt{n}(\overline{X}-\mu)}{S} \sim t(n-1)$. Once again, you may use the independence of $\overline{X}$ and $S^2$ without proof for now.

% \newpage
\item \label{integration} Here is an integral you cannot do in closed form, and numerical integration is challenging. For example, R's \texttt{integrate} function fails.
\begin{displaymath}
\int_0^{1/2} e^{\cos(1/x)} \, dx
\end{displaymath}
Using R, approximate the integral with Monte Carlo integration, and give a 99\% confidence interval for your answer. You need to produce 3 numbers: the estimate, a lower confidence limit and an upper confidence limit.

\end{enumerate}

\vspace{50mm}
\noindent
\begin{center}\begin{tabular}{l} \hspace{6in} \\ \hline \end{tabular}\end{center}
This assignment was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf16}{\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/appliedf16}}

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%