\documentclass[11pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{amsmath}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref}
% \pagestyle{empty} % No page numbers
\oddsidemargin=0in % Good for US Letter paper
\evensidemargin=0in
\textwidth=6.3in
\topmargin=-0.5in
\headheight=0.2in
\headsep=0.5in
\textheight=9.0in
\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{STA 312f22 Assignment One}}\footnote{This assignment was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/312f22} {\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/312f22}} }
\vspace{1 mm}
\end{center}
\noindent Do this review assignment in preparation for the quiz on Friday, Sept.~23rd. The problems are practice for the quiz, and are not to be handed in.

The first part of this assignment is based on material that you probably know already. However, the notation used in Statistics can be an obstacle for some students when they are doing maximum likelihood problems, so we will review the following basic rules.
\begin{itemize}
\item The distributive law: $a(b+c)=ab+ac$. You may see this in a form like
\begin{displaymath}
\theta \sum_{i=1}^n x_i = \sum_{i=1}^n \theta x_i
\end{displaymath}
\item Power of a product is the product of powers: $(ab)^c = a^c \, b^c$. You may see this in a form like
\begin{displaymath}
\left(\prod_{i=1}^n x_i\right)^\alpha = \prod_{i=1}^n x_i^\alpha
\end{displaymath}
\item Multiplication is addition of exponents: $a^b a^c = a^{b+c}$.
You may see this in a form like
\begin{displaymath}
\prod_{i=1}^n \theta e^{-\theta x_i} = \theta^n \exp(-\theta \sum_{i=1}^n x_i)
\end{displaymath}
\item Powering is multiplication of exponents: $(a^b)^c = a^{bc}$. You may see this in a form like
\begin{displaymath}
(e^{\mu t + \frac{1}{2}\sigma^2 t^2})^n = e^{n\mu t + \frac{1}{2}n\sigma^2 t^2}
\end{displaymath}
\item The log (that's the natural log, $\ln$ on many calculators) of a product is the sum of logs: $\log(ab) = \log(a)+\log(b)$. You may see this in a form like
\begin{displaymath}
\log \prod_{i=1}^n x_i = \sum_{i=1}^n \log x_i
\end{displaymath}
\item The log of a power is the exponent times the log: $\log(a^b)=b\,\log(a)$. You may see this in a form like
\begin{displaymath}
\log(\theta^n) = n \log \theta
\end{displaymath}
\item The log is the inverse of the exponential function: $\log(e^a) = a$. You may see this in a form like
\begin{displaymath}
\log\left( \theta^n \exp(-\theta \sum_{i=1}^n x_i) \right) = n \log \theta - \theta \sum_{i=1}^n x_i
\end{displaymath}
\end{itemize}
\pagebreak
\begin{enumerate}
\item Choose the correct answer.
\begin{enumerate}
\item $\prod_{i=1}^n e^{x_i}=$
\begin{enumerate}
\item $\exp(\prod_{i=1}^n x_i)$
\item $e^{nx_i}$
\item $\exp(\sum_{i=1}^n x_i)$ %
\end{enumerate}
\item $\prod_{i=1}^n \lambda e^{-\lambda x_i}=$
\begin{enumerate}
\item $\lambda e^{-\lambda^n x_i}$
\item $\lambda^n e^{-\lambda n x_i}$
\item $\lambda^n \exp(-\lambda \sum_{i=1}^n x_i)$ %
\item $\lambda^n \exp(-n\lambda \sum_{i=1}^n x_i)$
\item $\lambda^n \exp(-\lambda^n \sum_{i=1}^n x_i)$
\end{enumerate}
\item $\prod_{i=1}^n a_i^b=$
\begin{enumerate}
\item $n a^b$
\item $a^{nb}$
\item $(\prod_{i=1}^n a_i)^b$ %
\end{enumerate}
\item $\prod_{i=1}^n a^{b_i}=$
\begin{enumerate}
\item $n a^{b_i}$
\item $a^{n b_i}$
\item $\sum_{i=1}^n a^{b_i}$
\item {\Large$a^{\prod_{i=1}^n b_i}$}
\item {\Large$a^{\sum_{i=1}^n b_i}$} %
\end{enumerate}
\item $\left( e^{\lambda(e^t-1)} \right)^n = $
\begin{enumerate}
\item $n e^{\lambda(e^t-1)}$
\item $e^{n\lambda(e^t-1)}$ %
\item $e^{\lambda(e^{nt}-1)}$
\item $e^{n\lambda(e^{t}-n)}$
\end{enumerate}
\item $\left(\prod_{i=1}^n e^{-\lambda x_i}\right)^2=$
\begin{enumerate}
\item $e^{-2n\lambda x_i}$
\item $e^{-2\lambda \sum_{i=1}^n x_i}$ %
\item $2e^{-\lambda \sum_{i=1}^n x_i}$
\end{enumerate}
\end{enumerate}
\pagebreak
\item True, or False?
\begin{enumerate}
\item $\sum_{i=1}^n \frac{1}{x_i} = \frac{1}{\sum_{i=1}^n x_i}$ % F
\item $\prod_{i=1}^n \frac{1}{x_i} = \frac{1}{\prod_{i=1}^n x_i}$ % T
\item $\frac{a}{b+c}=\frac{a}{b}+\frac{a}{c}$ % F
\item $\log(a+b) = \log(a) + \log(b)$ % F
\item $e^{a+b} = e^a + e^b$ % F
\item $e^{a+b} = e^a e^b$ % T
\item $e^{ab} = e^a e^b$ % F
\item $\prod_{i=1}^n (x_i+y_i) = \prod_{i=1}^n x_i + \prod_{i=1}^n y_i$ % F
\item $\log (\prod_{i=1}^n a_i^b) = b \sum_{i=1}^n \log(a_i)$ % T
\item $\sum_{i=1}^n \prod_{j=1}^n a_j = n \prod_{j=1}^n a_j$
\item $\sum_{i=1}^n \prod_{j=1}^n a_i = \sum_{i=1}^n a_i^n$ % T
\item $\sum_{i=1}^n \prod_{j=1}^n a_{i,j} = \prod_{j=1}^n \sum_{i=1}^n a_{i,j}$ % F
\end{enumerate}
\item Simplify.
\begin{enumerate}
\item $\log \prod_{i=1}^n \theta^{x_i} (1-\theta)^{1-{x_i}}$ %
\item $\log \prod_{i=1}^n \binom{m}{{x_i}} \theta^{x_i} (1-\theta)^{m-x_i}$
\item $\log \prod_{i=1}^n \frac{e^{-\lambda}\lambda^{x_i}}{x_i!}$
\item $\log \prod_{i=1}^n \theta (1-\theta)^{x_i-1}$
\item $\log \prod_{i=1}^n \frac{1}{\theta} e^{-x_i/\theta}$
\item $\log \prod_{i=1}^n \frac{1}{\beta^\alpha \Gamma(\alpha)} e^{-x_i/\beta} x_i^{\alpha - 1}$
\item $\log \prod_{i=1}^n \frac{1}{2^{\nu/2}\Gamma(\nu/2)} e^{-x_i/2} x_i^{\nu/2 - 1}$
\item $\log \prod_{i=1}^n \frac{1}{\sigma \sqrt{2\pi}} e^{-\frac{(x_i-\mu)^2}{2 \sigma^2}}$
\item $\prod_{i=1}^n \frac{1}{\beta-\alpha} I(\alpha \leq x_i \leq \beta)$ (Express in terms of the minimum = $y_1$ and maximum = $y_n$. Note that $I$ is the \href{https://en.wikipedia.org/wiki/Indicator_function} {indicator function}.)
\end{enumerate}
\item For each of the following distributions, derive a general expression for the Maximum Likelihood Estimator (MLE). You don't have to do the second derivative test. Then use the data to calculate a numerical estimate.
\begin{enumerate}
\item $P(y|\pi)=\pi(1-\pi)^y$ for $y=0,1,\ldots$, where $0<\pi<1$. Data: \texttt{4, 0, 1, 0, 1, 3, 2, 16, 3, 0, 4, 3, 6, 16, 0, 0, 1, 1, 6, 10}. Answer: 0.2061856
% Geometric .25, thetahat = 1/(1+ybar)
\item $P(y|\lambda)= \frac{e^{-\lambda}\lambda^y}{y!}$ for $y=0,1,\ldots$, where $\lambda>0$. Data: \texttt{7, 7, 6, 4, 2, 5, 2, 3, 7, 2}. Answer: 4.5
% Poisson Lambda=5, x-bar=4.5
\item $Pr\{Y=1\}=\pi$ and $Pr\{Y=0\}=1-\pi$, where $0<\pi<1$. Data: \texttt{1 1 1 1 0 1 0 1 0 0 1 0 1 0}. Answer: 0.5714286
% Bernoulli pi=0.50, MLE=ybar
\end{enumerate}
\item Read Chapter 1 (Introduction) in the textbook, and do Problems 1.1, 1.2, 1.3, 1.9, 1.10. %, 1.12, 1.16 and 1.18.
In Problem 1.16, the experiment is viewed as generating a \emph{single} observation from a binomial distribution.
% Deleted problem 1.7 because it was Russian roulette.
% Problem 1.9 is EXACTLY the same as the main example in my introductory slides: 60-40. Maybe that's not a bad thing (or maybe I'm just being lazy because I don't want to re-do the slides).
% Problem 1.12 is challenging, but it illustrates what can happen with zero cell frequencies. This example will be useful later.
\newpage
\item The discrete random variables $X$ and $Y$ have joint distribution
\begin{center}
\begin{tabular}{c|ccc}
      & $x=1$  & $x=2$  & $x=3$ \\ \hline
$y=1$ & $3/12$ & $1/12$ & $3/12$ \\
$y=2$ & $1/12$ & $3/12$ & $1/12$ \\
\end{tabular}
\end{center}
\begin{enumerate}
\item What is the marginal distribution of $X$? List the values with their probabilities.
\item What is the marginal distribution of $Y$? List the values with their probabilities.
\item What is the conditional distribution of $X$ given $Y=2$? List the values with their probabilities.
\item Are $X$ and $Y$ independent? Answer Yes or No and show some work.
\end{enumerate}
\item Nothing is perfect, and that definitely applies to medical tests. Suppose a blood test is used to detect a certain kind of thyroid disease. The \emph{prevalence} of the disease is the probability that a randomly chosen person actually has the disease. Even with a perfect test, this could never be known exactly without testing the whole population. The \emph{sensitivity} of the test is a conditional probability. It is the probability that a person who actually has the disease will test positive. The \emph{specificity} of the test is another conditional probability. It is the probability that a person who does not have the disease will test negative. Suppose that the sensitivity of the test is 95\%, the specificity is 90\%, and the underlying rate of the disease is one percent. So it's a pretty good test for a rare condition.
\begin{enumerate}
\item What proportion of people in the population will test positive for the disease? The answer is a single number. Show your work. My answer is 0.1085, or 10.85\%.
The moral of the story is that if you confuse prevalence with the probability of testing positive (a very natural mistake), this good test can make you think the prevalence of the disease is nearly eleven times as great as it actually is.
\item What proportion of patients who test positive actually have the disease? My answer is $\frac{95}{1085}=0.088$. So, a bit over 91\% of the medical treatments (surgery, etc.) based on this good diagnostic test are unnecessary and maybe harmful.
\end{enumerate}
\item This problem establishes a result that will be used later in our course. It requires you to remember that the sum of two independent Poisson random variables also has a Poisson distribution. So, let the number of girls born on one day in Toronto have a Poisson distribution with parameter $\lambda_1$, and let the number of boys have a Poisson distribution with parameter $\lambda_2$. These two random variables are independent, and the whole thing is based upon the reasonable assumption that boys and girls are being born according to independent Poisson processes -- see class notes. Anyway, \emph{given} that $n$ babies were born on a particular day, what is the probability distribution of the number of girls born on that day? It is a well-known distribution. Show all your work. Here are two hints. First, the word ``given'' is a clue that you are being asked for a conditional probability. The second hint is that you should start by specifying the possible values of the number of girls born, given that a total of $n$ babies were born. This is the \emph{support} of the distribution for which you are being asked.
\end{enumerate}
\end{document}