\documentclass[11pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{fullpage}
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{STA 2101/442 Assignment 1 (Review)}}\footnote{This assignment was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf18} {\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/appliedf18}}}
\vspace{1 mm}
\end{center}

\noindent The questions on this assignment are not to be handed in. They are practice for the midterm test and final exam. %quiz on Friday September 15th, and are not to be handed in.
For the linear algebra part starting with Question~\ref{firstmat}, there is an excellent review in Chapter Two of Rencher and Schaalje's \emph{Linear Models in Statistics}. The chapter has more material than you need for this course. Note they use $\mathbf{A}^\prime$ for the transpose, while in this course we'll use $\mathbf{A}^\top$.

\begin{enumerate}

\item In a political poll, a random sample of $n$ registered voters are to indicate which of two candidates they prefer. State a reasonable model for these data, in which the population proportion of registered voters favouring Candidate $A$ is denoted by $\theta$. Denote the observations $Y_1, \ldots, Y_n$.

\item A medical researcher conducts a study using twenty-seven litters of cancer-prone mice. Two members are randomly selected from each litter, and all mice are subjected to daily doses of cigarette smoke. For each pair of mice, one is randomly assigned to Drug A and one to Drug B. Time (in weeks) until the first clinical sign of cancer is recorded.
\begin{enumerate}
\item State a reasonable model for these data. Remember, a statistical model is a set of assertions that partly specify the probability distribution of the observable data. For simplicity, you may assume that the study continues until all the mice get cancer, and that log time until cancer has a normal distribution.
\item What is the parameter space for your model?
\end{enumerate}

\item Suppose that volunteer patients undergoing elective surgery at a large hospital are randomly assigned to one of three different pain-killing drugs, and one week after surgery they rate the amount of pain they have experienced on a scale from zero (no pain) to 100 (extreme pain).
\begin{enumerate}
\item State a reasonable model for these data. For simplicity, you may assume normality.
\item What is the parameter space?
\end{enumerate}

\item Let $X_1, \ldots, X_n$ be a random sample (meaning independent and identically distributed) from a distribution with density $f(x) = \frac{\theta}{x^{\theta+1}}$ for $x>1$, where $\theta>0$.
\begin{enumerate}
\item Find the maximum likelihood estimator of $\theta$. Show your work. The answer is a formula involving $X_1, \ldots, X_n$.
\item Suppose you observe these data: \texttt{1.37, 2.89, 1.52, 1.77, 1.04, 2.71, 1.19, 1.13, 15.66, 1.43}. Calculate the maximum likelihood estimate. My answer is 1.469102.
\end{enumerate}

% See 2014 for good quiz questions, cut out of the 2016 assignment.
% \pagebreak
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item Label each statement below True or False. Write ``T'' or ``F'' beside each statement. Assume the $\alpha=0.05$ significance level.
\begin{enumerate}
\item \underline{\hspace{10mm}} The $p$-value is the probability that the null hypothesis is true. % F
\item \underline{\hspace{10mm}} The $p$-value is the probability that the null hypothesis is false. % F
\item \underline{\hspace{10mm}} In a study comparing a new drug to the current standard treatment, the null hypothesis is rejected. We conclude that the new drug is ineffective. % F
\item \underline{\hspace{10mm}} If $p > .05$ we reject the null hypothesis at the .05 level. % F
\item \underline{\hspace{10mm}} If $p < .05$ we reject the null hypothesis at the .05 level. % T
\item \underline{\hspace{10mm}} The greater the $p$-value, the stronger the evidence against the null hypothesis. % F
\item \underline{\hspace{10mm}} In a study comparing a new drug to the current standard treatment, $p > .05$. We conclude that the new drug and the existing treatment are not equally effective. % F
\item \underline{\hspace{10mm}} The 95\% confidence interval for $\beta_3$ is from $-0.26$ to $3.12$. This means $P\{-0.26 < \beta_3 < 3.12\} = 0.95$. % F
\end{enumerate}

\item \label{t} Let $Y_1, \ldots, Y_n$ be a random sample from a normal distribution with mean $\mu$ and variance $\sigma^2$, so that $T = \frac{\sqrt{n}(\overline{Y}-\mu)}{S} \sim t(n-1)$. This is something you don't need to prove, for now.
\begin{enumerate}
\item Derive a $(1-\alpha)100\%$ confidence interval for $\mu$. ``Derive'' means show all the high school algebra. Use the symbol $t_{\alpha/2}$ for the number satisfying $Pr(T>t_{\alpha/2})= \alpha/2$.
\item \label{ci} A random sample with $n=23$ yields $\overline{Y} = 2.57$ and a sample variance of $S^2=5.85$. Using the critical value $t_{0.025}=2.07$, give a 95\% confidence interval for $\mu$. The answer is a pair of numbers.
\item Test $H_0: \mu=3$ at $\alpha=0.05$.
\begin{enumerate}
\item Give the value of the $T$ statistic. The answer is a number.
\item State whether you reject $H_0$, Yes or No.
\item Can you conclude that $\mu$ is different from 3? Answer Yes or No.
\item If the answer is Yes, state whether $\mu>3$ or $\mu<3$. Pick one.
\end{enumerate}
\item Show that using a $t$-test, $H_0:\mu=\mu_0$ is rejected at significance level $\alpha$ if and only if the $(1-\alpha)100\%$ confidence interval for $\mu$ does not include $\mu_0$. The problem is easier if you start by writing the set of $T$ values for which $H_0$ is \emph{not} rejected.
\item In Question~\ref{ci}, does this mean $Pr\{1.53<\mu<3.61\}=0.95$? Answer Yes or No and briefly explain.
\end{enumerate}

\item Let $Y_1, \ldots, Y_n$ be a random sample from a distribution with mean $\mu$ and standard deviation $\sigma$.
\begin{enumerate}
\item Show that the sample variance $S^2=\frac{\sum_{i=1}^n(Y_i-\overline{Y})^2}{n-1}$ is an unbiased estimator of $\sigma^2$.
\item Denote the sample standard deviation by $S = \sqrt{S^2}$. Assume that the data come from a continuous distribution, so that $Var(S) > 0$. Using this fact, show that $S$ is a \emph{biased} estimator of $\sigma$.
\end{enumerate}
\pagebreak
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item In the \emph{centered} linear regression model, sample means are subtracted from the explanatory variables, so that values above average are positive and values below average are negative. Here is a version with one explanatory variable. For $i=1, \ldots, n$, let $y_i = \beta_0 + \beta_1(x_i-\overline{x}) + \epsilon_i$, where
\begin{itemize}
\item[] $\beta_0$ and $\beta_1$ are unknown constants (parameters).
\item[] $x_i$ are known, observed constants.
\item[] $\epsilon_1, \ldots, \epsilon_n$ are unobservable random variables with $E(\epsilon_i)=0$, $Var(\epsilon_i)=\sigma^2$ and $Cov(\epsilon_i,\epsilon_j)=0$ for $i \neq j$.
\item[] $\sigma^2$ is an unknown constant (parameter).
\item[] $y_1, \ldots, y_n$ are observable random variables.
\end{itemize}
\begin{enumerate}
\item What is $E(y_i)$? $Var(y_i)$?
\item Prove that $Cov(y_i,y_j)=0$ for $i \neq j$. Use the definition \\ $Cov(U,V) = E\{(U-E(U))(V-E(V)) \}$.
\item If $\epsilon_i$ and $\epsilon_j$ are independent (not just uncorrelated), then so are $y_i$ and $y_j$, because functions of independent random variables are independent. Proving this in full generality requires advanced definitions, but in this case the functions are so simple that we can get away with an elementary definition. Let $X_1$ and $X_2$ be independent random variables, meaning $P\{X_1 \leq x_1, X_2 \leq x_2 \} = P\{X_1 \leq x_1\} P\{X_2 \leq x_2\}$ for all real $x_1$ and $x_2$. Let $Y_1=X_1+a$ and $Y_2=X_2+b$, where $a$ and $b$ are constants. Prove that $Y_1$ and $Y_2$ are independent.
\item In \emph{least squares estimation}, we observe random variables $y_1, \ldots, y_n$ whose distributions depend on a parameter $\theta$, which could be a vector. To estimate $\theta$, write the expected value of $y_i$ as a function of $\theta$, say $E_\theta(y_i)$, and then estimate $\theta$ by the value that gets the observed data values as close as possible to their expected values. To do this, minimize
\begin{displaymath}
Q = \sum_{i=1}^n\left(y_i-E_\theta(y_i)\right)^2 .
\end{displaymath}
The value of $\theta$ that makes $Q$ as small as possible is the least squares estimate. Using this framework, find the least squares estimates of $\beta_0$ and $\beta_1$ for the centered regression model. The answer is a pair of formulas. Show your work.
\item Because of the centering, it is possible to verify that the solution actually \emph{minimizes} the sum of squares $Q$, using only single-variable second derivative tests. Do this part too.
\item How about a least squares estimate of $\sigma^2$?
\item You know that the least squares estimators $\widehat{\beta}_0$ and $\widehat{\beta}_1$ must be unbiased, but show it by calculating their expected values for this particular case.
\item Calculate $\widehat{\beta}_0$ and $\widehat{\beta}_1$ for the following data. Your answer is a pair of numbers.
% \begin{center}
~~~~~ \begin{tabular}{c|ccccc}
$x$ & 8 & 7 & 7 & 9 & 4 \\ \hline
$y$ & 9 & 13 & 9 & 8 & 6
\end{tabular}
% \end{center}
~~~~~ I get $\widehat{\beta}_1 = \frac{1}{2}$.
\pagebreak
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item Going back to the general setting (not just the numerical example with $n=5$), suppose the $\epsilon_i$ are normally distributed.
\begin{enumerate}
\item What is the distribution of $y_i$?
\item Write the log likelihood function.
\item Obtain the maximum likelihood estimates of $\beta_0$ and $\beta_1$; don't bother with $\sigma^2$.
The answer is a pair of formulas. \emph{Don't do more work than you have to!} As soon as you realize that you have already solved this problem, stop and write down the answer.
\end{enumerate}
\item Still for this centered model with a single explanatory variable, suppose we centered the $y_i$ values too. In this case what is the least squares estimate of $\beta_0$? Show your work.
\end{enumerate}

\item Consider the centered \emph{multiple} regression model
\begin{displaymath}
y_i = \beta_0 + \beta_1 (x_{i,1}-\overline{x}_1) + \cdots + \beta_{p-1} (x_{i,p-1}-\overline{x}_{p-1}) + \epsilon_i
\end{displaymath}
with the usual details.
\begin{enumerate}
\item What is $E_{\boldsymbol{\beta}}(y_i)$?
\item What is the least squares estimate of $\beta_0$? Show your work.
\item For an ordinary uncentered regression model, what is the height of the least squares plane at the point where all $x$ variables are equal to their sample mean values?
\end{enumerate}

\item Suppose that volunteer patients undergoing elective surgery at a large hospital are randomly assigned to one of three different pain-killing drugs, and one week after surgery they rate the amount of pain they have experienced on a scale from zero (no pain) to 100 (extreme pain). Write a multiple regression model for these data; specify how the explanatory variables are defined.

\item \label{firstmat} Let
\begin{tabular}{ccc}
$\mathbf{A} = \left( \begin{array}{c c} 1 & 2 \\ 2 & 4 \end{array} \right) $ &
$\mathbf{B} = \left( \begin{array}{c c} 0 & 2 \\ 2 & 1 \end{array} \right) $ &
$\mathbf{C} = \left( \begin{array}{c c} 2 & 0 \\ 1 & 2 \end{array} \right) $
\end{tabular}
\begin{enumerate}
\item Calculate $\mathbf{AB}$ and $\mathbf{AC}$.
\item Do we have $\mathbf{AB} = \mathbf{AC}$? Answer Yes or No.
\item Prove $\mathbf{B} = \mathbf{C}$. Show your work.
\end{enumerate}

\item Let $\mathbf{X}$ be an $n$ by $p$ matrix with $n \neq p$. Why is it incorrect to say that $(\mathbf{X^\top X})^{-1}= \mathbf{X}^{-1}(\mathbf{X}^\top)^{-1}$?

\item \label{ss} Let $\mathbf{a}$ be an $n \times 1$ matrix of real constants. How do you know $\mathbf{a}^\top\mathbf{a}\geq 0$?

\item The $p \times p$ matrix $\boldsymbol{\Sigma}$ is said to be \emph{positive definite} if $\mathbf{a}^\top \boldsymbol{\Sigma} \mathbf{a} > 0$ for all $p \times 1$ vectors $\mathbf{a} \neq \mathbf{0}$. Show that the eigenvalues of a positive definite matrix are all strictly positive. Hint: start with the definition of an eigenvalue and the corresponding eigenvector: $\boldsymbol{\Sigma}\mathbf{v} = \lambda \mathbf{v}$. Eigenvectors are typically scaled to have length one, so you may assume $\mathbf{v}^\top \mathbf{v} = 1$.
\pagebreak
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item Recall the \emph{spectral decomposition} of a symmetric matrix (for example, a variance-covariance matrix). Any such matrix $\boldsymbol{\Sigma}$ can be written as $\boldsymbol{\Sigma} = \mathbf{P} \boldsymbol{\Lambda} \mathbf{P}^\top$, where $\mathbf{P}$ is a matrix whose columns are the (orthonormal) eigenvectors of $\boldsymbol{\Sigma}$, $\boldsymbol{\Lambda}$ is a diagonal matrix of the corresponding eigenvalues, and $\mathbf{P}^\top\mathbf{P} =~\mathbf{P}\mathbf{P}^\top =~\mathbf{I}$. If $\boldsymbol{\Sigma}$ is real, the eigenvalues are real as well.
\begin{enumerate}
\item Let $\boldsymbol{\Sigma}$ be a square symmetric matrix with eigenvalues that are all strictly positive.
\begin{enumerate}
\item What is $\boldsymbol{\Lambda}^{-1}$?
\item Show $\boldsymbol{\Sigma}^{-1} = \mathbf{P} \boldsymbol{\Lambda}^{-1} \mathbf{P}^\top$.
\end{enumerate}
\item Let $\boldsymbol{\Sigma}$ be a square symmetric matrix, and this time the eigenvalues are non-negative.
\begin{enumerate}
\item What do you think $\boldsymbol{\Lambda}^{1/2}$ might be?
\item Define $\boldsymbol{\Sigma}^{1/2}$ as $\mathbf{P} \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\top$. Show $\boldsymbol{\Sigma}^{1/2}$ is symmetric.
\item Show $\boldsymbol{\Sigma}^{1/2}\boldsymbol{\Sigma}^{1/2} = \boldsymbol{\Sigma}$, justifying the notation.
\end{enumerate}
\item Now return to the situation where the eigenvalues of the square symmetric matrix $\boldsymbol{\Sigma}$ are all strictly positive. Define $\boldsymbol{\Sigma}^{-1/2}$ as $\mathbf{P} \boldsymbol{\Lambda}^{-1/2} \mathbf{P}^\top$, where the elements of the diagonal matrix $\boldsymbol{\Lambda}^{-1/2}$ are the reciprocals of the corresponding elements of $\boldsymbol{\Lambda}^{1/2}$.
\begin{enumerate}
\item Show that the inverse of $\boldsymbol{\Sigma}^{1/2}$ is $\boldsymbol{\Sigma}^{-1/2}$, justifying the notation.
\item Show $\boldsymbol{\Sigma}^{-1/2} \boldsymbol{\Sigma}^{-1/2} = \boldsymbol{\Sigma}^{-1}$.
\end{enumerate}
\item Let $\boldsymbol{\Sigma}$ be a symmetric, positive definite matrix. How do you know that $\boldsymbol{\Sigma}^{-1}$ exists?
\end{enumerate}

% \pagebreak
\item Let $\mathbf{X}$ be an $n \times p$ matrix of constants. The idea is that $\mathbf{X}$ is the ``design matrix'' in the linear model $\mathbf{y} = \mathbf{X}\boldsymbol{\beta} + \boldsymbol{\epsilon}$, so this problem is really about linear regression.
\begin{enumerate}
% \item Recall that $\mathbf{A}$ symmetric means $\mathbf{A=A^\top}$. Let $\mathbf{X}$ be an $n$ by $p$ matrix. Show that $\mathbf{X^\top X}$ is symmetric.
\item Recall the definition of linear independence. The columns of $\mathbf{A}$ are said to be \emph{linearly dependent} if there exists a column vector $\mathbf{v} \neq \mathbf{0}$ with $\mathbf{Av} = \mathbf{0}$. If $\mathbf{Av} = \mathbf{0}$ implies $\mathbf{v} = \mathbf{0}$, the columns of $\mathbf{A}$ are said to be linearly \emph{independent}. Show that if the columns of $\mathbf{X}$ are linearly independent, then $\mathbf{X}^\top\mathbf{X}$ is positive definite.
\item Show that if $\mathbf{X}^\top\mathbf{X}$ is positive definite then $(\mathbf{X}^\top\mathbf{X})^{-1}$ exists.
\item Show that if $(\mathbf{X}^\top\mathbf{X})^{-1}$ exists then the columns of $\mathbf{X}$ are linearly independent.
\end{enumerate}
This is a good problem because it establishes that the least squares estimator $\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\top\mathbf{X})^{-1}\mathbf{X}^\top\mathbf{y}$ exists if and only if the columns of $\mathbf{X}$ are linearly independent.
\end{enumerate}

\end{document}

# R for regression data
x = c(8, 7, 7, 9, 4); dx = x-7
y = c(9, 13, 9, 8, 6); dy = y-9
cbind(dx,dy)
lm(y~dx); sum(dx*dy)/sum(dx^2) # beta0hat = 9, beta1hat = 0.5

# R for two-sample t-test
x = c(6,10,8,12); mean(x)
y = c(7, 4, 8, 7, 9); mean(y)
t.test(x,y,var.equal=T)

        Two Sample t-test

data:  x and y
t = 1.3528, df = 7, p-value = 0.2182
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.495899  5.495899
sample estimates:
mean of x mean of y
        9         7

df = 1:10
critvalue = qt(0.975,df)
round(rbind(df,critvalue),3) # Round the whole thing to 3 digits

s2p = ((4-1)*var(x)+(5-1)*var(y))/7; s2p   # 4.857143
se = sqrt(s2p*(1/4+1/5)); se               # 1.478416
low = 2 - 2.365*se; low                    # -1.496454
high = 2 + 2.365*se; high                  # 5.496454
tstat = 2/se; tstat                        # 1.352799
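
The lines below are an optional numerical check of the maximum likelihood question with density theta/x^(theta+1). They assume the closed-form answer thetahat = n/sum(log(x)); the optimize() call is only a sanity check of that formula.

# R check of the MLE for the density theta/x^(theta+1), x > 1
x = c(1.37, 2.89, 1.52, 1.77, 1.04, 2.71, 1.19, 1.13, 15.66, 1.43)
thetahat = length(x)/sum(log(x)); thetahat   # 1.469102
loglike = function(theta) length(x)*log(theta) - (theta+1)*sum(log(x))
optimize(loglike, interval=c(0.01,100), maximum=TRUE)$maximum  # about 1.4691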
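
A similar check for the one-sample t question, using only the summary statistics given there (n = 23, ybar = 2.57, S^2 = 5.85) and the stated critical value 2.07.

# R for the one-sample confidence interval and the test of H0: mu = 3
n = 23; ybar = 2.57; s2 = 5.85; tcrit = 2.07
c(ybar - tcrit*sqrt(s2/n), ybar + tcrit*sqrt(s2/n))  # about (1.53, 3.61)
tstat = sqrt(n)*(ybar - 3)/sqrt(s2); tstat           # about -0.85, so H0 is not rejected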
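
A small simulation, not required by the assignment, that illustrates the bias of S as an estimator of sigma. It assumes normal data with sigma = 1 and n = 10; the average of S should come out noticeably below 1.

# Simulation illustrating that S is biased for sigma (normal data, sigma = 1, n = 10)
set.seed(9999)
S = replicate(10000, sd(rnorm(10, mean=0, sd=1)))
mean(S)   # roughly 0.97, less than sigma = 1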
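
An optional arithmetic check for the question with the matrices A, B and C; %*% is R's matrix multiplication.

# R for the 2 x 2 matrix question
A = matrix(c(1,2,2,4), nrow=2)   # columns (1,2) and (2,4)
B = matrix(c(0,2,2,1), nrow=2)
C = matrix(c(2,1,0,2), nrow=2)
A %*% B; A %*% C                 # compare the two products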
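
A sketch connecting the design-matrix question to the small regression example above: it forms the centered design matrix for the same five data points and computes betahat = (X'X)^{-1} X'y, then looks at the eigenvalues of X'X. This is just an illustration, not part of the assignment.

# R sketch: least squares in matrix form for the small centered regression example
x = c(8, 7, 7, 9, 4); y = c(9, 13, 9, 8, 6)
X = cbind(1, x - mean(x))          # intercept column and centered x
solve(t(X) %*% X) %*% t(X) %*% y   # 9 and 0.5, matching lm(y~dx) above
eigen(t(X) %*% X)$values           # strictly positive, so the inverse exists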
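
Finally, an optional illustration of the spectral decomposition questions, using a small made-up symmetric matrix; eigen() returns the orthonormal eigenvectors and the eigenvalues.

# R illustration of Sigma = P Lambda P^T and the square root matrices
Sigma = matrix(c(2,1,1,2), nrow=2)   # made-up symmetric matrix with eigenvalues 3 and 1
decomp = eigen(Sigma)
P = decomp$vectors; Lambda = diag(decomp$values)
P %*% Lambda %*% t(P)                     # recovers Sigma
SqrtSigma = P %*% sqrt(Lambda) %*% t(P)   # Sigma^(1/2)
SqrtSigma %*% SqrtSigma                   # equals Sigma, justifying the notation
P %*% solve(Lambda) %*% t(P)              # equals solve(Sigma), i.e. Sigma^(-1)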