\documentclass[12pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue,
citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{fullpage}
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{STA 2101/442 Assignment 1 (Mostly Review)}}\footnote{This assignment was prepared by
\href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf16}
{\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/appliedf16}}}
\vspace{1 mm}
\end{center}

\noindent
Questions \ref{sat} and \ref{powervar} are to be done with R; please bring your printouts to the quiz and be prepared to hand them in if requested. Remember, the computer assignments in this course are \emph{not group projects}.
% You are expected to do the work yourself.
It is an academic offence to present someone else's work as your own, or to allow your work to be copied for this purpose. To repeat: the person who allows her/his work to be copied is equally guilty, and subject to disciplinary action by the university.

The other questions are practice for the quiz on Friday September 30th, and are not to be handed in. For the linear algebra part starting with Question~\ref{firstmat}, there is an excellent review in Chapter Two of Rencher and Schaalje's \emph{Linear Models in Statistics}. The chapter has more material than you need for this course. Note that they use $\mathbf{A}^\prime$ for the transpose, while in this course we'll use $\mathbf{A}^\top$.

\begin{enumerate}
%
\item Let $Y_1, \ldots, Y_n$ be numbers, and $\overline{Y}=\frac{1}{n}\sum_{i=1}^nY_i$. Show that the sum of squares $Q_m = \sum_{i=1}^n(Y_i-m)^2$ is minimized when $m = \overline{Y}$.

\item Let $X_1, \ldots, X_n$ be a random sample (meaning independent and identically distributed) from a distribution with density $f(x) = \frac{\theta}{x^{\theta+1}}$ for $x>1$, where $\theta>0$.
\begin{enumerate}
\item Find the maximum likelihood estimator of $\theta$. Show your work. The answer is a formula involving $X_1, \ldots, X_n$.
\item Suppose you observe these data: \texttt{1.37, 2.89, 1.52, 1.77, 1.04, 2.71, 1.19, 1.13, 15.66, 1.43}. Calculate the maximum likelihood estimate. My answer is 1.469102.
\end{enumerate}
% See 2014 for good quiz questions, cut out of this assignment.

\item Let $Y_1, \ldots, Y_n$ be a random sample from a distribution with mean $\mu$ and standard deviation $\sigma$.
\begin{enumerate}
\item Show that the sample variance $S^2=\frac{\sum_{i=1}^n(Y_i-\overline{Y})^2}{n-1}$ is an unbiased estimator of $\sigma^2$.
\item Denote the sample standard deviation by $S = \sqrt{S^2}$. Assume that the data come from a continuous distribution, so it's easy to see that $Var(S) \neq 0$. Using this fact, show that $S$ is a \emph{biased} estimator of $\sigma$.
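If you want to see the bias empirically before proving it, a small simulation can build intuition. This sketch is optional and not to be handed in; the normal distribution, $n=10$ and the number of replications are arbitrary choices:
\begin{verbatim}
# Estimate E(S) by simulation, with sigma = 1 and n = 10
set.seed(9999)                       # Arbitrary seed, for reproducibility
S = replicate(10000, sd(rnorm(10)))  # 10,000 sample standard deviations
mean(S)                              # Noticeably less than sigma = 1
\end{verbatim}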
\end{enumerate}

\item \label{t} Let $Y_1, \ldots, Y_n$ be a random sample from a normal distribution with mean $\mu$ and variance $\sigma^2$, so that $T = \frac{\sqrt{n}(\overline{Y}-\mu)}{S} \sim t(n-1)$. This is something you don't need to prove, for now.
\begin{enumerate}
\item Derive a $(1-\alpha)100\%$ confidence interval for $\mu$. ``Derive" means show all the high school algebra. Use the symbol $t_{\alpha/2}$ for the number satisfying $Pr(T>t_{\alpha/2})= \alpha/2$.
\item \label{ci} A random sample with $n=23$ yields $\overline{Y} = 2.57$ and a sample variance of $S^2=5.85$. Using the critical value $t_{0.025}=2.07$, give a 95\% confidence interval for $\mu$. The answer is a pair of numbers. (An optional R check for this part and the test that follows appears just after the True or False question below.)
\pagebreak
\item Test $H_0: \mu=3$ at $\alpha=0.05$.
\begin{enumerate}
\item Give the value of the $T$ statistic. The answer is a number.
\item State whether you reject $H_0$, Yes or No.
\item Can you conclude that $\mu$ is different from 3? Answer Yes or No.
\item If the answer is Yes, state whether $\mu>3$ or $\mu<3$. Pick one.
\end{enumerate}
\item Show that using a $t$-test, $H_0:\mu=\mu_0$ is rejected at significance level $\alpha$ if and only if the $(1-\alpha)100\%$ confidence interval for $\mu$ does not include $\mu_0$. The problem is easier if you start by writing the set of $T$ values for which $H_0$ is \emph{not} rejected.
\item In Question~\ref{ci}, does this mean $Pr\{1.53<\mu<3.61\}=0.95$? Answer Yes or No and briefly explain.
\end{enumerate}

\item Label each statement below True or False. Write ``T" or ``F" beside each statement. Assume the $\alpha=0.05$ significance level. If your answer is False, be able to explain why.
\begin{enumerate}
\item \underline{\hspace{10mm}} The $p$-value is the probability that the null hypothesis is true. % F
\item \underline{\hspace{10mm}} The $p$-value is the probability that the null hypothesis is false. % F
\item \underline{\hspace{10mm}} In a study comparing a new drug to the current standard treatment, the null hypothesis is rejected. This means the new drug is ineffective. % F
\item \underline{\hspace{10mm}} We observe a Pearson correlation coefficient of $r = -0.70$, $p = .009$. We conclude that high values of $X$ tend to go with low values of $Y$ and low values of $X$ tend to go with high values of $Y$. % T
\item \underline{\hspace{10mm}} The greater the $p$-value, the stronger the evidence against the null hypothesis. % F
\item \underline{\hspace{10mm}} If $p > .05$ we reject the null hypothesis at the .05 level. % F
\item \underline{\hspace{10mm}} If $p < .05$ we reject the null hypothesis at the .05 level. % T
\item \underline{\hspace{10mm}} In a study comparing a new drug to the current standard treatment, $p > .05$. We conclude that the new drug and the existing treatment are not equally effective. % F
\item \underline{\hspace{10mm}} The 95\% confidence interval for $\beta_3$ is from $-0.26$ to $3.12$. This means $P\{-0.26 < \beta_3 < 3.12\} = 0.95$. % F
\end{enumerate}
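Returning to Question~\ref{ci}: if you want to verify your confidence interval and test statistic numerically, a few lines of R will do it. This sketch is optional and not to be handed in; the numbers are the ones given in that question:
\begin{verbatim}
# Check the 95% CI and the T statistic for ybar = 2.57, S^2 = 5.85, n = 23
n = 23; ybar = 2.57; Ssq = 5.85
tcrit = qt(0.975, n-1)                # Should be close to 2.07
ybar + c(-1,1) * tcrit * sqrt(Ssq/n)  # Lower and upper confidence limits
sqrt(n) * (ybar-3) / sqrt(Ssq)        # T statistic for H0: mu = 3
\end{verbatim}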
\item For $i=1, \ldots, n$, let $Y_i = \beta_0 + \beta_1 x_i + \epsilon_i$, where $\beta_0$ and $\beta_1$ are unknown constants, $x_1, \ldots, x_n$ are known observable constants, and $\epsilon_1, \ldots, \epsilon_n$ are random variables with expected value zero.
\begin{enumerate}
\item What is $E(Y_i)$?
\item Clearly $E(Y_i)$ is a function of the unknown parameters $\beta_0$ and $\beta_1$. The \emph{least squares estimates} of $\beta_0$ and $\beta_1$ are the numbers that make the $Y_i$ values closest to their expected values in the sense of minimizing the quantity $Q = \sum_{i=1}^n\left(Y_i-E(Y_i)\right)^2$. Find the least squares estimates of $\beta_0$ and $\beta_1$. Show your work.
\item Now let $\epsilon_1, \ldots, \epsilon_n$ be independent normal random variables with expected value zero and common variance $\sigma^2$.
\begin{enumerate}
\item What is the distribution of $Y_i$? Just write down the answer.
\item Find the maximum likelihood estimates of $\beta_0$ and $\beta_1$. You may stop as soon as you realize that you have already done the problem.
\end{enumerate}
% \pagebreak
\item Calculate your estimates for the following data:
\begin{verbatim}
x y
---
5 6
4 7
3 4
4 5
2 4
\end{verbatim}
If you wish you can check your answer with R, but you will not turn in any R output for this question.
% 2.1538 0.8462
\end{enumerate}
% \pagebreak

\item \label{sat} In the United States, admission to university is based partly on high school marks and recommendations, and partly on applicants' performance on a standardized multiple choice test called the Scholastic Aptitude Test (SAT). The SAT has two sub-tests, Verbal and Math. A university administrator selected a random sample of 200 applicants, and recorded the Verbal SAT, the Math SAT and first-year university Grade Point Average (GPA) for each student. The data are given in the
\href{http://www.utstat.toronto.edu/~brunner/data/legal/openSAT.data.txt}
{\texttt{SAT}} data file.

The university administrator knows that the Verbal and Math SAT tests have the same number of questions, and the maximum score on both is 800. But are they equally difficult on average for this population of students? Using R, do a reasonable analysis to answer the question. You can read the data into a data frame with
\begin{verbatim}
sat = read.table(
  "http://www.utstat.toronto.edu/~brunner/data/legal/openSAT.data.txt")
\end{verbatim}
Bring your printout to the quiz; you may be asked to hand it in. Be ready to
\begin{itemize}
\item State your model.
\item Justify your choice of model. Would you expect Verbal and Math scores from the same student to be independent?
\item State your null and alternative hypotheses, in symbols.
\item Express your conclusion (if any) in plain, non-statistical language that could be understood by someone who never had a Statistics course. Your answer is something about which test is more difficult for these students. Marks will be deducted for use of technical terms like null hypothesis, significance level, critical value, $p$-value, and so on, even if what you say is correct.
\end{itemize}
There is more than one correct answer to this question. I did the analysis several different ways, and I consider all of them correct. I can think of about five more acceptable ways that I did not try. The number of bad ways to analyze the data is virtually unlimited.

\item \label{powervar} Please use R for the numerical parts of the following question. Bring your printout to the quiz. It may be handed in. As in Question \ref{t}, let $Y_1, \ldots, Y_n$ be a random sample from a normal distribution with mean $\mu$ and variance $\sigma^2$. This implies that $W = \frac{\sum_{i=1}^n(Y_i-\overline{Y})^2}{\sigma^2} = \frac{(n-1)S^2}{\sigma^2}$ has a chi-squared distribution with $n-1$ degrees of freedom, a fact you may use without proof for now.
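Although you don't have to prove the chi-squared fact yet, you can make it believable with a little simulation. This sketch is just for illustration (the choices of $n$, $\mu$, $\sigma$ and the number of replications are arbitrary) and is not part of what you hand in:
\begin{verbatim}
# Compare simulated values of W = (n-1)S^2/sigma^2 to chi-squared(n-1)
set.seed(4444)   # Arbitrary seed, for reproducibility
n = 10; sigma = 2
W = replicate(10000, (n-1)*var(rnorm(n, mean=5, sd=sigma))/sigma^2)
hist(W, freq=FALSE, breaks=50)       # Histogram of the simulated W values
curve(dchisq(x, df=n-1), add=TRUE)   # The chi-squared density should match
\end{verbatim}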
\begin{enumerate}
\item Suppose we want to test $H_0:\sigma^2 \leq \sigma^2_0$ against $H_1:\sigma^2 > \sigma^2_0$. Give the formula for a reasonable test statistic.
\item We are interested in testing $H_0:\sigma^2 \leq 4$ against $H_1:\sigma^2 > 4$.
\begin{enumerate}
\item What is the critical value at $\alpha=0.05$ for $n=25$? The answer is a number. Get it with R.
\item A sample of size 25 yields a sample mean of 10.08 and a sample variance of $S^2=6.82$. Using R, calculate the test statistic and the $p$-value. Do you reject $H_0$ at $\alpha=0.05$?
\item Suppose that the true variance is $\sigma^2=5$. What is the probability of rejecting the null hypothesis? Do a bit of hand calculation and then use R to obtain a numerical answer. This is the power of the test for $\sigma^2=5$ when $n=25$.
% n = 25; crit=qchisq(0.95,n-1); 1-pchisq(4/5*crit,n-1)
\item That was pathetic. The sample size is far too small. What is the smallest $n$ that yields a power of at least 0.8 when $\sigma^2=5$? The answer is a number.
\end{enumerate}
\end{enumerate}
% \pagebreak
% \enlargethispage*{1000 pt}

\item \label{firstmat} Which statement is true? (Quantities in boldface are matrices of constants.)
\begin{enumerate}
\item $\mathbf{A(B+C) = AB+AC}$
\item $\mathbf{A(B+C) = BA+CA}$
\item Both a and b
\item Neither a nor b
\end{enumerate}

\item Which statement is true?
\begin{enumerate}
\item $a\mathbf{(B+C)}=a\mathbf{B} + a\mathbf{C}$
\item $a\mathbf{(B+C)}=\mathbf{B}a + \mathbf{C}a$
\item Both a and b
\item Neither a nor b
\end{enumerate}
\pagebreak

\item Which statement is true?
\begin{enumerate}
\item $\mathbf{(B+C)A = AB+AC}$
\item $\mathbf{(B+C)A = BA+CA}$
\item Both a and b
\item Neither a nor b
\end{enumerate}

\item Which statement is true?
\begin{enumerate}
\item $\mathbf{(AB)^\top = A^\top B^\top}$
\item $\mathbf{(AB)^\top = B^\top A^\top}$
\item Both a and b
\item Neither a nor b
\end{enumerate}

\item Which statement is true?
\begin{enumerate}
\item $\mathbf{A^{\top\top} = A }$
\item $\mathbf{A^{\top\top\top} = A^\top }$
\item Both a and b
\item Neither a nor b
\end{enumerate}

\item Suppose that the square matrices $\mathbf{A}$ and $\mathbf{B}$ both have inverses and are the same size. Which statement is true?
\begin{enumerate}
\item $\mathbf{(AB)}^{-1} = \mathbf{A}^{-1}\mathbf{B}^{-1}$
\item $\mathbf{(AB)}^{-1} = \mathbf{B}^{-1}\mathbf{A}^{-1}$
\item Both a and b
\item Neither a nor b
\end{enumerate}

\item Which statement is true?
\begin{enumerate}
\item $\mathbf{(A+B)^\top = A^\top + B^\top}$
\item $\mathbf{(A+B)^\top = B^\top + A^\top }$
\item $\mathbf{(A+B)^\top = (B+A)^\top}$
\item All of the above
\item None of the above
\end{enumerate}
\pagebreak

\item Which statement is true?
\begin{enumerate}
\item $(a+b)\mathbf{C} = a\mathbf{C}+ b\mathbf{C}$
\item $(a+b)\mathbf{C} = \mathbf{C}a+ \mathbf{C}b$
\item $(a+b)\mathbf{C} = \mathbf{C}(a+b)$
\item All of the above
\item None of the above
\end{enumerate}

\item Let
\begin{tabular}{ccc}
$\mathbf{A} = \left( \begin{array}{c c} 1 & 2 \\ 2 & 4 \end{array} \right)$ &
$\mathbf{B} = \left( \begin{array}{c c} 0 & 2 \\ 2 & 1 \end{array} \right)$ &
$\mathbf{C} = \left( \begin{array}{c c} 2 & 0 \\ 1 & 2 \end{array} \right)$
\end{tabular}
\begin{enumerate}
\item Calculate $\mathbf{AB}$ and $\mathbf{AC}$
\item Do we have $\mathbf{AB} = \mathbf{AC}$? Answer Yes or No.
\item Prove $\mathbf{B} = \mathbf{C}$. Show your work.
\end{enumerate}

\item Let $\mathbf{A}$ be a square matrix with the determinant of $\mathbf{A}$ (denoted $|\mathbf{A}|$) equal to zero. What does this tell you about $\mathbf{A}^{-1}$? No proof is required here.
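For the two preceding questions, R will happily check the arithmetic if you are unsure; \verb|%*%| is R's matrix multiplication operator. This is optional:
\begin{verbatim}
A = matrix(c(1,2,2,4), nrow=2)   # Matrices fill by column in R
B = matrix(c(0,2,2,1), nrow=2)
C = matrix(c(2,1,0,2), nrow=2)
A %*% B; A %*% C                 # Compare the two products
det(A)                           # Zero: solve(A) would produce an error
\end{verbatim}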
\item Recall that an inverse of the square matrix $\mathbf{A}$ (denoted $\mathbf{A}^{-1}$) is defined by two properties: $\mathbf{A}^{-1}\mathbf{A=I}$ and $\mathbf{AA}^{-1}=\mathbf{I}$. Prove that inverses are unique, as follows. Let $\mathbf{B}$ and $\mathbf{C}$ both be inverses of $\mathbf{A}$. Show that $\mathbf{B=C}$.

\item Suppose that the square matrices $\mathbf{A}$ and $\mathbf{B}$ both have inverses. Prove that $\mathbf{(AB)}^{-1} = \mathbf{B}^{-1}\mathbf{A}^{-1}$. You have two things to show.

\item Let $\mathbf{X}$ be an $n$ by $p$ matrix with $n \neq p$. Why is it incorrect to say that $(\mathbf{X^\top X})^{-1}= \mathbf{X}^{-1}\mathbf{X}^{\top -1}$?

\item \label{ivt} Let $\mathbf{A}$ be a non-singular square matrix. Prove $(\mathbf{A}^{-1})^\top=(\mathbf{A}^\top)^{-1}$.

\item Using Question~\ref{ivt}, prove that if the inverse of a symmetric matrix exists, it is also symmetric.

\item \label{ss} Let $\mathbf{a}$ be an $n \times 1$ matrix of real constants. How do you know $\mathbf{a}^\top\mathbf{a}\geq 0$?
\pagebreak
\small

\item Recall the \emph{spectral decomposition} of a symmetric matrix (for example, a variance-covariance matrix). Any such matrix $\boldsymbol{\Sigma}$ can be written as $\boldsymbol{\Sigma} = \mathbf{P} \boldsymbol{\Lambda} \mathbf{P}^\top$, where $\mathbf{P}$ is a matrix whose columns are the (orthonormal) eigenvectors of $\boldsymbol{\Sigma}$, $\boldsymbol{\Lambda}$ is a diagonal matrix of the corresponding eigenvalues, and $\mathbf{P}^\top\mathbf{P} =~\mathbf{P}\mathbf{P}^\top =~\mathbf{I}$. If $\boldsymbol{\Sigma}$ is real, the eigenvalues are real as well.
\begin{enumerate}
\item Let $\boldsymbol{\Sigma}$ be a square symmetric matrix with eigenvalues that are all strictly positive.
\begin{enumerate}
\item What is $\boldsymbol{\Lambda}^{-1}$?
\item Show $\boldsymbol{\Sigma}^{-1} = \mathbf{P} \boldsymbol{\Lambda}^{-1} \mathbf{P}^\top$.
\end{enumerate}
\item Let $\boldsymbol{\Sigma}$ be a square symmetric matrix, and this time the eigenvalues are non-negative.
\begin{enumerate}
\item What do you think $\boldsymbol{\Lambda}^{1/2}$ might be?
\item Define $\boldsymbol{\Sigma}^{1/2}$ as $\mathbf{P} \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\top$. Show $\boldsymbol{\Sigma}^{1/2}$ is symmetric.
\item Show $\boldsymbol{\Sigma}^{1/2}\boldsymbol{\Sigma}^{1/2} = \boldsymbol{\Sigma}$, justifying the notation.
\end{enumerate}
\item Now return to the situation where the eigenvalues of the square symmetric matrix $\boldsymbol{\Sigma}$ are all strictly positive. Define $\boldsymbol{\Sigma}^{-1/2}$ as $\mathbf{P} \boldsymbol{\Lambda}^{-1/2} \mathbf{P}^\top$, where the elements of the diagonal matrix $\boldsymbol{\Lambda}^{-1/2}$ are the reciprocals of the corresponding elements of $\boldsymbol{\Lambda}^{1/2}$.
\begin{enumerate}
\item Show that the inverse of $\boldsymbol{\Sigma}^{1/2}$ is $\boldsymbol{\Sigma}^{-1/2}$, justifying the notation.
\item Show $\boldsymbol{\Sigma}^{-1/2} \boldsymbol{\Sigma}^{-1/2} = \boldsymbol{\Sigma}^{-1}$.
\end{enumerate}
\item The (square) matrix $\boldsymbol{\Sigma}$ is said to be \emph{positive definite} if $\mathbf{a}^\top \boldsymbol{\Sigma} \mathbf{a} > 0$ for all vectors $\mathbf{a} \neq \mathbf{0}$. Show that the eigenvalues of a positive definite matrix are all strictly positive. Hint: start with the definition of an eigenvalue and the corresponding eigenvector: $\boldsymbol{\Sigma}\mathbf{v} = \lambda \mathbf{v}$. Eigenvectors are typically scaled to have length one, so you may assume $\mathbf{v}^\top \mathbf{v} = 1$.
\item Let $\boldsymbol{\Sigma}$ be a symmetric, positive definite matrix. Putting together a couple of results you have proved above, establish that $\boldsymbol{\Sigma}^{-1}$ exists.
\end{enumerate}
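Once more, R can make the spectral decomposition concrete if you want to experiment; \texttt{eigen} returns the eigenvalues and orthonormal eigenvectors of a symmetric matrix. A sketch, using an arbitrary little positive definite matrix (optional, not to be handed in):
\begin{verbatim}
Sigma = matrix(c(2,1,1,2), nrow=2)       # Small symmetric example
eig = eigen(Sigma)
P = eig$vectors; Lambda = diag(eig$values)
P %*% Lambda %*% t(P)                    # Should recover Sigma
SqrtSigma = P %*% sqrt(Lambda) %*% t(P)  # Sigma^{1/2}
SqrtSigma %*% SqrtSigma                  # Should also recover Sigma
\end{verbatim}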
% \pagebreak
\item Let $\mathbf{X}$ be an $n \times p$ matrix of constants. The idea is that $\mathbf{X}$ is the ``design matrix" in the linear model $\mathbf{Y} = \mathbf{X}\boldsymbol{\beta} + \boldsymbol{\epsilon}$, so this problem is really about linear regression.
\begin{enumerate}
\item Recall that $\mathbf{A}$ symmetric means $\mathbf{A=A^\top}$. Let $\mathbf{X}$ be an $n$ by $p$ matrix. Show that $\mathbf{X^\top X}$ is symmetric.
\item Recall the definition of linear independence. The columns of $\mathbf{A}$ are said to be \emph{linearly dependent} if there exists a column vector $\mathbf{v} \neq \mathbf{0}$ with $\mathbf{Av} = \mathbf{0}$. If $\mathbf{Av} = \mathbf{0}$ implies $\mathbf{v} = \mathbf{0}$, the columns of $\mathbf{A}$ are said to be linearly \emph{independent}. Show that if the columns of $\mathbf{X}$ are linearly independent, then $\mathbf{X}^\top\mathbf{X}$ is positive definite.
\item Show that if $\mathbf{X}^\top\mathbf{X}$ is positive definite then $(\mathbf{X}^\top\mathbf{X})^{-1}$ exists.
\item Show that if $(\mathbf{X}^\top\mathbf{X})^{-1}$ exists then the columns of $\mathbf{X}^\top\mathbf{X}$ are linearly independent.
\item Show that if the columns of $\mathbf{X}^\top\mathbf{X}$ are linearly independent, then the columns of $\mathbf{X}$ are linearly independent.
\end{enumerate}
This is a good problem because it establishes that the least squares estimator $\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\top\mathbf{X})^{-1}\mathbf{X}^\top\mathbf{Y}$ exists if and only if the columns of $\mathbf{X}$ are linearly independent.

\end{enumerate}

\end{document}

############ R work for power ############

n = 25; crit = qchisq(0.95,n-1); 1-pchisq(4/5*crit,n-1)

n = 25:200
crit = qchisq(0.95,n-1)
power = 1-pchisq(4/5*crit,n-1)
cbind(n,power)

n = 500; crit = qchisq(0.95,n-1); 1-pchisq(4/5*crit,n-1)

n = 25; crit = qchisq(0.95,n-1); power = 1-pchisq(4/5*crit,n-1)
while(power < 0.8)
    {
    n = n+1
    crit = qchisq(0.95,n-1)
    power = 1-pchisq(4/5*crit,n-1)
    }
cat("n =",n," Power =",power,"\n")

n = 245  Power = 0.801211

############ R work for the sat data ############

sat = read.table("http://www.utstat.utoronto.ca/~brunner/appliedf14/code_n_data/hw/sat.data")
attach(sat)
D = VERBAL-MATH
t.test(D)

        One Sample t-test

data:  D
t = -9.047, df = 199, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
 -65.62406 -42.13594
sample estimates:
mean of x
   -53.88

library(help = "stats")
help(wilcox.test)

> wilcox.test(D)

        Wilcoxon signed rank test with continuity correction

data:  D
V = 3632.5, p-value = 4.881e-15
alternative hypothesis: true location is not equal to 0

# Sign test?
length(VERBAL[VERBAL>MATH])
length(VERBAL[VERBAL<MATH])

> length(VERBAL[VERBAL>MATH])
[1] 52
>
> length(VERBAL[VERBAL<MATH])
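############ Finishing the sign test: a sketch only, untested
# With 52 of the 200 students scoring higher on VERBAL than MATH, an exact
# sign test is just a binomial test. This assumes no VERBAL==MATH ties;
# tied pairs should be dropped, so check first:
sum(VERBAL==MATH)    # Number of ties
binom.test(52,200)   # Exact sign test, valid if there are no ties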