% 431s23Assignment3.tex A bit of random vectors, MVN, Estimation
\documentclass[12pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage{comment}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{fullpage}
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}

\begin{center}
{\Large \textbf{STA 431s23 Assignment Three}}\footnote{This assignment was prepared by
\href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences,
University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like
and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/brunner/oldclass/431s23}
{\small\texttt{http://www.utstat.toronto.edu/brunner/oldclass/431s23}}}
\vspace{1 mm}
\end{center}

\noindent
\emph{For the Quiz on Friday Feb.~3rd, please bring printouts of your full R input for
Question~\ref{mysterylogistic}. The other problems are not to be handed in. They are practice
for the Quiz.}

\vspace{2mm}
\hrule

\begin{enumerate}

\item Let $cov(\mathbf{x}) = \boldsymbol{\Sigma} = \mathbf{CDC}^\top$. The random vector
$\mathbf{x}$ has four elements, so that the matrix of eigenvectors may be written
\begin{equation*}
\mathbf{C} = \left( \begin{array}{c|c|c|c}
\mathbf{v}_1 & \mathbf{v}_2 & \mathbf{v}_3 & \mathbf{v}_4
\end{array}\right),
\end{equation*}
where the $\mathbf{v}_j$ are the eigenvectors. What is $\mathbf{v}_3^\top \mathbf{C}$?

\item Let $\mathbf{x} \sim N_p(\mathbf{0}, \boldsymbol{\Sigma})$, with
$\boldsymbol{\Sigma} = \mathbf{CDC}^\top$. Let $\mathbf{v}_j$ be the eigenvector (a column of
$\mathbf{C}$) corresponding to the eigenvalue $\lambda_j$ of $\boldsymbol{\Sigma}$.
\begin{enumerate}
\item What is the distribution of the scalar random variable $y = \mathbf{v}_j^\top \mathbf{x}$?
\item What is the distribution of $\mathbf{y} = \mathbf{C}^\top \mathbf{x}$?
\item How do you know that the elements of $\mathbf{y}$ are independent?
\end{enumerate}
The elements of $\mathbf{y}$ are called the \emph{principal components} of $\mathbf{x}$.

\item Let $\mathbf{x}= (x_1,x_2,x_3)^\top$ be multivariate normal with
\begin{displaymath}
\boldsymbol{\mu} = \left( \begin{array}{c} 1 \\ 0 \\ 6 \end{array} \right)
\mbox{ and }
\boldsymbol{\Sigma} = \left( \begin{array}{c c c}
1 & 0 & 0 \\
0 & 2 & 0 \\
0 & 0 & 1
\end{array} \right) .
\end{displaymath}
\begin{enumerate}
\item Derive the joint distribution of $x_1$ and $x_2$ using matrices.
\item Let $y_1=x_1+x_2$ and $y_2=x_2+x_3$. Find the joint distribution of $y_1$ and $y_2$
using matrices.
\end{enumerate}

\item Let $x_1$ be Normal$(\mu_1, \sigma^2_1)$, and $x_2$ be Normal$(\mu_2, \sigma^2_2)$,
independent of $x_1$. What is the joint distribution of $y_1=x_1+x_2$ and $y_2=x_1-x_2$?
What is required for $y_1$ and $y_2$ to be independent? Hint: Use matrices.

\item If $\mathbf{x} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$, then you know the
distribution of $\mathbf{Ax}$ from the formula sheet. Use this result to obtain the distribution
of the sample mean under normal random sampling. That is, let $x_1, \ldots, x_n$ be a random
sample from a $N(\mu,\sigma^2)$ distribution. Find the distribution of $\overline{x}$. You might
want to use $\mathbf{1}$ to represent an $n \times 1$ column vector of ones.
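
If you would like a numerical check on your answer (entirely optional), a small simulation along
the following lines will do it. This is only a sketch; the values of \texttt{n}, \texttt{mu},
\texttt{sigma} and \texttt{nsim} below are arbitrary choices, not part of the problem.
\begin{verbatim}
# Optional numerical check of the distribution of the sample mean.
# The values of n, mu, sigma and nsim are arbitrary.
set.seed(431)
n <- 10; mu <- 5; sigma <- 2; nsim <- 10000
xbar <- replicate(nsim, mean(rnorm(n, mean = mu, sd = sigma)))
c(mean(xbar), var(xbar))   # Compare to the mean and variance your formula gives
\end{verbatim}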

\pagebreak

\item \label{quad} This problem will guide you through the proof that if
$\mathbf{w} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$ with $\boldsymbol{\Sigma}$ positive
definite, $y = (\mathbf{w}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{w}-\boldsymbol{\mu})$
has a chi-squared distribution with $p$ degrees of freedom.
\begin{enumerate}
\item What is the distribution of $\mathbf{w}-\boldsymbol{\mu}$? Just write down the answer.
\item Because $\boldsymbol{\Sigma}$ is positive definite, we know that $\boldsymbol{\Sigma}^{-1/2}$
exists; there is no need to prove it. What is the distribution of
$\mathbf{z} = \boldsymbol{\Sigma}^{-1/2}(\mathbf{w}-\boldsymbol{\mu})$? Show some work.
\item How do you know that $\mathbf{z}$ is made up of \emph{independent} standard normals?
\item What is the distribution of $\mathbf{z}^\top\mathbf{z}$? Hint: What is the distribution of a
squared standard normal? What is the distribution of a sum of independent chi-squares?
\item Show that $\mathbf{z}^\top\mathbf{z} =
(\mathbf{w}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{w}-\boldsymbol{\mu})$.
This completes the proof.
\end{enumerate}

\vspace{3mm}
\hrule

\item Let $x_1, \ldots, x_n$ be a random sample from a Poisson distribution with expected value
$\lambda > 0$.
\begin{enumerate}
\item What is the parameter of this model?
\item What is the parameter space? See the lecture slides for how to write it.
\end{enumerate}

\item \label{uvn} Let $x_1, \ldots, x_n$ be a random sample from a normal distribution with
expected value $\mu$ and variance $\sigma^2$.
\begin{enumerate}
\item What is the parameter space for this model?
\item Obtain the Maximum Likelihood Estimator of the pair $\theta = (\mu,\sigma^2)$ by specializing
a result on the formula sheet. You can just write down the answer.
\item Find a Method of Moments estimator of $\theta$. Use the fact that $E(x_i)=\mu$ and
$Var(x_i)=\sigma^2$. This is very quick. Don't waste time and effort doing unnecessary things.
\item \label{numbers} In the following R output, data are in the vector $x$. Based on this, give
$\widehat{\theta}$. Your answer is a pair of numbers. I needed a calculator because R's
\texttt{var} function uses $n-1$ in the denominator.
\begin{verbatim}
> c(length(x),mean(x),var(x))
[1]  20.0000  94.3800 155.1554
\end{verbatim}
% MLE = MOM = (94.38, 147.3976)
\item Give the maximum likelihood estimator of the standard deviation $\sigma$. The answer is a
number. Do it the easy way. How do you know that this is okay?
\end{enumerate}
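
For Question~\ref{numbers}, you can also let R convert the sample variance (denominator $n-1$) to
the MLE (denominator $n$) instead of using a calculator. A sketch, assuming the data are in the
vector \texttt{x} as in the output above:
\begin{verbatim}
# Convert R's var() (denominator n-1) to the MLE (denominator n).
n <- length(x)
sigma2hat <- var(x) * (n-1)/n
c(mean(x), sigma2hat)   # MLE of (mu, sigma-squared)
\end{verbatim}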

% New order from here
\newpage
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\item Let $x_1 , \ldots, x_n$ be a random sample from a continuous distribution with density
\begin{displaymath}
f(x;\theta) = \frac{1}{\theta^{1/2}\sqrt{2\pi}} \, e^{-\frac{x^2}{2\theta}},
\end{displaymath}
where the parameter $\theta>0$. Propose a reasonable estimator for the parameter $\theta$, and use
the Law of Large Numbers to show that your estimator is consistent.

\item Let $x_1 , \ldots, x_n$ be a random sample from a Gamma distribution with
$\alpha=\beta=\theta>0$. That is, the density is
\begin{displaymath}
f(x;\theta) = \frac{1}{\theta^\theta \Gamma(\theta)} e^{-x/\theta} x^{\theta-1},
\end{displaymath}
for $x>0$. Let $\widehat{\theta} = \overline{x}_n$. Is $\widehat{\theta}$ consistent for $\theta$?
Answer Yes or No and prove your answer. Hint: The expected value of a Gamma random variable is
$\alpha\beta$.

\item \label{varconsistent} Let $x_1, \ldots, x_n$ be a random sample from a distribution with
mean $\mu_x$ and variance $\sigma^2_x$. The formula sheet has a formula for the sample variance
$\widehat{\sigma}^2_x$. Show that $\widehat{\sigma}^2_x$ is a consistent estimator of $\sigma^2_x$.

\item \label{covconsistent} Let $(x_1, y_1), \ldots, (x_n,y_n)$ be a random sample from a
bivariate distribution with $E(x_i)=\mu_x$, $E(y_i)=\mu_y$, $Var(x_i)=\sigma^2_x$,
$Var(y_i)=\sigma^2_y$, and $Cov(x_i,y_i)=\sigma_{xy}$. The formula sheet has a formula for the
sample covariance $\widehat{\sigma}_{xy}$. Show that $\widehat{\sigma}_{xy}$ is a consistent
estimator of $\sigma_{xy}$.

\item Let $x_1, \ldots, x_n$ be a random sample from a distribution with expected value $\mu$ and
variance $\sigma^2_x$. Independently of $x_1, \ldots, x_n$, let $y_1, \ldots, y_n$ be a random
sample from a distribution with the same expected value $\mu$, and a variance $\sigma^2_y$ that
might be different from $\sigma^2_x$. Let $t_n= \alpha \overline{x}_n + (1-\alpha) \overline{y}_n$,
where $0 \leq \alpha \leq 1$. Is $t_n$ always a consistent estimator of $\mu$? Answer Yes or No
and show your work.
% Always is deliberately misleading. Have confidence!

\item \label{simple} Independently for $i=1, \ldots, n$, let $y_i = \beta x_i + \epsilon_i$, where
$x_i \sim N(\mu_x,\sigma^2_x)$, $\epsilon_i \sim N(0,\sigma^2_\epsilon)$, and $x_i$ and
$\epsilon_i$ are independent.
\begin{enumerate}
\item What are the parameters of this model?
\item What is the parameter space?
\item Write the joint distribution of $x_i$ and $\epsilon_i$ in matrix form.
\item Obtain the joint distribution of $x_i$ and $y_i$ by writing
\begin{equation*}
\left(\begin{array}{c} x_i \\ y_i \end{array}\right) =
\mathbf{A} \left(\begin{array}{c} x_i \\ \epsilon_i \end{array}\right).
\end{equation*}
Give the matrix $\mathbf{A}$, and calculate the vector of expected values and the covariance
matrix.
\item To check your work, obtain $E(y_i)$, $Var(y_i)$ and $Cov(x_i,y_i)$ with scalar (non-matrix)
calculations.
\item Based on your work so far, you have \emph{two} potential method of moments estimators for
$\beta$, one based on the vector of expected values, and the other based on the covariance matrix.
Let $\widehat{\beta}_1$ denote the estimator based on the expected values, and let
$\widehat{\beta}_2$ denote the estimator based on the covariance matrix.
\begin{enumerate}
\item Give the formula for $\widehat{\beta}_2$, and show it is consistent.
\item Show that $\widehat{\beta}_1 \stackrel{p}{\rightarrow} \beta$ in most of the parameter
space.
\item However, consistency means that the estimator converges to the parameter in probability
\emph{everywhere} in the parameter space. Where in the parameter space does $\widehat{\beta}_1$
fail?
\item \emph{This last item is optional, and will not be on the quiz or on the final exam.} To see
exactly how $\widehat{\beta}_1$ fails, use the fact that the ratio of two independent standard
normal random variables is a standard Cauchy. Start by simplifying $\overline{y}_n$. You can take
it for granted that functions of independent random variables are still independent. If you have
followed this path without getting lost, you will conclude that if $\mu_x=0$, the distribution of
$\widehat{\beta}_1$ is Cauchy, but multiplied by a constant and centered on $\beta$. Notably, the
distribution of $\widehat{\beta}_1$ is the same for all $n$. As $n \rightarrow \infty$, it stays
exactly the same, never changing at all. It certainly does not shrink down to any constant,
including $\beta$.
\end{enumerate}
\end{enumerate}
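
The fact quoted in the optional item, that the ratio of two independent standard normals is
standard Cauchy, is easy to see by simulation. The following sketch just compares quantiles; the
number of simulated draws is an arbitrary choice.
\begin{verbatim}
# The ratio of two independent standard normals is standard Cauchy.
# Compare simulated quantiles of the ratio to theoretical Cauchy quantiles.
set.seed(431)
ratio <- rnorm(100000) / rnorm(100000)
probs <- c(0.05, 0.25, 0.50, 0.75, 0.95)
rbind(quantile(ratio, probs), qcauchy(probs))
\end{verbatim}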

\item The formula sheet has a useful expression for the multivariate normal likelihood.
\begin{enumerate}
\item Show that you understand the notation by giving the univariate version, in which
$x_1, \ldots, x_n \stackrel{i.i.d.}{\sim} N(\mu,\sigma^2)$. Your answer will have no matrix
notation for the trace, transpose or inverse.
\item Now starting with the univariate normal density
$f(x; \mu,\sigma^2) = \frac{1}{\sigma \sqrt{2\pi}}
\exp \left\{-\frac{1}{2}\frac{(x-\mu)^2}{\sigma^2}\right\}$, show that the univariate normal
likelihood is the same as your answer to the previous question. Hint: Add and subtract
$\overline{x}$.
\item How does this expression allow you to see \emph{without differentiating} that the MLE of
$\mu$ is $\overline{x}$?
% A.5.2 in the text.
\end{enumerate}

\item \label{mysterylogistic} Let $x_1, \ldots, x_n$ be a random sample from a distribution with
density
\begin{displaymath}
f(x) = \frac{\theta e^{\theta(x-\mu)}}{(1+e^{\theta(x-\mu)})^2}
\end{displaymath}
for $x$ real, where $-\infty < \mu < \infty$ and $\theta > 0$. Numerical data are available at \\
\href{http://www.utstat.toronto.edu/brunner/openSEM/data/mystery2.data.txt}
{\texttt{http://www.utstat.toronto.edu/brunner/openSEM/data/mystery2.data.txt}}.
\vspace{2mm}
\begin{enumerate}
\item Find the maximum likelihood estimates of $\mu$ and $\theta$.
\item Obtain an approximate 95\% confidence interval for $\theta$.
\item Test $H_0: \mu=2.1$ at the $\alpha=0.05$ significance level with a large-sample $z$-test.
\end{enumerate}
\textbf{Please bring a printout of your full R input and output to the quiz.}
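
One approach is to write the minus log likelihood as an R function and minimize it numerically
with \texttt{optim}. The code below is only a sketch: the function name and the starting values
are arbitrary, and extracting the estimates, the confidence interval and the test statistic from
the output is up to you.
\begin{verbatim}
# Sketch: numerical MLE by minimizing the minus log likelihood.
x <- scan("http://www.utstat.toronto.edu/brunner/openSEM/data/mystery2.data.txt")
mll <- function(pars)    # pars = (mu, theta)
  {
  mu <- pars[1]; theta <- pars[2]
  if (theta <= 0) return(Inf)   # Stay inside the parameter space
  -sum( log(theta) + theta*(x-mu) - 2*log(1 + exp(theta*(x-mu))) )
  }
fit <- optim(c(mean(x), 1), mll, hessian = TRUE)   # Starting values are arbitrary guesses
fit$par                # Approximate MLEs of mu and theta
solve(fit$hessian)     # Approximate asymptotic covariance matrix of the MLEs
\end{verbatim}
The square roots of the diagonal elements of the inverse Hessian give the standard errors needed
for the confidence interval and the $z$-test.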

\end{enumerate} % End of all the questions

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Save this one for next time.

\item Independently for $i=1, \ldots, n$, let
$\mathbf{y}_i = \boldsymbol{\beta}_0 + \boldsymbol{\beta}_1 \mathbf{x}_i + \boldsymbol{\epsilon}_i$,
where
\begin{itemize}
\item $\mathbf{y}_i$ is a $q \times 1$ random vector of observable response variables; there are
$q$ response variables.
\item $\mathbf{x}_i$ is a $p \times 1$ observable random vector; there are $p$ explanatory
variables. $E(\mathbf{x}_i) = \boldsymbol{\mu}_x$ and
$cov(\mathbf{x}_i) = \boldsymbol{\Phi}_{p \times p}$. The positive definite matrix
$\boldsymbol{\Phi}$ is unknown.
\item $\boldsymbol{\beta}_0$ is a $q \times 1$ vector of unknown constants.
\item $\boldsymbol{\beta}_1$ is a $q \times p$ matrix of unknown constants.
\item $\boldsymbol{\epsilon}_i$ is a $q \times 1$ random vector with expected value zero and
unknown positive definite variance-covariance matrix
$cov(\boldsymbol{\epsilon}_i) = \boldsymbol{\Psi}_{q \times q}$.
\item $\boldsymbol{\epsilon}_i$ is independent of $\mathbf{x}_i$.
\end{itemize}
Letting $\mathbf{d}_i = \left(\begin{array}{c} \mathbf{x}_i \\ \hline \mathbf{y}_i \end{array} \right)$,
we have $cov(\mathbf{d}_i) = \boldsymbol{\Sigma} =
\left( \begin{array}{c|c} \boldsymbol{\Sigma}_x & \boldsymbol{\Sigma}_{xy} \\ \hline
\boldsymbol{\Sigma}_{yx} & \boldsymbol{\Sigma}_y \end{array} \right)$, and
$\widehat{\boldsymbol{\Sigma}} =
\left( \begin{array}{c|c} \widehat{\boldsymbol{\Sigma}}_x & \widehat{\boldsymbol{\Sigma}}_{xy} \\ \hline
\widehat{\boldsymbol{\Sigma}}_{yx} & \widehat{\boldsymbol{\Sigma}}_y \end{array} \right)$.
\begin{enumerate}
\item Give the dimensions (number of rows and columns) of the following matrices: \\
$\mathbf{d}_i$, $\boldsymbol{\Sigma}$, $\boldsymbol{\Sigma}_{x}$, $\boldsymbol{\Sigma}_{y}$,
$\boldsymbol{\Sigma}_{xy}$, $\boldsymbol{\Sigma}_{yx}$.
\item Write the parts of $\boldsymbol{\Sigma}$ in terms of the unknown parameter matrices.
\item Give a Method of Moments Estimator for $\boldsymbol{\Phi}$. Just write it down.
\item Obtain formulas for the Method of Moments Estimators of $\boldsymbol{\beta}_1$,
$\boldsymbol{\beta}_0$ and $\boldsymbol{\Psi}$. Show your work. You may give
$\widehat{\boldsymbol{\beta}}_0$ in terms of $\widehat{\boldsymbol{\beta}}_1$, but simplify
$\widehat{\boldsymbol{\Psi}}$.
\item If the distributions of $\mathbf{x}_i$ and $\boldsymbol{\epsilon}_i$ are multivariate
normal, how do you know that your Method of Moments estimates are also the MLEs?
\end{enumerate}

%%%%%%%%%%%%%%%%%%%%%%%%% Consistency, lifted from Exercises A.5 %%%%%%%%%%%%%%%%%%%%%%%

\item Let $X_1, \ldots, X_n$ be a random sample from a distribution with mean $\mu$. Show that
$T_n = \frac{1}{n+400}\sum_{i=1}^n X_i$ is consistent for $\mu$. Hint: If a sequence of constants
$a_n \rightarrow a$ as an ordinary limit, you can view the constants as degenerate random
variables and write $a_n \stackrel{p}{\rightarrow} a$. Then you can use continuous mapping and so
on with confidence.

\item \label{varconsistent} Let $X_1, \ldots, X_n$ be a random sample from a distribution with
mean $\mu$ and variance $\sigma^2$. Prove that the sample variance
$S^2=\frac{\sum_{i=1}^n(X_i-\overline{X})^2}{n-1}$ is consistent for $\sigma^2$.

\item \label{covconsistent} Let $(X_1, Y_1), \ldots, (X_n,Y_n)$ be a random sample from a
bivariate distribution with $E(X_i)=\mu_x$, $E(Y_i)=\mu_y$, $Var(X_i)=\sigma^2_x$,
$Var(Y_i)=\sigma^2_y$, and $Cov(X_i,Y_i)=\sigma_{xy}$. Show that the sample covariance
$S_{xy} = \frac{\sum_{i=1}^n(X_i-\overline{X})(Y_i-\overline{Y})}{n-1}$ is a consistent estimator
of $\sigma_{xy}$.

\item Let $X_1 , \ldots, X_n$ be a random sample from a Poisson distribution with parameter
$\lambda$. You know that $E(X_i)=Var(X_i)=\lambda$; there is no need to prove it. From the Law of
Large Numbers, it follows immediately that $\overline{X}_n$ is consistent for $\lambda$. Let
\begin{displaymath}
\widehat{\lambda} = \frac{\sum_{i=1}^n (X_i-\overline{X}_n)^2}{n-4}.
\end{displaymath}
Is $\widehat{\lambda}$ also consistent for $\lambda$? Answer Yes or No and prove your answer.

\item
\begin{enumerate}
\item
\item
\end{enumerate}

\item For the model of Problem~\ref{simple}, show that the method of moments estimator of $\beta$
is consistent. You may use the consistency of the sample variance and covariance without proof.