% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{alltt}
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{Random Vectors and Multivariate Normal\footnote{See last slide for copyright information.}}
\subtitle{STA 431 Spring 2023}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

\section{Random Vectors and Matrices}

\begin{frame}
\frametitle{Random Vectors and Matrices}
\framesubtitle{See Section A.3 in Appendix A.}
\begin{itemize}
\item A \emph{random matrix} is just a matrix of random variables.
\item The joint probability distribution of its elements is the distribution of the random matrix. \pause
\item Random matrices with just one column (say, $p \times 1$) may be called \emph{random vectors}.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Expected Value}
%\framesubtitle{}
The expected value of a random matrix is defined as the matrix of expected values.

\vspace{4mm}

Denoting the $p \times c$ random matrix $\mathbf{X}$ by $[x_{i,j}]$,
% Bracket notation just works better here.
\begin{displaymath}
E(\mathbf{X}) = [E(x_{i,j})].
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{Immediately we have natural properties like}
%\framesubtitle{}
If the random matrices $\mathbf{X}$ and $\mathbf{Y}$ are the same size,
\vspace{3mm}
\begin{eqnarray*}
E(\mathbf{X}+\mathbf{Y}) &=& \pause E([x_{i,j}+y_{i,j}]) \\ \pause
&=& [E(x_{i,j}+y_{i,j})] \\ \pause
&=& [E(x_{i,j})+E(y_{i,j})] \\ \pause
&=& [E(x_{i,j})]+[E(y_{i,j})] \\ \pause
&=& E(\mathbf{X})+E(\mathbf{Y}).
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Moving a constant matrix through the expected value sign}
\pause
Let $\mathbf{A} = [a_{i,j}]$ be an $r \times p$ matrix of constants, while $\mathbf{X}$ is still a $p \times c$ random matrix. Then
\pause
\begin{eqnarray}
E(\mathbf{AX}) &=& E\left(\left[\sum_{k=1}^p a_{i,k}x_{k,j}\right]\right) \nonumber \\ \pause
&=& \left[E\left(\sum_{k=1}^p a_{i,k}x_{k,j}\right)\right] \nonumber \\ \pause
&=& \left[\sum_{k=1}^p a_{i,k}E(x_{k,j})\right] \nonumber \\ \pause
&=& \mathbf{A}E(\mathbf{X}). \nonumber
\end{eqnarray}
\pause
Similar calculations yield $E(\mathbf{AXB}) = \mathbf{A}E(\mathbf{X})\mathbf{B}$.
\end{frame}

\begin{frame}
\frametitle{Variance-Covariance Matrices}
Let $\mathbf{x}$ be a $p \times 1$ random vector with $E(\mathbf{x}) = \boldsymbol{\mu}$. \pause
The \emph{variance-covariance matrix} of $\mathbf{x}$ (sometimes just called the \emph{covariance matrix}), denoted by $cov(\mathbf{x})$, is defined as
\begin{displaymath}
cov(\mathbf{x}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\right\}.
\end{displaymath} \end{frame} \begin{frame} \frametitle{$cov(\mathbf{x}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\right\}$} \begin{columns} % Use Beamer's columns to use more of the margins! \column{1.1\textwidth} {\scriptsize \begin{eqnarray*} cov(\mathbf{x}) &=& E\left\{ \left( \begin{array}{c} x_1-\mu_1 \\ x_2-\mu_2 \\ x_3-\mu_3 \end{array} \right) \left( \begin{array}{c c c} x_1-\mu_1 & x_2-\mu_2 & x_3-\mu_3 \end{array} \right) \right\} \\ \pause &=& E\left\{ \left( \begin{array}{l l l} (x_1-\mu_1)^2 & (x_1-\mu_1)(x_2-\mu_2) & (x_1-\mu_1)(x_3-\mu_3) \\ (x_2-\mu_2)(x_1-\mu_1) & (x_2-\mu_2)^2 & (x_2-\mu_2)(x_3-\mu_3) \\ (x_3-\mu_3)(x_1-\mu_1) & (x_3-\mu_3)(x_2-\mu_2) & (x_3-\mu_3)^2 \\ \end{array} \right) \right\} \\ \pause \nonumber \\ &=& \left( \begin{array}{l l l} E\{(x_1-\mu_1)^2\} & E\{(x_1-\mu_1)(x_2-\mu_2)\} & E\{(x_1-\mu_1)(x_3-\mu_3)\} \\ E\{(x_2-\mu_2)(x_1-\mu_1)\} & E\{(x_2-\mu_2)^2\} & E\{(x_2-\mu_2)(x_3-\mu_3)\} \\ E\{(x_3-\mu_3)(x_1-\mu_1)\} & E\{(x_3-\mu_3)(x_2-\mu_2)\} & E\{(x_3-\mu_3)^2\} \\ \end{array} \right) \\ \pause \nonumber \\ &=& \left( \begin{array}{l l l} Var(x_1) & Cov(x_1,x_2) & Cov(x_1,x_3) \\ Cov(x_1,x_2) & Var(x_2) & Cov(x_2,x_3) \\ Cov(x_1,x_3) & Cov(x_2,x_3) & Var(x_3) \\ \end{array} \right) . \\ \pause \nonumber \end{eqnarray*} So, the covariance matrix $cov(\mathbf{x})$ is a $p \times p$ symmetric matrix with variances on the main diagonal and covariances on the off-diagonals. }% End size \end{columns} \end{frame} \begin{frame} \frametitle{Covariance matrix of a $1 \times 1$ random vector} \framesubtitle{That is, a scalar random variable} % From this point in the course, scalar random variables are lower case. {\LARGE \begin{eqnarray*} cov(\mathbf{x}) & = & E\left\{ (\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\right\} \\ & = & E\left\{ (x-\mu)(x-\mu) \right\} \\ & = & E\left\{ (x-\mu)^2 \right\} \\ & = & Var(x) \end{eqnarray*} } % End size \end{frame} \begin{frame} \frametitle{A rule analogous to $Var(a\,x) = a^2\,Var(x)$} Let $\mathbf{x}$ be a $p \times 1$ random vector with $E(\mathbf{x}) = \boldsymbol{\mu}$ and $cov(\mathbf{x}) = \boldsymbol{\Sigma}$, while $\mathbf{A}$ is an $r \times p$ matrix of constants. Then \begin{eqnarray*} cov(\mathbf{Ax}) &=& E\left\{ (\mathbf{Ax}-\mathbf{A}\boldsymbol{\mu}) (\mathbf{Ax}-\mathbf{A}\boldsymbol{\mu})^\top \right\} \\ \pause &=& E\left\{ \mathbf{A}(\mathbf{x}-\boldsymbol{\mu}) \left(\mathbf{A}(\mathbf{x}-\boldsymbol{\mu})\right)^\top \right\} \\ \pause &=& E\left\{ \mathbf{A}(\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top \mathbf{A}^\top \right\} \nonumber \\ \pause &=& \mathbf{A}E\{(\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\} \mathbf{A}^\top \\ \pause &=& \mathbf{A}cov(\mathbf{x}) \mathbf{A}^\top \nonumber \\ &=& \mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top \end{eqnarray*} \end{frame} \begin{frame} \frametitle{Positive definite is a natural assumption} \framesubtitle{For covariance matrices} \begin{itemize} \item Let $cov(\mathbf{x}) = \boldsymbol{\Sigma}$ \pause \item $\boldsymbol{\Sigma}$ positive definite means $\mathbf{a}^\top \boldsymbol{\Sigma} \mathbf{a} > 0$ for all $\mathbf{a} \neq \mathbf{0}$. \pause \item $y = \mathbf{a}^\top \mathbf{x} = a_1x_1 + \cdots + a_p x_p$ is a scalar random variable. 
\pause \item $Var(y) = \mathbf{a}^\top cov(\mathbf{x}) \mathbf{a} = \mathbf{a}^\top \boldsymbol{\Sigma} \mathbf{a}$ \pause \item $\boldsymbol{\Sigma}$ positive definite just says that the variance of any (non-trivial) linear combination is positive. \item This is usually what you want. \end{itemize} \end{frame} \begin{frame} \frametitle{Matrix of covariances between two random vectors} Let $\mathbf{x}$ be a $p \times 1$ random vector with $E(\mathbf{x}) = \boldsymbol{\mu}_x$ and let $\mathbf{y}$ be a $q \times 1$ random vector with $E(\mathbf{y}) = \boldsymbol{\mu}_y$. \vspace{3mm} The $p \times q$ matrix of covariances between the elements of $\mathbf{x}$ and the elements of $\mathbf{y}$ is \begin{displaymath} cov(\mathbf{x,y}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}_x) (\mathbf{y}-\boldsymbol{\mu}_y)^\top\right\}. \end{displaymath} \pause Note $cov(\mathbf{x,x}) = cov(\mathbf{x})$. \end{frame} \begin{frame} \frametitle{Adding a constant has no effect} \framesubtitle{On variances and covariances} It's clear from the definitions \begin{itemize} \item $cov(\mathbf{x}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\right\}$ \item $cov(\mathbf{x,y}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}_x) (\mathbf{y}-\boldsymbol{\mu}_y)^\top\right\}$ \end{itemize} That \begin{itemize} \item $ cov(\mathbf{x} + \mathbf{a}) = cov(\mathbf{x})$ \item $cov(\mathbf{x} + \mathbf{a},\mathbf{y} + \mathbf{b}) = cov(\mathbf{x},\mathbf{y})$ \end{itemize} \vspace{5mm} \pause For example, $E(\mathbf{x} + \mathbf{a}) = \boldsymbol{\mu} + \mathbf{a}$, so \begin{eqnarray*} cov(\mathbf{x} + \mathbf{a}) & = & E\left\{ (\mathbf{x}+\mathbf{a}-(\boldsymbol{\mu}+\mathbf{a})) (\mathbf{x}+\mathbf{a}-(\boldsymbol{\mu}+\mathbf{a}))^\top\right\} \\ \pause & = & E\left\{ (\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\right\} \\ \pause & = & cov(\mathbf{x}) \end{eqnarray*} \end{frame} \begin{frame} \frametitle{Here's a useful formula} %\framesubtitle{} Let $E(\mathbf{y}) = \boldsymbol{\mu}$, $cov(\mathbf{y}) = \boldsymbol{\Sigma}$, and let $\mathbf{A}$ and $\mathbf{B}$ be matrices of constants. Then {\LARGE \begin{displaymath} cov(\mathbf{Ay},\mathbf{By}) = \mathbf{A}\boldsymbol{\Sigma} \mathbf{B}^\top. 
\end{displaymath} } \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Centering} %\framesubtitle{Based on $cov(\mathbf{x} + \mathbf{a}) = cov(\mathbf{x})$} \pause Denote the \emph{centered} version of the random vector $\mathbf{x}$ by $\stackrel{c}{\mathbf{x}} = \mathbf{x} - \boldsymbol{\mu}_x$, \pause so that \begin{itemize} \item $E(\stackrel{c}{\mathbf{x}})=\mathbf{0}$ and \item $E(\stackrel{c}{\mathbf{x}}\stackrel{c}{\mathbf{x}} \stackrel{\top}{\vphantom{r}}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}_x) (\mathbf{x}-\boldsymbol{\mu}_x)^\top\right\} = cov(\mathbf{x})$ and \\ \item $E(\stackrel{c}{\mathbf{x}}\stackrel{c}{\vspace{.4mm}\mathbf{y}} \stackrel{\top}{\vphantom{r}}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}_x) (\mathbf{y}-\boldsymbol{\mu}_y)^\top\right\} = cov(\mathbf{x},\mathbf{y})$ \end{itemize} \end{frame} \begin{frame} \frametitle{Linear combinations of random vectors} %\framesubtitle{These are matrices, but they could be scalars} \begin{eqnarray*} \mathbf{L} & = & \mathbf{A}_1\mathbf{x}_1 + \cdots + \mathbf{A}_m\mathbf{x}_m + \mathbf{b} \\ \pause \stackrel{c}{\mathbf{L}} & = & \mathbf{L} - E(\mathbf{L}) \\ \pause & = & \mathbf{A}_1\mathbf{x}_1 + \cdots + \mathbf{A}_m\mathbf{x}_m + \mathbf{b} \\ && \hspace{-3mm}-\mathbf{A}_1\boldsymbol{\mu}_1 - \cdots - \mathbf{A}_m\boldsymbol{\mu}_m - \mathbf{b} \\ \pause & = & \mathbf{A}_1(\mathbf{x}_1-\boldsymbol{\mu}_1) + \cdots + \mathbf{A}_m(\mathbf{x}_m-\boldsymbol{\mu}_m) \\ \pause & = & \mathbf{A}_1 \stackrel{c}{\mathbf{x}}_1 + \cdots + \mathbf{A}_m \stackrel{c}{\mathbf{x}}_m \end{eqnarray*} \pause % \vspace{5mm} So that \begin{eqnarray*} cov(\mathbf{L}) & = & E(\stackrel{c}{\mathbf{L}}\stackrel{c}{\mathbf{L}} \stackrel{\top}{\vphantom{r}}) \\ cov(\mathbf{L}_1,\mathbf{L}_2) & = & E(\stackrel{c}{\mathbf{L}}_1\,\stackrel{c}{\mathbf{L}} \stackrel{\top}{\vphantom{r}_2}) \end{eqnarray*} %\vspace{1mm} %In words: To calculate variances and covariances of linear combinations, one may simply discard added constants, center all the random vectors, and take expected values of products. 
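\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
\frametitle{A numerical check of the covariance rules}
\framesubtitle{A sketch in R; the seed, data and constant matrices are arbitrary choices}

Sample covariance matrices obey the same algebra, so rules like
$cov(\mathbf{Ax}) = \mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top$,
$cov(\mathbf{Ax},\mathbf{Bx}) = \mathbf{A}\boldsymbol{\Sigma}\mathbf{B}^\top$ and
$cov(\mathbf{x}+\mathbf{a}) = cov(\mathbf{x})$ can be checked on any data matrix
with base R. This is a sanity check, not a proof.
{\scriptsize
\begin{verbatim}
set.seed(431)                      # Arbitrary seed
x <- matrix(rnorm(200*3), 200, 3)  # 200 cases, p = 3 variables
x[, 2] <- x[, 1] + x[, 2]          # Make the columns correlated
Sigma <- var(x)                    # Sample covariance matrix of x
A <- rbind(c(1, 0, 1),
           c(0, 2, -1))            # 2 x 3 matrix of constants
B <- rbind(c(1, 1, 1))             # 1 x 3 matrix of constants
var(x %*% t(A))                    # cov(Ax), directly from the data
A %*% Sigma %*% t(A)               # A Sigma A-transpose: same matrix
cov(x %*% t(A), x %*% t(B))        # cov(Ax, Bx), directly
A %*% Sigma %*% t(B)               # A Sigma B-transpose: same matrix
var(x + 10)                        # Adding a constant changes nothing
\end{verbatim}
} % End size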
\end{frame}

\begin{frame}
\frametitle{$cov(\mathbf{L}_1,\mathbf{L}_2) = E(\stackrel{c}{\mathbf{L}}_1\,\stackrel{c}{\mathbf{L}} \stackrel{\top}{\vphantom{r}_2})$}
%\framesubtitle{}
Let
{\LARGE
\begin{eqnarray*}
\stackrel{c}{\mathbf{L}}_1 & = & \mathbf{A}_1 \stackrel{c}{\mathbf{x}}_1 + \cdots + \mathbf{A}_m \stackrel{c}{\mathbf{x}}_m \\
\stackrel{c}{\mathbf{L}}_2 & = & \mathbf{B}_1 \stackrel{c}{\mathbf{y}}_1 + \cdots + \mathbf{B}_n \stackrel{c}{\mathbf{y}}_n
\end{eqnarray*}
} % End size
\end{frame}

\begin{frame}
\frametitle{A better rule for covariances of linear combinations}
%\framesubtitle{}
\begin{columns}
\column{1.2\textwidth} % To use more margin
{\footnotesize
\begin{eqnarray*}
cov(\mathbf{L}_1,\mathbf{L}_2) & = & E\left\{\stackrel{c}{\mathbf{L}}_1\,\stackrel{c}{\mathbf{L}} \stackrel{\top}{\vphantom{r}_2}\right\} \\ \pause
& = & E\left\{ \left( \mathbf{A}_1\stackrel{c}{\mathbf{x}}_1 + \cdots + \mathbf{A}_m\stackrel{c}{\mathbf{x}}_m\right) \left( \mathbf{B}_1\stackrel{c}{\mathbf{y}}_1 + \cdots + \mathbf{B}_n\stackrel{c}{\mathbf{y}}_n\right)^\top \right\}\\
& = & E\left\{ \left( \mathbf{A}_1\stackrel{c}{\mathbf{x}}_1 + \cdots + \mathbf{A}_m\stackrel{c}{\mathbf{x}}_m\right) \left( \stackrel{c}{\mathbf{y}}\stackrel{\top}{\vphantom{r}_1} \mathbf{B}_1^\top + \cdots + \stackrel{c}{\mathbf{y}}\stackrel{\top}{\vphantom{r}_n} \mathbf{B}_n^\top\right) \right\}\\
% \stackrel{c}{\mathbf{x}}_1
% \stackrel{c}{\mathbf{y}}\stackrel{\top}{\vphantom{r}_1}
& = & E\left\{\mathbf{A}_1\stackrel{c}{\mathbf{x}}_1 \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_1} \mathbf{B}_1^\top + \mathbf{A}_1\stackrel{c}{\mathbf{x}}_1 \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_2} \mathbf{B}_2^\top + \cdots + \mathbf{A}_m\stackrel{c}{\mathbf{x}}_m \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_n} \mathbf{B}_n^\top \right\} \pause \\
& = & \mathbf{A}_1E\left\{\stackrel{c}{\mathbf{x}}_1 \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_1}\right\} \mathbf{B}_1^\top + \mathbf{A}_1E\left\{\stackrel{c}{\mathbf{x}}_1 \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_2}\right\} \mathbf{B}_2^\top + \cdots + \mathbf{A}_mE\left\{\stackrel{c}{\mathbf{x}}_m \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_n} \right\} \mathbf{B}_n^\top \pause \\
& = & \mathbf{A}_1 \, cov(\mathbf{x}_1,\mathbf{y}_1) \, \mathbf{B}_1^\top + \pause \mathbf{A}_1 \, cov(\mathbf{x}_1,\mathbf{y}_2) \, \mathbf{B}_2^\top + \cdots + \mathbf{A}_m \, cov(\mathbf{x}_m,\mathbf{y}_n) \, \mathbf{B}_n^\top \pause \\
& = & \sum_{i=1}^m \sum_{j=1}^n \mathbf{A}_i \, cov(\mathbf{x}_i,\mathbf{y}_j) \, \mathbf{B}_j^\top
\end{eqnarray*} \pause
} % End size
\end{columns}

That is, calculate the covariance of each term in $\mathbf{L}_1$ with each term in $\mathbf{L}_2$, and add them up.
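\end{frame}

\begin{frame}
\frametitle{A special case of the rule}
\framesubtitle{One term in each linear combination}

For example, with $\mathbf{L}_1 = \mathbf{A}\mathbf{x} + \mathbf{b}$ and
$\mathbf{L}_2 = \mathbf{B}\mathbf{y}$ (so $m = n = 1$), the double sum has a single term:
\begin{displaymath}
cov(\mathbf{A}\mathbf{x} + \mathbf{b}, \, \mathbf{B}\mathbf{y})
  = \mathbf{A} \, cov(\mathbf{x},\mathbf{y}) \, \mathbf{B}^\top.
\end{displaymath}
The added constant $\mathbf{b}$ just disappears, and taking $\mathbf{y} = \mathbf{x}$
and $\mathbf{B} = \mathbf{A}$ recovers $cov(\mathbf{Ax}) = \mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top$.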
\end{frame} \begin{frame} \frametitle{Example: $cov(\mathbf{x}+\mathbf{y})$} % \framesubtitle{Using the centering rule} \pause \begin{eqnarray*} cov(\mathbf{x}+\mathbf{y}) & = & cov(\mathbf{x}+\mathbf{y}, \mathbf{x}+\mathbf{y}) \\ \pause & = & cov(\mathbf{x},\mathbf{x}) + cov(\mathbf{x},\mathbf{y}) + cov(\mathbf{y},\mathbf{x}) + cov(\mathbf{y},\mathbf{y}) \\ \pause & = & cov(\mathbf{x}) + cov(\mathbf{y}) + cov(\mathbf{x},\mathbf{y}) + cov(\mathbf{y},\mathbf{x}) \end{eqnarray*} \pause \begin{itemize} \item $cov(\mathbf{y},\mathbf{x}) \neq cov(\mathbf{x},\mathbf{y})$ \item $cov(\mathbf{y},\mathbf{x}) = cov(\mathbf{x},\mathbf{y})^\top$ \end{itemize} %Use $cov(\mathbf{x,y}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}_x) % (\mathbf{y}-\boldsymbol{\mu}_y)^\top\right\}$ to see this. \end{frame} \section{Multivariate Normal} \begin{frame} \frametitle{The Multivariate Normal Distribution} The $p \times 1$ random vector $\mathbf{x}$ is said to have a \emph{multivariate normal distribution}, and we write $\mathbf{x} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$, if $\mathbf{x}$ has (joint) density \begin{displaymath} f(\mathbf{x}) = \frac{1}{|\boldsymbol{\Sigma}|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}} \exp\left\{ -\frac{1}{2} (\mathbf{x}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}-\boldsymbol{\mu})\right\}, \end{displaymath} where $\boldsymbol{\mu}$ is $p \times 1$ and $\boldsymbol{\Sigma}$ is $p \times p$ symmetric and positive definite. \end{frame} \begin{frame} \frametitle{The Bivariate Normal Density} \framesubtitle{Multivariate normal with $p=2$ variables} \begin{center} \includegraphics[width=3in]{BivariateNormal} \end{center} \end{frame} \begin{frame} \frametitle{Analogies} % \framesubtitle{()} Multivariate normal reduces to the univariate normal when $p=1$. \vspace{4mm} \begin{itemize} \item Univariate Normal \begin{itemize} \item $f(x) = \frac{1}{\sigma \sqrt{2\pi}} \exp \left\{-\frac{1}{2}\frac{(x-\mu)^2}{\sigma^2}\right\}$ \item $E(x)=\mu, Var(x) = \sigma^2$ \item $\frac{(x-\mu)^2}{\sigma^2} \sim \chi^2 (1)$ \end{itemize} \pause \vspace{3mm} \item Multivariate Normal \begin{itemize} \item $f(\mathbf{x}) = \frac{1}{|\boldsymbol{\Sigma}|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}} \exp\left\{ -\frac{1}{2} (\mathbf{x}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}-\boldsymbol{\mu})\right\}$ \item $E(\mathbf{x})= \boldsymbol{\mu}$, $cov(\mathbf{x}) = \boldsymbol{\Sigma}$ \item $(\mathbf{x}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}-\boldsymbol{\mu}) \sim \chi^2 (p)$ \end{itemize} \end{itemize} \end{frame} \begin{frame} \frametitle{More properties of the multivariate normal} % \begin{itemize} \item If $\mathbf{c}$ is a vector of constants, $\mathbf{x}+\mathbf{c} \sim N(\mathbf{c}+\boldsymbol{\mu},\boldsymbol{\Sigma})$ \item If $\mathbf{A}$ is a matrix of constants, $\mathbf{Ax} \sim N(\mathbf{A}\boldsymbol{\mu},\mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top)$ \item Linear combinations of multivariate normals are multivariate normal. \item All the marginals (dimension less than $p$) of $\mathbf{x}$ are (multivariate) normal. \item For the multivariate normal, zero covariance implies independence. The multivariate normal is the only continuous distribution with this property. 
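\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Checking $(\mathbf{x}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}-\boldsymbol{\mu}) \sim \chi^2 (p)$ by simulation}
\framesubtitle{A base R sketch; $\boldsymbol{\mu}$, $\boldsymbol{\Sigma}$ and the sample size are arbitrary choices}

This sketch simulates using a Cholesky factor of $\boldsymbol{\Sigma}$; the
\texttt{rmvn} function coming up does the same job with the spectral decomposition.
{\scriptsize
\begin{verbatim}
set.seed(431)
p <- 3; n <- 10000
mu <- c(1, 0, -1)
Sigma <- rbind(c(2.0, 0.5, 0.0),
               c(0.5, 1.0, 0.3),
               c(0.0, 0.3, 1.5))
R <- chol(Sigma)                              # t(R) %*% R = Sigma
z <- matrix(rnorm(n*p), n, p)                 # Rows are Np(0, I)
x <- z %*% R + matrix(mu, n, p, byrow=TRUE)   # Rows are Np(mu, Sigma)
q <- mahalanobis(x, center=mu, cov=Sigma)     # The quadratic forms
mean(q)                                       # Should be close to p = 3
mean(q > qchisq(0.95, df=p))                  # Should be close to 0.05
\end{verbatim}
} % End size
\end{frame}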
% MVN likelihood
\begin{frame}
\frametitle{Multivariate Normal Likelihood}
\framesubtitle{$\mathbf{x}_1, \ldots, \mathbf{x}_n \stackrel{iid}{\sim} N(\boldsymbol{\mu},\boldsymbol{\Sigma})$}
%{\LARGE
\begin{eqnarray*}
L(\boldsymbol{\mu},\boldsymbol{\Sigma}) & = & \prod_{i=1}^n\frac{1}{|\boldsymbol{\Sigma}|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}} \exp\left\{ -\frac{1}{2} (\mathbf{x}_i-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}_i-\boldsymbol{\mu})\right\} \\
&&\\
& = & |\boldsymbol{\Sigma}|^{-\frac{n}{2}} (2\pi)^{-\frac{np}{2}} \times \\
&& \exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}}\boldsymbol{\Sigma}^{-1}) + (\overline{\mathbf{x}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} (\overline{\mathbf{x}}-\boldsymbol{\mu}) \right\},
\end{eqnarray*}
where $\boldsymbol{\widehat{\Sigma}} = \frac{1}{n}\sum_{i=1}^n (\mathbf{x}_i-\overline{\mathbf{x}}) (\mathbf{x}_i-\overline{\mathbf{x}})^\top$.
%} % End size
\end{frame}

\begin{frame}
\frametitle{Simulating from a multivariate normal}
%\framesubtitle{}
\begin{itemize}
\item Simulation of univariate normals is built-in. Use \texttt{rnorm()}.
\item Say you want to simulate from $\mathbf{x} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$. \pause
\item Generate $\mathbf{z} \sim N_p(\mathbf{0},\mathbf{I})$. \pause
\item Calculate $\boldsymbol{\Sigma}^{\frac{1}{2}}$ using the spectral decomposition. \pause
\item Let $\mathbf{x} = \boldsymbol{\Sigma}^{\frac{1}{2}} \mathbf{z} + \boldsymbol{\mu}$ \pause $ \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
\frametitle{The \texttt{rmvn} Function}
%\framesubtitle{}
{\scriptsize % The alltt environment requires \usepackage{alltt}
\begin{alltt}
{\color{blue}> source("https://www.utstat.toronto.edu/~brunner/openSEM/fun/rmvn.txt")
> A = rbind(c(1.0,0.5),
+           c(0.5,1.0))
> A }
     [,1] [,2]
[1,]  1.0  0.5
[2,]  0.5  1.0
{\color{blue}> datta = rmvn(10,mu=c(0,0),sigma=A); datta }
              [,1]        [,2]
 [1,] -2.643825316 -0.69926774
 [2,] -1.572814887 -0.21980248
 [3,] -0.387355643 -0.75080547
 [4,] -0.168534571 -1.28075830
 [5,] -0.716922363 -0.06556707
 [6,] -0.272368211 -0.15602646
 [7,] -0.007593983  0.59682941
 [8,]  0.436463462  1.02248006
 [9,] -0.193334362 -1.23877080
[10,] -0.859909183 -0.36091445
\end{alltt}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
\frametitle{For the Record}
%\framesubtitle{}
{\footnotesize % or scriptsize
\begin{verbatim}
# rmvn: Simulate from multivariate normal
rmvn <- function(nn,mu,sigma)
# Returns an nn by kk matrix, rows are independent MVN(mu,sigma)
    {
    kk <- length(mu)
    dsig <- dim(sigma)
    if(dsig[1] != dsig[2]) stop("Sigma must be square.")
    if(dsig[1] != kk) stop("Sizes of sigma and mu are inconsistent.")
    ev <- eigen(sigma)
    if(min(eigen(sigma)$values) < 0) stop("Sigma must have non-negative eigenvalues.")
    sqrl <- diag(sqrt(ev$values))
    PP <- ev$vectors
    ZZ <- rnorm(nn*kk) ; dim(ZZ) <- c(kk,nn)
    out <- t(PP%*%sqrl%*%ZZ+mu)
    return(out)
    }# End of function rmvn
\end{verbatim}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
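\begin{frame}[fragile]
\frametitle{Checking \texttt{rmvn}}
\framesubtitle{A quick sanity check; the sample size is an arbitrary choice}

With a large sample, \texttt{colMeans} and \texttt{var} should roughly
reproduce $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$.
{\footnotesize
\begin{verbatim}
source("https://www.utstat.toronto.edu/~brunner/openSEM/fun/rmvn.txt")
A <- rbind(c(1.0,0.5),
           c(0.5,1.0))
datta <- rmvn(100000, mu=c(0,0), sigma=A)
colMeans(datta)      # Should be close to (0, 0)
var(datta)           # Should be close to A = sigma
\end{verbatim}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%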
\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistical Sciences, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.

The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/brunner/oldclass/431s23}
{\small\texttt{http://www.utstat.toronto.edu/brunner/oldclass/431s23}}

\end{frame}

\end{document}