% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{alltt}
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{Random Vectors and Multivariate Normal\footnote{See last slide for copyright information.}}
\subtitle{STA 431 Spring 2023}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

\section{Random Vectors and Matrices}

\begin{frame}
\frametitle{Random Vectors and Matrices}
\framesubtitle{See Section A.3 in Appendix A.}
\begin{itemize}
\item A \emph{random matrix} is just a matrix of random variables.
\item The joint probability distribution of its elements is the distribution of the random matrix. \pause
\item Random matrices with just one column (say, $p \times 1$) may be called \emph{random vectors}.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Expected Value}
%\framesubtitle{}
The expected value of a random matrix is defined as the matrix of expected values.

\vspace{4mm}

Denoting the $p \times c$ random matrix $\mathbf{X}$ by $[x_{i,j}]$,
% Bracket notation just works better here.
\begin{displaymath}
E(\mathbf{X}) = [E(x_{i,j})].
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{Immediately we have natural properties like}
%\framesubtitle{}
If the random matrices $\mathbf{X}$ and $\mathbf{Y}$ are the same size,
\vspace{3mm}
\begin{eqnarray*}
E(\mathbf{X}+\mathbf{Y}) &=& \pause E([x_{i,j}+y_{i,j}]) \\ \pause
&=& [E(x_{i,j}+y_{i,j})] \\ \pause
&=& [E(x_{i,j})+E(y_{i,j})] \\ \pause
&=& [E(x_{i,j})]+[E(y_{i,j})] \\ \pause
&=& E(\mathbf{X})+E(\mathbf{Y}).
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Moving a constant matrix through the expected value sign}
\pause
Let $\mathbf{A} = [a_{i,j}]$ be an $r \times p$ matrix of constants, while $\mathbf{X}$ is still a $p \times c$ random matrix. Then
\pause
\begin{eqnarray}
E(\mathbf{AX}) &=& E\left(\left[\sum_{k=1}^p a_{i,k}x_{k,j}\right]\right) \nonumber \\ \pause
&=& \left[E\left(\sum_{k=1}^p a_{i,k}x_{k,j}\right)\right] \nonumber \\ \pause
&=& \left[\sum_{k=1}^p a_{i,k}E(x_{k,j})\right] \nonumber \\ \pause
&=& \mathbf{A}E(\mathbf{X}). \nonumber
\end{eqnarray}
\pause
Similar calculations yield $E(\mathbf{AXB}) = \mathbf{A}E(\mathbf{X})\mathbf{B}$.
\end{frame}

\begin{frame}
\frametitle{Variance-Covariance Matrices}
Let $\mathbf{x}$ be a $p \times 1$ random vector with $E(\mathbf{x}) = \boldsymbol{\mu}$. \pause
The \emph{variance-covariance matrix} of $\mathbf{x}$ (sometimes just called the \emph{covariance matrix}), denoted by $cov(\mathbf{x})$, is defined as
\begin{displaymath}
cov(\mathbf{x}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\right\}.
\end{displaymath} \end{frame} \begin{frame} \frametitle{$cov(\mathbf{x}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\right\}$} \begin{columns} % Use Beamer's columns to use more of the margins! \column{1.1\textwidth} {\scriptsize \begin{eqnarray*} cov(\mathbf{x}) &=& E\left\{ \left( \begin{array}{c} x_1-\mu_1 \\ x_2-\mu_2 \\ x_3-\mu_3 \end{array} \right) \left( \begin{array}{c c c} x_1-\mu_1 & x_2-\mu_2 & x_3-\mu_3 \end{array} \right) \right\} \\ \pause &=& E\left\{ \left( \begin{array}{l l l} (x_1-\mu_1)^2 & (x_1-\mu_1)(x_2-\mu_2) & (x_1-\mu_1)(x_3-\mu_3) \\ (x_2-\mu_2)(x_1-\mu_1) & (x_2-\mu_2)^2 & (x_2-\mu_2)(x_3-\mu_3) \\ (x_3-\mu_3)(x_1-\mu_1) & (x_3-\mu_3)(x_2-\mu_2) & (x_3-\mu_3)^2 \\ \end{array} \right) \right\} \\ \pause \nonumber \\ &=& \left( \begin{array}{l l l} E\{(x_1-\mu_1)^2\} & E\{(x_1-\mu_1)(x_2-\mu_2)\} & E\{(x_1-\mu_1)(x_3-\mu_3)\} \\ E\{(x_2-\mu_2)(x_1-\mu_1)\} & E\{(x_2-\mu_2)^2\} & E\{(x_2-\mu_2)(x_3-\mu_3)\} \\ E\{(x_3-\mu_3)(x_1-\mu_1)\} & E\{(x_3-\mu_3)(x_2-\mu_2)\} & E\{(x_3-\mu_3)^2\} \\ \end{array} \right) \\ \pause \nonumber \\ &=& \left( \begin{array}{l l l} Var(x_1) & Cov(x_1,x_2) & Cov(x_1,x_3) \\ Cov(x_1,x_2) & Var(x_2) & Cov(x_2,x_3) \\ Cov(x_1,x_3) & Cov(x_2,x_3) & Var(x_3) \\ \end{array} \right) . \\ \pause \nonumber \end{eqnarray*} So, the covariance matrix $cov(\mathbf{x})$ is a $p \times p$ symmetric matrix with variances on the main diagonal and covariances on the off-diagonals. }% End size \end{columns} \end{frame} \begin{frame} \frametitle{Covariance matrix of a $1 \times 1$ random vector} \framesubtitle{That is, a scalar random variable} % From this point in the course, scalar random variables are lower case. {\LARGE \begin{eqnarray*} cov(\mathbf{x}) & = & E\left\{ (\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\right\} \\ & = & E\left\{ (x-\mu)(x-\mu) \right\} \\ & = & E\left\{ (x-\mu)^2 \right\} \\ & = & Var(x) \end{eqnarray*} } % End size \end{frame} \begin{frame} \frametitle{A rule analogous to $Var(a\,x) = a^2\,Var(x)$} Let $\mathbf{x}$ be a $p \times 1$ random vector with $E(\mathbf{x}) = \boldsymbol{\mu}$ and $cov(\mathbf{x}) = \boldsymbol{\Sigma}$, while $\mathbf{A}$ is an $r \times p$ matrix of constants. Then \begin{eqnarray*} cov(\mathbf{Ax}) &=& E\left\{ (\mathbf{Ax}-\mathbf{A}\boldsymbol{\mu}) (\mathbf{Ax}-\mathbf{A}\boldsymbol{\mu})^\top \right\} \\ \pause &=& E\left\{ \mathbf{A}(\mathbf{x}-\boldsymbol{\mu}) \left(\mathbf{A}(\mathbf{x}-\boldsymbol{\mu})\right)^\top \right\} \\ \pause &=& E\left\{ \mathbf{A}(\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top \mathbf{A}^\top \right\} \nonumber \\ \pause &=& \mathbf{A}E\{(\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\} \mathbf{A}^\top \\ \pause &=& \mathbf{A}cov(\mathbf{x}) \mathbf{A}^\top \nonumber \\ &=& \mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top \end{eqnarray*} \end{frame} \begin{frame} \frametitle{Positive definite is a natural assumption} \framesubtitle{For covariance matrices} \begin{itemize} \item Let $cov(\mathbf{x}) = \boldsymbol{\Sigma}$ \pause \item $\boldsymbol{\Sigma}$ positive definite means $\mathbf{a}^\top \boldsymbol{\Sigma} \mathbf{a} > 0$ for all $\mathbf{a} \neq \mathbf{0}$. \pause \item $y = \mathbf{a}^\top \mathbf{x} = a_1x_1 + \cdots + a_p x_p$ is a scalar random variable. 
\pause \item $Var(y) = \mathbf{a}^\top cov(\mathbf{x}) \mathbf{a} = \mathbf{a}^\top \boldsymbol{\Sigma} \mathbf{a}$ \pause \item $\boldsymbol{\Sigma}$ positive definite just says that the variance of any (non-trivial) linear combination is positive. \item This is usually what you want. \end{itemize} \end{frame} \begin{frame} \frametitle{Matrix of covariances between two random vectors} Let $\mathbf{x}$ be a $p \times 1$ random vector with $E(\mathbf{x}) = \boldsymbol{\mu}_x$ and let $\mathbf{y}$ be a $q \times 1$ random vector with $E(\mathbf{y}) = \boldsymbol{\mu}_y$. \vspace{3mm} The $p \times q$ matrix of covariances between the elements of $\mathbf{x}$ and the elements of $\mathbf{y}$ is \begin{displaymath} cov(\mathbf{x,y}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}_x) (\mathbf{y}-\boldsymbol{\mu}_y)^\top\right\}. \end{displaymath} \pause Note $cov(\mathbf{x,x}) = cov(\mathbf{x})$. \end{frame} \begin{frame} \frametitle{Adding a constant has no effect} \framesubtitle{On variances and covariances} It's clear from the definitions \begin{itemize} \item $cov(\mathbf{x}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\right\}$ \item $cov(\mathbf{x,y}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}_x) (\mathbf{y}-\boldsymbol{\mu}_y)^\top\right\}$ \end{itemize} That \begin{itemize} \item $ cov(\mathbf{x} + \mathbf{a}) = cov(\mathbf{x})$ \item $cov(\mathbf{x} + \mathbf{a},\mathbf{y} + \mathbf{b}) = cov(\mathbf{x},\mathbf{y})$ \end{itemize} \vspace{5mm} \pause For example, $E(\mathbf{x} + \mathbf{a}) = \boldsymbol{\mu} + \mathbf{a}$, so \begin{eqnarray*} cov(\mathbf{x} + \mathbf{a}) & = & E\left\{ (\mathbf{x}+\mathbf{a}-(\boldsymbol{\mu}+\mathbf{a})) (\mathbf{x}+\mathbf{a}-(\boldsymbol{\mu}+\mathbf{a}))^\top\right\} \\ \pause & = & E\left\{ (\mathbf{x}-\boldsymbol{\mu}) (\mathbf{x}-\boldsymbol{\mu})^\top\right\} \\ \pause & = & cov(\mathbf{x}) \end{eqnarray*} \end{frame} \begin{frame} \frametitle{Here's a useful formula} %\framesubtitle{} Let $E(\mathbf{y}) = \boldsymbol{\mu}$, $cov(\mathbf{y}) = \boldsymbol{\Sigma}$, and let $\mathbf{A}$ and $\mathbf{B}$ be matrices of constants. Then {\LARGE \begin{displaymath} cov(\mathbf{Ay},\mathbf{By}) = \mathbf{A}\boldsymbol{\Sigma} \mathbf{B}^\top. 
\end{displaymath} } \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Centering} %\framesubtitle{Based on $cov(\mathbf{x} + \mathbf{a}) = cov(\mathbf{x})$} \pause Denote the \emph{centered} version of the random vector $\mathbf{x}$ by $\stackrel{c}{\mathbf{x}} = \mathbf{x} - \boldsymbol{\mu}_x$, \pause so that \begin{itemize} \item $E(\stackrel{c}{\mathbf{x}})=\mathbf{0}$ and \item $E(\stackrel{c}{\mathbf{x}}\stackrel{c}{\mathbf{x}} \stackrel{\top}{\vphantom{r}}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}_x) (\mathbf{x}-\boldsymbol{\mu}_x)^\top\right\} = cov(\mathbf{x})$ and \\ \item $E(\stackrel{c}{\mathbf{x}}\stackrel{c}{\vspace{.4mm}\mathbf{y}} \stackrel{\top}{\vphantom{r}}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}_x) (\mathbf{y}-\boldsymbol{\mu}_y)^\top\right\} = cov(\mathbf{x},\mathbf{y})$ \end{itemize} \end{frame} \begin{frame} \frametitle{Linear combinations of random vectors} %\framesubtitle{These are matrices, but they could be scalars} \begin{eqnarray*} \mathbf{L} & = & \mathbf{A}_1\mathbf{x}_1 + \cdots + \mathbf{A}_m\mathbf{x}_m + \mathbf{b} \\ \pause \stackrel{c}{\mathbf{L}} & = & \mathbf{L} - E(\mathbf{L}) \\ \pause & = & \mathbf{A}_1\mathbf{x}_1 + \cdots + \mathbf{A}_m\mathbf{x}_m + \mathbf{b} \\ && \hspace{-3mm}-\mathbf{A}_1\boldsymbol{\mu}_1 - \cdots - \mathbf{A}_m\boldsymbol{\mu}_m - \mathbf{b} \\ \pause & = & \mathbf{A}_1(\mathbf{x}_1-\boldsymbol{\mu}_1) + \cdots + \mathbf{A}_m(\mathbf{x}_m-\boldsymbol{\mu}_m) \\ \pause & = & \mathbf{A}_1 \stackrel{c}{\mathbf{x}}_1 + \cdots + \mathbf{A}_m \stackrel{c}{\mathbf{x}}_m \end{eqnarray*} \pause % \vspace{5mm} So that \begin{eqnarray*} cov(\mathbf{L}) & = & E(\stackrel{c}{\mathbf{L}}\stackrel{c}{\mathbf{L}} \stackrel{\top}{\vphantom{r}}) \\ cov(\mathbf{L}_1,\mathbf{L}_2) & = & E(\stackrel{c}{\mathbf{L}}_1\,\stackrel{c}{\mathbf{L}} \stackrel{\top}{\vphantom{r}_2}) \end{eqnarray*} %\vspace{1mm} %In words: To calculate variances and covariances of linear combinations, one may simply discard added constants, center all the random vectors, and take expected values of products. 
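\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
\frametitle{A numerical check of the covariance rules}
\framesubtitle{A sketch in R; the seed, data and constant matrices are arbitrary choices}

Sample covariance matrices obey the same algebra, so rules like
$cov(\mathbf{Ax}) = \mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top$,
$cov(\mathbf{Ax},\mathbf{Bx}) = \mathbf{A}\boldsymbol{\Sigma}\mathbf{B}^\top$ and
$cov(\mathbf{x}+\mathbf{a}) = cov(\mathbf{x})$ can be checked on any data matrix
with base R. This is a sanity check, not a proof.
{\scriptsize
\begin{verbatim}
set.seed(431)                      # Arbitrary seed
x <- matrix(rnorm(200*3), 200, 3)  # 200 cases, p = 3 variables
x[, 2] <- x[, 1] + x[, 2]          # Make the columns correlated
Sigma <- var(x)                    # Sample covariance matrix of x
A <- rbind(c(1, 0, 1),
           c(0, 2, -1))            # 2 x 3 matrix of constants
B <- rbind(c(1, 1, 1))             # 1 x 3 matrix of constants
var(x %*% t(A))                    # cov(Ax), directly from the data
A %*% Sigma %*% t(A)               # A Sigma A-transpose: same matrix
cov(x %*% t(A), x %*% t(B))        # cov(Ax, Bx), directly
A %*% Sigma %*% t(B)               # A Sigma B-transpose: same matrix
var(x + 10)                        # Adding a constant changes nothing
\end{verbatim}
} % End size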
\end{frame}

\begin{frame}
\frametitle{$cov(\mathbf{L}_1,\mathbf{L}_2) = E(\stackrel{c}{\mathbf{L}}_1\,\stackrel{c}{\mathbf{L}} \stackrel{\top}{\vphantom{r}_2})$}
%\framesubtitle{}
Let
{\LARGE
\begin{eqnarray*}
\stackrel{c}{\mathbf{L}}_1 & = & \mathbf{A}_1 \stackrel{c}{\mathbf{x}}_1 + \cdots + \mathbf{A}_m \stackrel{c}{\mathbf{x}}_m \\
\stackrel{c}{\mathbf{L}}_2 & = & \mathbf{B}_1 \stackrel{c}{\mathbf{y}}_1 + \cdots + \mathbf{B}_n \stackrel{c}{\mathbf{y}}_n
\end{eqnarray*}
} % End size
\end{frame}

\begin{frame}
\frametitle{A better rule for covariances of linear combinations}
%\framesubtitle{}
\begin{columns}
\column{1.2\textwidth} % To use more margin
{\footnotesize
\begin{eqnarray*}
cov(\mathbf{L}_1,\mathbf{L}_2) & = & E\left\{\stackrel{c}{\mathbf{L}}_1\,\stackrel{c}{\mathbf{L}} \stackrel{\top}{\vphantom{r}_2}\right\} \\ \pause
& = & E\left\{ \left( \mathbf{A}_1\stackrel{c}{\mathbf{x}}_1 + \cdots + \mathbf{A}_m\stackrel{c}{\mathbf{x}}_m\right) \left( \mathbf{B}_1\stackrel{c}{\mathbf{y}}_1 + \cdots + \mathbf{B}_n\stackrel{c}{\mathbf{y}}_n\right)^\top \right\}\\
& = & E\left\{ \left( \mathbf{A}_1\stackrel{c}{\mathbf{x}}_1 + \cdots + \mathbf{A}_m\stackrel{c}{\mathbf{x}}_m\right) \left( \stackrel{c}{\mathbf{y}}\stackrel{\top}{\vphantom{r}_1} \mathbf{B}_1^\top + \cdots + \stackrel{c}{\mathbf{y}}\stackrel{\top}{\vphantom{r}_n} \mathbf{B}_n^\top\right) \right\}\\
% \stackrel{c}{\mathbf{x}}_1
% \stackrel{c}{\mathbf{y}}\stackrel{\top}{\vphantom{r}_1}
& = & E\left\{\mathbf{A}_1\stackrel{c}{\mathbf{x}}_1 \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_1} \mathbf{B}_1^\top + \mathbf{A}_1\stackrel{c}{\mathbf{x}}_1 \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_2} \mathbf{B}_2^\top + \cdots + \mathbf{A}_m\stackrel{c}{\mathbf{x}}_m \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_n} \mathbf{B}_n^\top \right\} \pause \\
& = & \mathbf{A}_1E\left\{\stackrel{c}{\mathbf{x}}_1 \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_1}\right\} \mathbf{B}_1^\top + \mathbf{A}_1E\left\{\stackrel{c}{\mathbf{x}}_1 \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_2}\right\} \mathbf{B}_2^\top + \cdots + \mathbf{A}_mE\left\{\stackrel{c}{\mathbf{x}}_m \stackrel{c}{\vspace{-.2mm}\mathbf{y}}\stackrel{\top}{\vphantom{r}_n} \right\} \mathbf{B}_n^\top \pause \\
& = & \mathbf{A}_1 \, cov(\mathbf{x}_1,\mathbf{y}_1) \, \mathbf{B}_1^\top + \pause \mathbf{A}_1 \, cov(\mathbf{x}_1,\mathbf{y}_2) \, \mathbf{B}_2^\top + \cdots + \mathbf{A}_m \, cov(\mathbf{x}_m,\mathbf{y}_n) \, \mathbf{B}_n^\top \pause \\
& = & \sum_{i=1}^m \sum_{j=1}^n \mathbf{A}_i \, cov(\mathbf{x}_i,\mathbf{y}_j) \, \mathbf{B}_j^\top
\end{eqnarray*} \pause
} % End size
\end{columns}

That is, calculate the covariance of each term in $\mathbf{L}_1$ with each term in $\mathbf{L}_2$, and add them up.
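\end{frame}

\begin{frame}
\frametitle{A special case of the rule}
\framesubtitle{One term in each linear combination}

For example, with $\mathbf{L}_1 = \mathbf{A}\mathbf{x} + \mathbf{b}$ and
$\mathbf{L}_2 = \mathbf{B}\mathbf{y}$ (so $m = n = 1$), the double sum has a single term:
\begin{displaymath}
cov(\mathbf{A}\mathbf{x} + \mathbf{b}, \, \mathbf{B}\mathbf{y})
  = \mathbf{A} \, cov(\mathbf{x},\mathbf{y}) \, \mathbf{B}^\top.
\end{displaymath}
The added constant $\mathbf{b}$ just disappears, and taking $\mathbf{y} = \mathbf{x}$
and $\mathbf{B} = \mathbf{A}$ recovers $cov(\mathbf{Ax}) = \mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top$.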
\end{frame} \begin{frame} \frametitle{Example: $cov(\mathbf{x}+\mathbf{y})$} % \framesubtitle{Using the centering rule} \pause \begin{eqnarray*} cov(\mathbf{x}+\mathbf{y}) & = & cov(\mathbf{x}+\mathbf{y}, \mathbf{x}+\mathbf{y}) \\ \pause & = & cov(\mathbf{x},\mathbf{x}) + cov(\mathbf{x},\mathbf{y}) + cov(\mathbf{y},\mathbf{x}) + cov(\mathbf{y},\mathbf{y}) \\ \pause & = & cov(\mathbf{x}) + cov(\mathbf{y}) + cov(\mathbf{x},\mathbf{y}) + cov(\mathbf{y},\mathbf{x}) \end{eqnarray*} \pause \begin{itemize} \item $cov(\mathbf{y},\mathbf{x}) \neq cov(\mathbf{x},\mathbf{y})$ \item $cov(\mathbf{y},\mathbf{x}) = cov(\mathbf{x},\mathbf{y})^\top$ \end{itemize} %Use $cov(\mathbf{x,y}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}_x) % (\mathbf{y}-\boldsymbol{\mu}_y)^\top\right\}$ to see this. \end{frame} \section{Multivariate Normal} \begin{frame} \frametitle{The Multivariate Normal Distribution} The $p \times 1$ random vector $\mathbf{x}$ is said to have a \emph{multivariate normal distribution}, and we write $\mathbf{x} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$, if $\mathbf{x}$ has (joint) density \begin{displaymath} f(\mathbf{x}) = \frac{1}{|\boldsymbol{\Sigma}|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}} \exp\left\{ -\frac{1}{2} (\mathbf{x}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}-\boldsymbol{\mu})\right\}, \end{displaymath} where $\boldsymbol{\mu}$ is $p \times 1$ and $\boldsymbol{\Sigma}$ is $p \times p$ symmetric and positive definite. \end{frame} \begin{frame} \frametitle{The Bivariate Normal Density} \framesubtitle{Multivariate normal with $p=2$ variables} \begin{center} \includegraphics[width=3in]{BivariateNormal} \end{center} \end{frame} \begin{frame} \frametitle{Analogies} % \framesubtitle{()} Multivariate normal reduces to the univariate normal when $p=1$. \vspace{4mm} \begin{itemize} \item Univariate Normal \begin{itemize} \item $f(x) = \frac{1}{\sigma \sqrt{2\pi}} \exp \left\{-\frac{1}{2}\frac{(x-\mu)^2}{\sigma^2}\right\}$ \item $E(x)=\mu, Var(x) = \sigma^2$ \item $\frac{(x-\mu)^2}{\sigma^2} \sim \chi^2 (1)$ \end{itemize} \pause \vspace{3mm} \item Multivariate Normal \begin{itemize} \item $f(\mathbf{x}) = \frac{1}{|\boldsymbol{\Sigma}|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}} \exp\left\{ -\frac{1}{2} (\mathbf{x}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}-\boldsymbol{\mu})\right\}$ \item $E(\mathbf{x})= \boldsymbol{\mu}$, $cov(\mathbf{x}) = \boldsymbol{\Sigma}$ \item $(\mathbf{x}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}-\boldsymbol{\mu}) \sim \chi^2 (p)$ \end{itemize} \end{itemize} \end{frame} \begin{frame} \frametitle{More properties of the multivariate normal} % \begin{itemize} \item If $\mathbf{c}$ is a vector of constants, $\mathbf{x}+\mathbf{c} \sim N(\mathbf{c}+\boldsymbol{\mu},\boldsymbol{\Sigma})$ \item If $\mathbf{A}$ is a matrix of constants, $\mathbf{Ax} \sim N(\mathbf{A}\boldsymbol{\mu},\mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top)$ \item Linear combinations of multivariate normals are multivariate normal. \item All the marginals (dimension less than $p$) of $\mathbf{x}$ are (multivariate) normal. \item For the multivariate normal, zero covariance implies independence. The multivariate normal is the only continuous distribution with this property. 
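\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Checking $(\mathbf{x}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}-\boldsymbol{\mu}) \sim \chi^2 (p)$ by simulation}
\framesubtitle{A base R sketch; $\boldsymbol{\mu}$, $\boldsymbol{\Sigma}$ and the sample size are arbitrary choices}

This sketch simulates using a Cholesky factor of $\boldsymbol{\Sigma}$; the
\texttt{rmvn} function coming up does the same job with the spectral decomposition.
{\scriptsize
\begin{verbatim}
set.seed(431)
p <- 3; n <- 10000
mu <- c(1, 0, -1)
Sigma <- rbind(c(2.0, 0.5, 0.0),
               c(0.5, 1.0, 0.3),
               c(0.0, 0.3, 1.5))
R <- chol(Sigma)                              # t(R) %*% R = Sigma
z <- matrix(rnorm(n*p), n, p)                 # Rows are Np(0, I)
x <- z %*% R + matrix(mu, n, p, byrow=TRUE)   # Rows are Np(mu, Sigma)
q <- mahalanobis(x, center=mu, cov=Sigma)     # The quadratic forms
mean(q)                                       # Should be close to p = 3
mean(q > qchisq(0.95, df=p))                  # Should be close to 0.05
\end{verbatim}
} % End size
\end{frame}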
% MVN likelihood
\begin{frame}
\frametitle{Multivariate Normal Likelihood}
\framesubtitle{$\mathbf{x}_1, \ldots, \mathbf{x}_n \stackrel{iid}{\sim} N(\boldsymbol{\mu},\boldsymbol{\Sigma})$}
%{\LARGE
\begin{eqnarray*}
L(\boldsymbol{\mu},\boldsymbol{\Sigma}) & = & \prod_{i=1}^n\frac{1}{|\boldsymbol{\Sigma}|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}} \exp\left\{ -\frac{1}{2} (\mathbf{x}_i-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}_i-\boldsymbol{\mu})\right\} \\
&&\\
& = & |\boldsymbol{\Sigma}|^{-\frac{n}{2}} (2\pi)^{-\frac{np}{2}} \times \\
&& \exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}}\boldsymbol{\Sigma}^{-1}) + (\overline{\mathbf{x}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} (\overline{\mathbf{x}}-\boldsymbol{\mu}) \right\},
\end{eqnarray*}
where $\boldsymbol{\widehat{\Sigma}} = \frac{1}{n}\sum_{i=1}^n (\mathbf{x}_i-\overline{\mathbf{x}}) (\mathbf{x}_i-\overline{\mathbf{x}})^\top$.
%} % End size
\end{frame}

\begin{frame}
\frametitle{Simulating from a multivariate normal}
%\framesubtitle{}
\begin{itemize}
\item Simulation of univariate normals is built-in. Use \texttt{rnorm()}.
\item Say you want to simulate from $\mathbf{x} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$. \pause
\item Generate $\mathbf{z} \sim N_p(\mathbf{0},\mathbf{I})$. \pause
\item Calculate $\boldsymbol{\Sigma}^{\frac{1}{2}}$ using the spectral decomposition. \pause
\item Let $\mathbf{x} = \boldsymbol{\Sigma}^{\frac{1}{2}} \mathbf{z} + \boldsymbol{\mu}$ \pause $ \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma})$.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
\frametitle{The \texttt{rmvn} Function}
%\framesubtitle{}
{\scriptsize % The alltt environment requires \usepackage{alltt}
\begin{alltt}
{\color{blue}> source("https://www.utstat.toronto.edu/~brunner/openSEM/fun/rmvn.txt")
> A = rbind(c(1.0,0.5),
+           c(0.5,1.0))
> A }
     [,1] [,2]
[1,]  1.0  0.5
[2,]  0.5  1.0
{\color{blue}> datta = rmvn(10,mu=c(0,0),sigma=A); datta }
              [,1]        [,2]
 [1,] -2.643825316 -0.69926774
 [2,] -1.572814887 -0.21980248
 [3,] -0.387355643 -0.75080547
 [4,] -0.168534571 -1.28075830
 [5,] -0.716922363 -0.06556707
 [6,] -0.272368211 -0.15602646
 [7,] -0.007593983  0.59682941
 [8,]  0.436463462  1.02248006
 [9,] -0.193334362 -1.23877080
[10,] -0.859909183 -0.36091445
\end{alltt}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
\frametitle{For the Record}
%\framesubtitle{}
{\footnotesize % or scriptsize
\begin{verbatim}
# rmvn: Simulate from multivariate normal
rmvn <- function(nn,mu,sigma)
# Returns an nn by kk matrix, rows are independent MVN(mu,sigma)
    {
    kk <- length(mu)
    dsig <- dim(sigma)
    if(dsig[1] != dsig[2]) stop("Sigma must be square.")
    if(dsig[1] != kk) stop("Sizes of sigma and mu are inconsistent.")
    ev <- eigen(sigma)
    if(min(eigen(sigma)$values) < 0) stop("Sigma must have non-negative eigenvalues.")
    sqrl <- diag(sqrt(ev$values))
    PP <- ev$vectors
    ZZ <- rnorm(nn*kk) ; dim(ZZ) <- c(kk,nn)
    out <- t(PP%*%sqrl%*%ZZ+mu)
    return(out)
    }# End of function rmvn
\end{verbatim}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
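\begin{frame}[fragile]
\frametitle{Checking \texttt{rmvn}}
\framesubtitle{A quick sanity check; the sample size is an arbitrary choice}

With a large sample, \texttt{colMeans} and \texttt{var} should roughly
reproduce $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$.
{\footnotesize
\begin{verbatim}
source("https://www.utstat.toronto.edu/~brunner/openSEM/fun/rmvn.txt")
A <- rbind(c(1.0,0.5),
           c(0.5,1.0))
datta <- rmvn(100000, mu=c(0,0), sigma=A)
colMeans(datta)      # Should be close to (0, 0)
var(datta)           # Should be close to A = sigma
\end{verbatim}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%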
\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistical Sciences, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.

The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/brunner/oldclass/431s23}
{\small\texttt{http://www.utstat.toronto.edu/brunner/oldclass/431s23}}

\end{frame}

\end{document}