% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on prime
\usetheme{Frankfurt} % Displays section titles on prime: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{amsfonts} % for \mathbb{R} The set of reals
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{The Multivariate Normal Distribution\footnote{See last slide for copyright information.}}
\subtitle{STA 302 Fall 2017}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

% Had another version where I started with the density. See for example 2101f13 or 431

\section{Moment-generating Functions}

\begin{frame}
\frametitle{Joint moment-generating function}
\framesubtitle{Of a $p$-dimensional random vector $\mathbf{x}$}
\begin{itemize}
\item $M_{\mathbf{x}}(\mathbf{t}) = E\left(e^{\mathbf{t}^\prime \mathbf{x}} \right)$ \pause
\item For example, $M_{_{(x_1,x_2,x_3)}}(t_1,t_2,t_3) = E\left( e^{x_1t_1+x_2t_2+x_3t_3} \right)$ \pause
\item Just write $M(\mathbf{t})$ if there is no ambiguity.
\end{itemize}
\pause
\vspace{10mm}
Section 4.3 of \emph{Linear models in statistics} has some material on moment-generating functions (optional).
\end{frame}

\begin{frame}
\frametitle{Uniqueness}
\framesubtitle{Proof omitted}
Joint moment-generating functions correspond uniquely to joint probability distributions. \pause
\begin{itemize}
\item $M(\mathbf{t})$ is a function of $F(\mathbf{x})$. \pause
\begin{itemize}
\item Step One: $f(\mathbf{x}) = \frac{\partial}{\partial x_1} \cdots \frac{\partial}{\partial x_p} F(\mathbf{x})$. \pause
\item For example, $\frac{\partial}{\partial x_1} \frac{\partial}{\partial x_2} \int_{-\infty}^{x_2} \int_{-\infty}^{x_1} f(y_1,y_2) \, dy_1dy_2 = f(x_1,x_2)$ \pause
\item Step Two: $M(\mathbf{t}) = \int \cdots \int e^{\mathbf{t}^\prime \mathbf{x}} f(\mathbf{x}) \, d\mathbf{x}$ \pause
\item Could write $M(\mathbf{t}) = g\left(F(\mathbf{x})\right)$. \pause
\end{itemize}
\item Uniqueness says the function $g$ is one-to-one\pause, so that $F(\mathbf{x}) = g^{-1}\left(M(\mathbf{t})\right)$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{$g^{-1}\left(M(\mathbf{t})\right) = F(\mathbf{x})$}
\framesubtitle{A two-variable example}
\pause
\begin{columns} % Use Beamer's columns to use more of the margins!
\column{1.2\textwidth}
\begin{displaymath}
\begin{array}{ccc}
g^{-1}\left(M(\mathbf{t})\right) & = & F(\mathbf{x}) \\
 & & \\ \pause
g^{-1}\left( \int_{-\infty}^\infty \int_{-\infty}^\infty e^{x_1t_1+x_2t_2} f(x_1,x_2) \, dx_1dx_2 \right) &=& \int_{-\infty}^{x_2} \int_{-\infty}^{x_1} f(y_1,y_2) \, dy_1dy_2
\end{array}
\end{displaymath}
\end{columns}
\end{frame}

\begin{frame}
\frametitle{Theorem}
\framesubtitle{}
Two random vectors $\mathbf{x}_1$ and $\mathbf{x}_2$ are independent if and only if the moment-generating function of their joint distribution is the product of their moment-generating functions.
\end{frame}

\begin{frame}
\frametitle{Proof}
\framesubtitle{Two random vectors are independent if and only if the moment-generating function of their joint distribution is the product of their moment-generating functions.}
\pause
That independence implies the MGFs factor is left as an exercise. For the converse, suppose the MGFs factor. \pause
\begin{eqnarray*}
M_{x_1,x_2}(t_1,t_2) & = & M_{x_1}(t_1)M_{x_2}(t_2) \\ \pause
& = & \left(\int_{-\infty}^\infty e^{x_1t_1} f_{x_1}(x_1) \, dx_1 \right) \left(\int_{-\infty}^\infty e^{x_2t_2} f_{x_2}(x_2) \, dx_2\right) \\ \pause
& = & \int_{-\infty}^\infty \int_{-\infty}^\infty e^{x_1t_1}e^{x_2t_2} f_{x_1}(x_1)f_{x_2}(x_2) \, dx_1dx_2 \\ \pause
& = & \int_{-\infty}^\infty \int_{-\infty}^\infty e^{x_1t_1+x_2t_2} f_{x_1}(x_1)f_{x_2}(x_2) \, dx_1dx_2
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Proof continued}
\pause
%\framesubtitle{}
Have $M_{x_1,x_2}(t_1,t_2) = \int_{-\infty}^\infty \int_{-\infty}^\infty e^{x_1t_1+x_2t_2} f_{x_1}(x_1)f_{x_2}(x_2) \, dx_1dx_2$. \pause
\vspace{2mm}
Using $F(\mathbf{x}) = g^{-1}\left(M(\mathbf{t})\right)$, \pause
\begin{eqnarray*}
F(x_1,x_2) & = & \pause g^{-1}\left( \int_{-\infty}^\infty \int_{-\infty}^\infty e^{x_1t_1+x_2t_2} f_{x_1}(x_1)f_{x_2}(x_2) \, dx_1dx_2 \right) \\ \pause
& = & \int_{-\infty}^{x_2} \int_{-\infty}^{x_1} f_{x_1}(y_1)f_{x_2}(y_2) \, dy_1dy_2 \\ \pause
& = & \int_{-\infty}^{x_2} f_{x_2}(y_2) \left(\int_{-\infty}^{x_1} f_{x_1}(y_1) \, dy_1 \right) dy_2 \\ \pause
& = & \int_{-\infty}^{x_2} f_{x_2}(y_2)F_{x_1}(x_1) \, dy_2 \\ \pause
& = & F_{x_1}(x_1) \int_{-\infty}^{x_2}f_{x_2}(y_2) \, dy_2 \\ \pause
& = & F_{x_1}(x_1) \, F_{x_2}(x_2) \pause
\end{eqnarray*}
So $x_1$ and $x_2$ are independent. $\blacksquare$
\end{frame}

\begin{frame}
\frametitle{A helpful distinction}
%\framesubtitle{}
\begin{itemize}
\item If $x_1$ and $x_2$ are independent, \pause
\begin{displaymath}
M_{_{x_1+x_2}}(t) = M_{_{x_1}}(t) M_{_{x_2}}(t)
\end{displaymath}
\pause
\item[]
\item $x_1$ and $x_2$ are independent if and only if \pause
\begin{displaymath}
M_{_{x_1,x_2}}(t_1,t_2) = M_{_{x_1}}(t_1) M_{_{x_2}}(t_2)
\end{displaymath}
\end{itemize}
\end{frame}
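% Optional worked line connecting the two facts above: set t_1 = t_2 = t in the joint moment-generating function.
\begin{frame}
\frametitle{Connecting the two facts}
\framesubtitle{Set $t_1 = t_2 = t$}
\pause
If $x_1$ and $x_2$ are independent, then evaluating the joint moment-generating function at $t_1 = t_2 = t$ gives
\begin{eqnarray*}
M_{_{x_1+x_2}}(t) & = & E\left(e^{t(x_1+x_2)}\right) \pause = E\left(e^{x_1t+x_2t}\right) \\ \pause
& = & M_{_{x_1,x_2}}(t,t) \\ \pause
& = & M_{_{x_1}}(t) \, M_{_{x_2}}(t)
\end{eqnarray*}
\pause
So the first fact is a special case of the second.
\end{frame}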
\begin{frame}
\frametitle{Theorem: Functions of independent random vectors are independent}
\pause
Show $\mathbf{x}_1$ and $\mathbf{x}_2$ independent implies that $\mathbf{y}_1=g_1(\mathbf{x}_1)$ and $\mathbf{y}_2=g_2(\mathbf{x}_2)$ are independent. \pause
\vspace{2mm}
{\footnotesize
Let $ \mathbf{y} = \left(\begin{array}{c} \mathbf{y}_1 \\ \hline \mathbf{y}_2 \end{array}\right) = \left(\begin{array}{c} g_1(\mathbf{x}_1) \\ \hline g_2(\mathbf{x}_2) \end{array}\right) \mbox{ and } \mathbf{t} = \left(\begin{array}{c} \mathbf{t}_1 \\ \hline \mathbf{t}_2 \end{array}\right). \mbox{ Then}$ \pause
{\scriptsize
\begin{eqnarray*}
M_{\mathbf{y}}(\mathbf{t}) & = & E\left(e^{\mathbf{t}^\prime \mathbf{y}} \right) \\ \pause
& = & E\left(e^{\mathbf{t}_1^\prime \mathbf{y}_1 + \mathbf{t}_2^\prime \mathbf{y}_2} \right) \pause = E\left(e^{\mathbf{t}_1^\prime \mathbf{y}_1}e^{\mathbf{t}_2^\prime \mathbf{y}_2} \right)\\ \pause
& = & E\left(e^{\mathbf{t}_1^\prime g_1(\mathbf{x}_1)} e^{\mathbf{t}_2^\prime g_2(\mathbf{x}_2)} \right)\\ \pause
& = & \int \int e^{\mathbf{t}_1^\prime g_1(\mathbf{x}_1)} e^{\mathbf{t}_2^\prime g_2(\mathbf{x}_2)} f_{\mathbf{x}_1}(\mathbf{x}_1)f_{\mathbf{x}_2}(\mathbf{x}_2) \, d\mathbf{x}_1 d\mathbf{x}_2 \\ \pause
& = & \int e^{\mathbf{t}_2^\prime g_2(\mathbf{x}_2)} f_{\mathbf{x}_2}(\mathbf{x}_2) \left( \int e^{\mathbf{t}_1^\prime g_1(\mathbf{x}_1)} f_{\mathbf{x}_1}(\mathbf{x}_1) \, d\mathbf{x}_1 \right) d\mathbf{x}_2 \\ \pause
& = & \int e^{\mathbf{t}_2^\prime g_2(\mathbf{x}_2)} f_{\mathbf{x}_2}(\mathbf{x}_2) M_{g_1(\mathbf{x}_1)}(\mathbf{t}_1) d\mathbf{x}_2 \\ \pause
& = & M_{g_1(\mathbf{x}_1)}(\mathbf{t}_1) M_{g_2(\mathbf{x}_2)}(\mathbf{t}_2) \pause = M_{\mathbf{y}_1}(\mathbf{t}_1) M_{\mathbf{y}_2}(\mathbf{t}_2)
\end{eqnarray*}
\pause
} % End size
So $\mathbf{y}_1$ and $\mathbf{y}_2$ are independent. ~$\blacksquare$
} % End size
\end{frame}

\begin{frame}
\frametitle{$M_{A\mathbf{x}}(\mathbf{t}) = M_{\mathbf{x}}(A^\prime\mathbf{t}) $}
\framesubtitle{Analogue of $M_{ax}(t) = M_{x}(at)$}
\pause
{\LARGE
\begin{eqnarray*}
M_{A\mathbf{x}}(\mathbf{t}) & = & E\left(e^{\mathbf{t}^\prime A\mathbf{x}} \right) \\ \pause
& = & E\left(e^{\left(A^\prime\mathbf{t}\right)^\prime \mathbf{x}} \right) \\ \pause
& = & M_{\mathbf{x}}(A^\prime\mathbf{t})
\end{eqnarray*}
} % End size
\vspace{3mm}
\pause
Note that $\mathbf{t}$ is the same length as $\mathbf{y} = A\mathbf{x}$: The number of rows in $A$.
\end{frame}

\begin{frame}
\frametitle{$M_{\mathbf{x}+\mathbf{c}}(\mathbf{t}) = e^{\mathbf{t}^\prime \mathbf{c} } M_{\mathbf{x}}(\mathbf{t})$ }
\framesubtitle{Analogue of $M_{x+c}(t) = e^{ct}M_{x}(t)$}
\pause
{\LARGE
\begin{eqnarray*}
M_{\mathbf{x}+\mathbf{c}}(\mathbf{t}) & = & E\left(e^{\mathbf{t}^\prime (\mathbf{x}+\mathbf{c})} \right) \\ \pause
& = & E\left(e^{\mathbf{t}^\prime\mathbf{x}+\mathbf{t}^\prime\mathbf{c}} \right) \\ \pause
& = & e^{\mathbf{t}^\prime\mathbf{c}} \, E\left(e^{\mathbf{t}^\prime \mathbf{x}} \right) \\ \pause
& = & e^{\mathbf{t}^\prime \mathbf{c} } M_{\mathbf{x}}(\mathbf{t})
\end{eqnarray*}
} % End size
\end{frame}
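% The two rules combine to give the affine case; this is the pattern used later for Sigma^{1/2} z + mu.
\begin{frame}
\frametitle{$M_{A\mathbf{x}+\mathbf{c}}(\mathbf{t}) = e^{\mathbf{t}^\prime \mathbf{c}} M_{\mathbf{x}}(A^\prime\mathbf{t})$}
\framesubtitle{Combining the two rules: analogue of $M_{ax+c}(t) = e^{ct}M_{x}(at)$}
\pause
{\LARGE
\begin{eqnarray*}
M_{A\mathbf{x}+\mathbf{c}}(\mathbf{t}) & = & e^{\mathbf{t}^\prime \mathbf{c}} \, M_{A\mathbf{x}}(\mathbf{t}) \\ \pause
& = & e^{\mathbf{t}^\prime \mathbf{c}} \, M_{\mathbf{x}}(A^\prime\mathbf{t})
\end{eqnarray*}
} % End size
\vspace{3mm}
\pause
This affine form is the pattern that will be used to build the multivariate normal.
\end{frame}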
\section{Definition}

\begin{frame}
\frametitle{Distributions may be defined in terms of moment-generating functions}
\pause
% \framesubtitle{Start with univariate normal}
Build up the multivariate normal from univariate normals.
\vspace{4mm}
\pause
\begin{itemize}
\item If $y \sim N(\mu,\sigma^2)$, then $M_{_y}(t) = e^{\mu t + \frac{1}{2}\sigma^2t^2}$ \pause
\item Moment-generating functions correspond uniquely to probability distributions. \pause
\item So \emph{define} a normal random variable with expected value $\mu$ and variance $\sigma^2$ as a random variable with moment-generating function $e^{\mu t + \frac{1}{2}\sigma^2t^2}$. \pause
\item This has one surprising consequence \ldots
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Degenerate random variables}
A \emph{degenerate} random variable has all the probability concentrated at a single value, say $Pr\{y=y_0\}= 1$. \pause
Then
%\framesubtitle{}
{\LARGE
\begin{eqnarray*}
M_{_y}(t) & = & E(e^{yt}) \\ \pause
& = & \sum_y e^{yt} p(y) \\ \pause
& = & e^{y_0t} \, \cdot \,p(y_0) \\ \pause
& = & e^{y_0t} \, \cdot \, 1 \\
& = & e^{y_0t}
\end{eqnarray*}
} % End size
\end{frame}

\begin{frame}
\frametitle{If $Pr\{y=y_0\}= 1$, then $M_{_y}(t) = e^{y_0t}$}
\pause
%\framesubtitle{}
\begin{itemize}
\item This is of the form $e^{\mu t + \frac{1}{2}\sigma^2t^2}$ with $\mu=y_0$ and $\sigma^2=0$. \pause
\item So $y \sim N(y_0,0)$. \pause
\item That is, degenerate random variables are ``normal" with variance zero. \pause
\item Call them \emph{singular} normals. \pause
\item This will be surprisingly handy later.
\end{itemize}
\end{frame}
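% A quick worked consequence of defining normals by their moment-generating functions.
\begin{frame}
\frametitle{Example: Sum of two independent normals}
\framesubtitle{Using $M_{_y}(t) = e^{\mu t + \frac{1}{2}\sigma^2t^2}$, the product rule for independent random variables, and uniqueness}
\pause
Let $y_1 \sim N(\mu_1,\sigma^2_1)$ and $y_2 \sim N(\mu_2,\sigma^2_2)$ be independent. \pause
\begin{eqnarray*}
M_{_{y_1+y_2}}(t) & = & M_{_{y_1}}(t) \, M_{_{y_2}}(t) \\ \pause
& = & e^{\mu_1 t + \frac{1}{2}\sigma^2_1 t^2} \, e^{\mu_2 t + \frac{1}{2}\sigma^2_2 t^2} \\ \pause
& = & e^{(\mu_1+\mu_2) t + \frac{1}{2}(\sigma^2_1+\sigma^2_2) t^2}
\end{eqnarray*}
\pause
By uniqueness, $y_1+y_2 \sim N(\mu_1+\mu_2, \, \sigma^2_1+\sigma^2_2)$: sums of independent normals are normal.
\end{frame}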
\begin{frame}
\frametitle{Independent standard normals}
%\framesubtitle{}
Let $z_1, \ldots, z_p \stackrel{i.i.d.}{\sim}N(0,1)$. \pause
{\LARGE
\begin{displaymath}
\mathbf{z} = \left( \begin{array}{c} z_1 \\ \vdots \\ z_p \end{array} \right)
\end{displaymath}
\pause
\vspace{4mm}
\begin{displaymath}
E(\mathbf{z}) = \mathbf{0} ~~~~~~~~~~~ cov(\mathbf{z}) = I_p
\end{displaymath}
} % End size
\end{frame}

\begin{frame}
\frametitle{Moment-generating function of $\mathbf{z}$}
\framesubtitle{Using $e^{\mu t + \frac{1}{2}\sigma^2t^2}$}
% {\LARGE
\begin{eqnarray*}
M_{_\mathbf{z}}(\mathbf{t}) & = & \prod_{j=1}^p M_{z_j}(t_j) \\ \pause
& = & \prod_{j=1}^p e^{\frac{1}{2}t_j^2} \\ \pause
& = & e^{\frac{1}{2}\sum_{j=1}^p t_j^2} \\ \pause
& = & e^{\frac{1}{2}\mathbf{t}^\prime\mathbf{t}}
\end{eqnarray*}
% } % End size
\end{frame}

\begin{frame}
\frametitle{Transform $\mathbf{z}$ to get a general multivariate normal}
\framesubtitle{Remember: $A$ non-negative definite means $\mathbf{v}^\prime A\mathbf{v} \geq 0$}
\pause
Let $ \Sigma$ be a $p \times p$ symmetric \emph{non-negative definite} matrix \pause and $\boldsymbol{\mu} \in \mathbb{R}^p$. \pause Let $\mathbf{y} = \Sigma^{1/2}\mathbf{z} + \boldsymbol{\mu}$. \pause
\begin{itemize}
\item The elements of $\mathbf{y}$ are linear combinations of independent standard normals. \pause
\item Linear combinations of normals should be normal. \pause
\item $\mathbf{y}$ has a multivariate distribution. \pause
\item We'd like to call $\mathbf{y}$ a \emph{multivariate normal}.
% \item With mean $\boldsymbol{\mu}$ and covariance matrix $\Sigma$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Moment-generating function of $\mathbf{y} = \Sigma^{1/2}\mathbf{z} + \boldsymbol{\mu}$}
\framesubtitle{Remember: $M_{A\mathbf{x}}(\mathbf{t}) = M_{\mathbf{x}}(A^\prime\mathbf{t})$ and $M_{\mathbf{x}+\mathbf{c}}(\mathbf{t}) = e^{\mathbf{t}^\prime \mathbf{c} } M_{\mathbf{x}}(\mathbf{t})$ and $M_{_\mathbf{z}}(\mathbf{t}) = e^{\frac{1}{2}\mathbf{t}^\prime\mathbf{t}}$}
\pause
\vspace{-1mm}
\begin{eqnarray*}
M_{_\mathbf{y}}(\mathbf{t}) & = & M_{_{ \Sigma^{1/2}\mathbf{z} + \boldsymbol{\mu}}}(\mathbf{t}) \\ \pause
& = & e^{\mathbf{t}^\prime\boldsymbol{\mu}} \, M_{_{ \Sigma^{1/2}\mathbf{z}}}(\mathbf{t}) \\ \pause
& = & e^{\mathbf{t}^\prime\boldsymbol{\mu}} \, M_{_{\mathbf{z}}}( \Sigma^{1/2 \, ^\prime}\mathbf{t}) \\ \pause
& = & e^{\mathbf{t}^\prime\boldsymbol{\mu}} \, M_{_{\mathbf{z}}}( \Sigma^{1/2}\mathbf{t}) \\ \pause
& = & e^{\mathbf{t}^\prime\boldsymbol{\mu}} \, e^{\frac{1}{2} ( \Sigma^{1/2}\mathbf{t})^\prime( \Sigma^{1/2}\mathbf{t})} \\ \pause
& = & e^{\mathbf{t}^\prime\boldsymbol{\mu}} \, e^{\frac{1}{2} \mathbf{t}^\prime \Sigma^{1/2} \Sigma^{1/2}\mathbf{t}} \\ \pause
& = & e^{\mathbf{t}^\prime\boldsymbol{\mu}} \, e^{\frac{1}{2} \mathbf{t}^\prime \Sigma\mathbf{t}} \\ \pause
& = & e^{\mathbf{t}^\prime\boldsymbol{\mu}+\frac{1}{2} \mathbf{t}^\prime \Sigma\mathbf{t}}
\end{eqnarray*}
\pause
\vspace{-1mm}
So \emph{define} a multivariate normal random variable $\mathbf{y}$ as one with moment-generating function $ M_{_\mathbf{y}}(\mathbf{t}) = e^{\mathbf{t}^\prime\boldsymbol{\mu} + \frac{1}{2} \mathbf{t}^\prime \Sigma\mathbf{t}}$.
\end{frame}

\begin{frame}
\frametitle{Compare univariate and multivariate normal moment-generating functions}
\pause
%\framesubtitle{}
\begin{tabular}{ll}
Univariate & $M_{_y}(t) = e^{\mu t + \frac{1}{2}\sigma^2t^2}$ \\
 & \\
Multivariate & $ M_{_\mathbf{y}}(\mathbf{t}) = e^{\mathbf{t}^\prime\boldsymbol{\mu}+\frac{1}{2} \mathbf{t}^\prime \Sigma\mathbf{t}}$
\end{tabular}
\vspace{10mm}
\pause
So the univariate normal is a special case of the multivariate normal with $p=1$.
\end{frame}
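% Writing out the new moment-generating function in a small case, just to see the pieces.
\begin{frame}
\frametitle{Writing out the moment-generating function for $p=2$}
\framesubtitle{Here $\sigma_{1,2}$ denotes the off-diagonal element of $\Sigma$}
\pause
With $\boldsymbol{\mu} = \left(\begin{array}{c} \mu_1 \\ \mu_2 \end{array}\right)$ and
$\Sigma = \left(\begin{array}{cc} \sigma^2_1 & \sigma_{1,2} \\ \sigma_{1,2} & \sigma^2_2 \end{array}\right)$, \pause
\begin{eqnarray*}
M_{_\mathbf{y}}(t_1,t_2) & = & e^{\mathbf{t}^\prime\boldsymbol{\mu}+\frac{1}{2} \mathbf{t}^\prime \Sigma\mathbf{t}} \\ \pause
& = & \exp\left\{ \mu_1 t_1 + \mu_2 t_2 + \frac{1}{2}\left( \sigma^2_1 t_1^2 + 2\sigma_{1,2} t_1 t_2 + \sigma^2_2 t_2^2 \right) \right\}
\end{eqnarray*}
\pause
Setting $t_2=0$ gives $e^{\mu_1 t_1 + \frac{1}{2}\sigma^2_1 t_1^2}$, the moment-generating function of $N(\mu_1,\sigma^2_1)$.
\end{frame}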
\section{Properties}

\begin{frame}
\frametitle{Mean and covariance matrix}
\framesubtitle{For a univariate normal, $E(y)=\mu$ and $Var(y)=\sigma^2$}
\pause
Recall $\mathbf{y} = \Sigma^{1/2}\mathbf{z} + \boldsymbol{\mu}$. \pause
\vspace{2mm}
\begin{eqnarray*}
E(\mathbf{y}) & = & \Sigma^{1/2}E(\mathbf{z}) + \boldsymbol{\mu} = \boldsymbol{\mu} \\ \pause
cov(\mathbf{y}) & = & \Sigma^{1/2} cov(\mathbf{z}) \Sigma^{1/2\prime} \\ \pause
& = & \Sigma^{1/2} \, I \, \Sigma^{1/2} \\ \pause
& = & \Sigma
\end{eqnarray*}
\vspace{2mm}
\pause
We will say $\mathbf{y}$ is multivariate normal with expected value $\boldsymbol{\mu}$ and variance-covariance matrix $ \Sigma$\pause, and write $\mathbf{y} \sim N_p(\boldsymbol{\mu}, \Sigma)$.
\vspace{2mm}
\pause
Note that because $ M_{_\mathbf{y}}(\mathbf{t}) = e^{\mathbf{t}^\prime\boldsymbol{\mu}+\frac{1}{2} \mathbf{t}^\prime \Sigma\mathbf{t}}$, $\boldsymbol{\mu}$ and $ \Sigma$ completely determine the distribution.
\end{frame}

\begin{frame}
\frametitle{Probability density function of $\mathbf{y} \sim N_p(\boldsymbol{\mu}, \Sigma)$}
\framesubtitle{Remember, $ \Sigma$ is only positive \emph{semi}-definite.}
\pause
It is easy to write down the density of $\mathbf{z} \sim N_p(\mathbf{0},I)$ as a product of standard normals. \pause
\vspace{2mm}
If $ \Sigma$ is strictly positive definite (and not otherwise), the density of $\mathbf{y} = \Sigma^{1/2}\mathbf{z} + \boldsymbol{\mu}$ can be obtained using the Jacobian Theorem \pause as
\begin{displaymath}
f(\mathbf{y}) = \frac{1}{| \Sigma|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}} \exp\left\{ -\frac{1}{2} (\mathbf{y}-\boldsymbol{\mu})^\prime \Sigma^{-1}(\mathbf{y}-\boldsymbol{\mu})\right\}
\end{displaymath}
\pause
This is usually how the multivariate normal is defined.
\end{frame}

\begin{frame}
\frametitle{$\Sigma$ positive definite?}
\pause
\begin{itemize}
\item Positive definite means that for any non-zero $p \times 1$ vector $\mathbf{a}$, we have $\mathbf{a}^\prime \Sigma \mathbf{a} > 0$. \pause
\item Since the one-dimensional random variable $w=\sum_{i=1}^p a_i y_i$ may be written as $w=\mathbf{a}^\prime \mathbf{y}$ \pause and $Var(w)=cov(\mathbf{a}^\prime \mathbf{y})=\mathbf{a}^\prime \Sigma \mathbf{a}$, \pause it is natural to require that $ \Sigma$ be positive definite. \pause
\item All it means is that every non-zero linear combination of $\mathbf{y}$ values has a positive variance. Often, this is what you want.
% \item But if there is $\mathbf{a}\neq\mathbf{0}$ with $\mathbf{a}^\prime \Sigma \mathbf{a} = 0$, then $Var(W) = Var(\mathbf{a}^\prime\mathbf{y}) = 0$. That is $W$ has a degenerate distribution (but it's still normal).
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Singular normal: $ \Sigma$ is positive \emph{semi}-definite.}
\pause
%\framesubtitle{}
Suppose there is $\mathbf{a}\neq\mathbf{0}$ with $\mathbf{a}^\prime \Sigma \mathbf{a} = 0$. Let $w = \mathbf{a}^\prime\mathbf{y}$. \pause
\begin{itemize}
\item Then $Var(w) = cov(\mathbf{a}^\prime\mathbf{y}) = \mathbf{a}^\prime \Sigma \mathbf{a} = 0$. \pause That is, $w$ has a degenerate distribution (but it's still normal). \pause
\item In this case we describe the distribution of $\mathbf{y}$ as a \emph{singular} multivariate normal. \pause
\item Including the singular case saves a lot of extra work in later proofs. \pause
\item We will insist that a singular multivariate normal is still multivariate normal, even though it has no density.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Distribution of $A\mathbf{y}$ }
\framesubtitle{Recall $\mathbf{y} \sim N_p(\boldsymbol{\mu}, \Sigma)$ means $ M_{_\mathbf{y}}(\mathbf{t}) = e^{\mathbf{t}^\prime\boldsymbol{\mu}+\frac{1}{2} \mathbf{t}^\prime \Sigma\mathbf{t}}$}
\pause
Let $\mathbf{y} \sim N_p(\boldsymbol{\mu}, \Sigma)$, and $\mathbf{w}=A\mathbf{y}$, where $A$ is an $r \times p$ matrix. \pause
{\LARGE
\begin{eqnarray*}
M_{_\mathbf{w}}(\mathbf{t}) & = & M_{_{A\mathbf{y}}}(\mathbf{t}) \\ \pause
& = & M_{_\mathbf{y}}(A^\prime\mathbf{t}) \\ \pause
& = & e^{(A^\prime\mathbf{t})^\prime\boldsymbol{\mu}} \, e^{\frac{1}{2} (A^\prime\mathbf{t})^\prime \Sigma(A^\prime\mathbf{t})} \\ \pause
& = & e^{\mathbf{t}^\prime (A\boldsymbol{\mu})} \, e^{\frac{1}{2} \mathbf{t}^\prime (A \Sigma A^\prime) \mathbf{t}} \\ \pause
& = & e^{\mathbf{t}^\prime (A\boldsymbol{\mu})+\frac{1}{2} \mathbf{t}^\prime (A \Sigma A^\prime) \mathbf{t}} \pause
\end{eqnarray*}
} % End size
Recognize the moment-generating function and conclude
{\LARGE
\begin{displaymath}
\mathbf{w} \sim N_r(A\boldsymbol{\mu}, A \Sigma A^\prime)
\end{displaymath}
} % End size
\end{frame}
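% The special case r = 1 of the result above, stated for emphasis.
\begin{frame}
\frametitle{Special case: a single linear combination}
\framesubtitle{Take $A = \mathbf{a}^\prime$, a $1 \times p$ matrix of constants, so $r=1$}
\pause
If $\mathbf{y} \sim N_p(\boldsymbol{\mu}, \Sigma)$ and $\mathbf{a}$ is a $p \times 1$ vector of constants, then
{\LARGE
\begin{displaymath}
w = \mathbf{a}^\prime\mathbf{y} \sim N(\mathbf{a}^\prime\boldsymbol{\mu}, \, \mathbf{a}^\prime \Sigma \mathbf{a})
\end{displaymath}
} % End size
\pause
So every linear combination of the elements of a multivariate normal has a univariate normal distribution. \pause If $\mathbf{a}^\prime \Sigma \mathbf{a} = 0$, it is a singular (degenerate) normal.
\end{frame}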
\begin{frame}
\frametitle{Exercise}
\framesubtitle{Use moment-generating functions, of course.}
{\LARGE
Let $\mathbf{y} \sim N_p(\boldsymbol{\mu}, \Sigma)$.
\vspace{10mm}
Show $\mathbf{y} + \mathbf{c} \sim N_p(\boldsymbol{\mu}+\mathbf{c}, \Sigma)$.
} % End size
\end{frame}

\begin{frame}
\frametitle{Zero covariance implies independence for the multivariate normal.}
\pause
%\framesubtitle{}
\begin{itemize}
\item Independence always implies zero covariance. \pause
\item For the multivariate normal, zero covariance also implies independence. \pause
\item The multivariate normal is the only continuous distribution with this property.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Show zero covariance implies independence}
\framesubtitle{By showing $ M_{\mathbf{y}}(\mathbf{t}) = M_{\mathbf{y}_1}(\mathbf{t}_1) M_{\mathbf{y}_2}(\mathbf{t}_2) $}
\vspace{5mm}
\pause
Let $\mathbf{y} \sim N(\boldsymbol{\mu}, \Sigma)$, with
\begin{displaymath}
\mathbf{y} = \left(\begin{array}{c} \mathbf{y}_1 \\ \hline \mathbf{y}_2 \end{array}\right) ~~~~~
\boldsymbol{\mu} = \left(\begin{array}{c} \boldsymbol{\mu}_1 \\ \hline \boldsymbol{\mu}_2 \end{array}\right) ~~~~~
\Sigma = \left(\begin{array}{c|c} \Sigma_1 & \mathbf{0} \\ \hline \mathbf{0} & \Sigma_2 \end{array}\right) ~~~~~
\mathbf{t} = \left(\begin{array}{c} \mathbf{t}_1 \\ \hline \mathbf{t}_2 \end{array}\right)
\end{displaymath}
\pause
\begin{eqnarray*}
M_{\mathbf{y}}(\mathbf{t}) & = & E\left(e^{\mathbf{t}^\prime \mathbf{y}} \right) \\ \pause
& = & E\left(e^{\left(\begin{array}{c} \mathbf{t}_1 \\ \hline \mathbf{t}_2 \end{array}\right)^\prime \mathbf{y}} \right) \\ \pause
& = & \ldots
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Continuing the calculation: $ M_{_\mathbf{y}}(\mathbf{t}) = e^{\mathbf{t}^\prime\boldsymbol{\mu} +\frac{1}{2} \mathbf{t}^\prime \Sigma\mathbf{t}}$}
\framesubtitle{$\mathbf{y} = \left(\begin{array}{c} \mathbf{y}_1 \\ \hline \mathbf{y}_2 \end{array}\right) ~~~~~ \boldsymbol{\mu} = \left(\begin{array}{c} \boldsymbol{\mu}_1 \\ \hline \boldsymbol{\mu}_2 \end{array}\right) ~~~~~ \Sigma = \left(\begin{array}{c|c} \Sigma_1 & \mathbf{0} \\ \hline \mathbf{0} & \Sigma_2 \end{array}\right) ~~~~~ \mathbf{t} = \left(\begin{array}{c} \mathbf{t}_1 \\ \hline \mathbf{t}_2 \end{array}\right)$}
{\footnotesize
\begin{columns} % Use Beamer's columns to use more of the margins!
\column{1.2\textwidth}
\begin{eqnarray*}
M_{\mathbf{y}}(\mathbf{t}) & = & E\left(e^{\left(\begin{array}{c} \mathbf{t}_1 \\ \hline \mathbf{t}_2 \end{array}\right)^\prime \mathbf{y}} \right) \\ \pause
& = & \exp\left\{(\mathbf{t}_1^\prime|\mathbf{t}_2^\prime) \left(\begin{array}{c} \boldsymbol{\mu}_1 \\ \hline \boldsymbol{\mu}_2 \end{array}\right) \right\} \exp\left\{\frac{1}{2} (\mathbf{t}_1^\prime|\mathbf{t}_2^\prime) \left(\begin{array}{c|c} \Sigma_1 & \mathbf{0} \\ \hline \mathbf{0} & \Sigma_2 \end{array}\right) \left(\begin{array}{c} \mathbf{t}_1 \\ \hline \mathbf{t}_2 \end{array}\right) \right\} \\ \pause
& = & e^{\mathbf{t}_1^\prime \boldsymbol{\mu}_1 + \mathbf{t}_2^\prime \boldsymbol{\mu}_2} \exp\left\{\frac{1}{2} \left(\mathbf{t}_1^\prime \Sigma_1 | \mathbf{t}_2^\prime \Sigma_2 \right) \left(\begin{array}{c} \mathbf{t}_1 \\ \hline \mathbf{t}_2 \end{array}\right)\right\} \\ \pause
& = & e^{\mathbf{t}_1^\prime \boldsymbol{\mu}_1 + \mathbf{t}_2^\prime \boldsymbol{\mu}_2} \exp\left\{\frac{1}{2} \left(\mathbf{t}_1^\prime \Sigma_1 \mathbf{t}_1 + \mathbf{t}_2^\prime \Sigma_2 \mathbf{t}_2\right) \right\} \\ \pause
& = & e^{\mathbf{t}_1^\prime \boldsymbol{\mu}_1} ~ e^{\mathbf{t}_2^\prime \boldsymbol{\mu}_2} ~ e^{\frac{1}{2} (\mathbf{t}_1^\prime \Sigma_1 \mathbf{t}_1)} ~ e^{\frac{1}{2} (\mathbf{t}_2^\prime \Sigma_2 \mathbf{t}_2)} \\ \pause
& = & e^{\mathbf{t}_1^\prime \boldsymbol{\mu}_1+\frac{1}{2} (\mathbf{t}_1^\prime \Sigma_1 \mathbf{t}_1)} ~ e^{\mathbf{t}_2^\prime \boldsymbol{\mu}_2+\frac{1}{2} (\mathbf{t}_2^\prime \Sigma_2 \mathbf{t}_2)} \\ \pause
& = & M_{\mathbf{y}_1}(\mathbf{t}_1) M_{\mathbf{y}_2}(\mathbf{t}_2) \pause
\end{eqnarray*}
~~~~~So $\mathbf{y}_1$ and $\mathbf{y}_2$ are independent. ~$\blacksquare$
\end{columns}
} % End size
\end{frame}

\begin{frame}
\frametitle{An easy example}
\framesubtitle{If you do it the easy way}
Let $y_1 \sim N(1,2)$, $y_2 \sim N(2,4)$ and $y_3 \sim N(6,3)$ be independent, with $w_1=y_1+y_2$ and $w_2=y_2+y_3$. Find the joint distribution of $w_1$ and $w_2$.
\pause
\vspace{10mm}
\begin{displaymath}
\left( \begin{array}{c} w_1 \\ w_2 \end{array} \right) =
\left( \begin{array}{c c c} 1 & 1 & 0 \\ 0 & 1 & 1 \end{array} \right)
\left( \begin{array}{c} y_1 \\ y_2 \\ y_3 \end{array} \right)
\end{displaymath}
\vspace{10mm}
\pause
$\mathbf{w} = A\mathbf{y} \sim N(A\boldsymbol{\mu},A \Sigma A^\prime)$
\end{frame}

\begin{frame}
\frametitle{$\mathbf{w} = A\mathbf{y} \sim N(A\boldsymbol{\mu},A \Sigma A^\prime)$}
\framesubtitle{$y_1 \sim N(1,2)$, $y_2 \sim N(2,4)$ and $y_3 \sim N(6,3)$ are independent}
\pause
\begin{eqnarray*}
A\boldsymbol{\mu} & = & \left( \begin{array}{c c c} 1 & 1 & 0 \\ 0 & 1 & 1 \end{array} \right)
\left( \begin{array}{c} 1 \\ 2 \\ 6 \end{array} \right)
= \left( \begin{array}{c} 3 \\ 8 \end{array} \right) \\ \pause
A \Sigma A^\prime & = & \left( \begin{array}{c c c} 1 & 1 & 0 \\ 0 & 1 & 1 \end{array} \right)
\left( \begin{array}{c c c} 2 & 0 & 0 \\ 0 & 4 & 0 \\ 0 & 0 & 3 \end{array} \right)
\left( \begin{array}{c c} 1 & 0 \\ 1 & 1 \\ 0 & 1 \end{array} \right) \\ \pause
& = & \left( \begin{array}{c c} 6 & 4 \\ 4 & 7 \end{array} \right)
\end{eqnarray*}
\pause
So $\mathbf{w} \sim N_2\left( \left( \begin{array}{c} 3 \\ 8 \end{array} \right),
\left( \begin{array}{c c} 6 & 4 \\ 4 & 7 \end{array} \right) \right)$.
\end{frame}

\begin{frame}
\frametitle{Marginal distributions are multivariate normal}
\pause
\framesubtitle{$\mathbf{y} \sim N_p(\boldsymbol{\mu}, \Sigma)$, so $\mathbf{w} = A\mathbf{y} \sim N(A\boldsymbol{\mu},A \Sigma A^\prime)$}
Find the distribution of
{\LARGE
\begin{displaymath}
\left( \begin{array}{c c c c} 0 & 1 & 0 & 0\\ 0 & 0 & 0 & 1 \end{array} \right)
\left( \begin{array}{c} y_1 \\ y_2 \\ y_3 \\ y_4 \end{array} \right)
= \left( \begin{array}{c} y_2 \\ y_4 \end{array} \right)
\end{displaymath}
} % End size
\pause
Bivariate normal. The expected value is easy.
\end{frame}

\begin{frame}
\frametitle{Covariance matrix}
\framesubtitle{Of $A\mathbf{y}$}
\pause
%{\LARGE
\begin{eqnarray*}
cov( A\mathbf{y}) & = & A \Sigma A^\prime \\
& = & \left( \begin{array}{c c c c} 0 & 1 & 0 & 0\\ 0 & 0 & 0 & 1 \end{array} \right)
\left( \begin{array}{c c c c}
\sigma^2_1 & \sigma_{1,2} & \sigma_{1,3} & \sigma_{1,4} \\
\sigma_{1,2} & \sigma^2_2 & \sigma_{2,3} & \sigma_{2,4} \\
\sigma_{1,3} & \sigma_{2,3} & \sigma^2_3 & \sigma_{3,4} \\
\sigma_{1,4} & \sigma_{2,4} & \sigma_{3,4} & \sigma^2_4 \\
\end{array} \right)
\left( \begin{array}{c c} 0 & 0 \\ 1 & 0 \\ 0 & 0 \\ 0 & 1 \end{array} \right) \\ \pause
& = & \left( \begin{array}{c c c c}
\sigma_{1,2} & \sigma^2_2 & \sigma_{2,3} & \sigma_{2,4} \\
\sigma_{1,4} & \sigma_{2,4} & \sigma_{3,4} & \sigma^2_4 \\
\end{array} \right)
\left( \begin{array}{c c} 0 & 0 \\ 1 & 0 \\ 0 & 0 \\ 0 & 1 \end{array} \right) \\ \pause
& = & \left( \begin{array}{c c} \sigma^2_2 & \sigma_{2,4} \\ \sigma_{2,4} & \sigma^2_4 \end{array} \right)
\end{eqnarray*}
%} % End size
\pause
Marginal distributions of a multivariate normal are multivariate normal, with the original means, variances and covariances.
\end{frame}
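% A standard counterexample: both marginals are normal, but the joint distribution is not bivariate normal.
\begin{frame}
\frametitle{Two normals whose joint distribution is not bivariate normal}
\framesubtitle{A standard counterexample}
\pause
Let $x \sim N(0,1)$, let $s = \pm 1$ with probability $\frac{1}{2}$ each, independent of $x$, and let $y = s \, x$. \pause
\begin{itemize}
\item $y \sim N(0,1)$, because $Pr\{y \leq t\} = \frac{1}{2}Pr\{x \leq t\} + \frac{1}{2}Pr\{-x \leq t\} = Pr\{x \leq t\}$. \pause
\item $Cov(x,y) = E(s \, x^2) = E(s)E(x^2) = 0$, \pause but $x$ and $y$ are not independent, because $|y| = |x|$. \pause
\item If $(x,y)^\prime$ were bivariate normal, $x+y$ would be normal (possibly singular). \pause But $Pr\{x+y=0\} = \frac{1}{2}$ and $Pr\{x+y>1\} > 0$, so $x+y$ is neither.
\end{itemize}
\end{frame}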
\begin{frame}
\frametitle{Summary}
\pause
%
\begin{itemize}
\item If $\mathbf{c}$ is a vector of constants, $\mathbf{x}+\mathbf{c} \sim N(\mathbf{c}+\boldsymbol{\mu}, \Sigma)$. \pause
\item If $A$ is a matrix of constants, $A\mathbf{x} \sim N(A\boldsymbol{\mu},A \Sigma A^\prime)$. \pause
\item Linear combinations of multivariate normals are multivariate normal. \pause
\item All the marginals (dimension less than $p$) of $\mathbf{x}$ are (multivariate) normal, but it is possible in theory to have a collection of univariate normals whose joint distribution is not multivariate normal. \pause
\item For the multivariate normal, zero covariance implies independence. \pause The multivariate normal is the only continuous distribution with this property.
\end{itemize}
\end{frame}

\section{$\chi^2$ and $t$ distributions}

\begin{frame}
\frametitle{Showing $(\mathbf{x}-\boldsymbol{\mu})^\prime \Sigma^{-1}(\mathbf{x}-\boldsymbol{\mu}) \sim \chi^2 (p)$}
\framesubtitle{$ \Sigma$ has to be positive definite this time}
\begin{eqnarray*}
\mathbf{x} & \sim & N\left(\boldsymbol{\mu}, \Sigma\right) \\ \pause
\mathbf{y} = \mathbf{x}-\boldsymbol{\mu} & \sim & N\left(\mathbf{0},\ \Sigma\right) \\ \pause
\mathbf{z} = \Sigma^{-\frac{1}{2}} \mathbf{y} & \sim & N\left(\mathbf{0}, \Sigma^{-\frac{1}{2}} \Sigma \Sigma^{-\frac{1}{2}} \right) \\ \pause
& = & N\left(\mathbf{0}, \Sigma^{-\frac{1}{2}} \Sigma^{\frac{1}{2}} ~ \Sigma^{\frac{1}{2}} \Sigma^{-\frac{1}{2}} \right) \\ \pause
& = & N\left(\mathbf{0}, I\right)
\end{eqnarray*}
\pause
So $\mathbf{z}$ is a vector of $p$ independent standard normals, \pause and
\begin{displaymath}
\mathbf{y}^\prime \Sigma^{-1} \mathbf{y} \pause = ( \Sigma^{-\frac{1}{2}} \mathbf{y})^\prime ( \Sigma^{-\frac{1}{2}} \mathbf{y}) \pause = \mathbf{z}^\prime \mathbf{z} \pause = \sum_{j=1}^p z_j^2 \pause \sim \chi^2(p) ~~~~~~~~~~ \blacksquare
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{$\overline{x}$ and $s^2$ independent }
\framesubtitle{$x_1, \ldots, x_n \stackrel{i.i.d.}{\sim} N(\mu,\sigma^2)$}
\begin{displaymath}
\begin{array}{lcl}
\mathbf{x} = \left( \begin{array}{c} x_1 \\ \vdots \\ x_n \end{array} \right) \sim N\left(\mu\mathbf{1},\sigma^2I \right) &~~~~& \pause
\mathbf{y} = \left( \begin{array}{c} x_1-\overline{x} \\ \vdots \\ x_n-\overline{x} \\\\ \overline{x} \end{array} \right) = A\mathbf{x}
\end{array}
\end{displaymath}
\pause
Note $A$ is $(n+1) \times n$, so $cov(A\mathbf{x}) = \sigma^2\mathbf{AA}^\prime$ is $(n+1) \times (n+1)$, singular.
\end{frame}
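% An illustration of the matrix A in a small case.
\begin{frame}
\frametitle{What does $A$ look like?}
\framesubtitle{For example, $n=3$, so $\overline{x} = \frac{1}{3}(x_1+x_2+x_3)$}
\pause
\begin{displaymath}
A\mathbf{x} =
\left( \begin{array}{rrr}
 \frac{2}{3} & -\frac{1}{3} & -\frac{1}{3} \\
-\frac{1}{3} &  \frac{2}{3} & -\frac{1}{3} \\
-\frac{1}{3} & -\frac{1}{3} &  \frac{2}{3} \\
 \frac{1}{3} &  \frac{1}{3} &  \frac{1}{3}
\end{array} \right)
\left( \begin{array}{c} x_1 \\ x_2 \\ x_3 \end{array} \right)
=
\left( \begin{array}{c} x_1-\overline{x} \\ x_2-\overline{x} \\ x_3-\overline{x} \\ \overline{x} \end{array} \right)
\end{displaymath}
\pause
Here $A$ is $4 \times 3$, that is, $(n+1) \times n$.
\end{frame}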
\begin{frame}
\frametitle{The argument}
\begin{displaymath}
\mathbf{y} = A\mathbf{x} =
\left( \begin{array}{c} x_1-\overline{x} \\ \vdots \\ x_{n}-\overline{x} \\\\ \overline{x} \end{array} \right) =
\left( \begin{array}{c} \\\\ \mathbf{y}_2 \\\\ \hline \\ \overline{x} \end{array} \right)
\end{displaymath}
\pause
\begin{itemize}
\item $\mathbf{y}$ is multivariate normal because $\mathbf{x}$ is multivariate normal. \pause
\item $Cov\left(\overline{x},(x_j-\overline{x})\right)=0$ (Exercise) \pause
\item So $\overline{x}$ and $\mathbf{y}_2$ are independent. \pause
\item So $\overline{x}$ and $S^2 = g(\mathbf{y}_2)$ are independent. ~~$\blacksquare$
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Leads to the $t$ distribution}
%\framesubtitle{}
If
\begin{itemize}
\item $z \sim N(0,1)$ and
\item $y \sim \chi^2(\nu)$ and \pause
\item $z$ and $y$ are independent, then we say \pause
\end{itemize}
\begin{displaymath}
T = \frac{z}{\sqrt{y/\nu}} \sim t(\nu)
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{Random sample from a normal distribution}
Let $x_1, \ldots, x_n \stackrel{i.i.d.}{\sim} N(\mu,\sigma^2)$. Then
\begin{itemize}
\item $\frac{\sqrt{n}(\overline{x}-\mu)}{\sigma} \sim N(0,1)$ \pause and
\item $\frac{(n-1)S^2}{\sigma^2} \sim \chi^2(n-1)$ \pause and
\item These quantities are independent, \pause so
\begin{eqnarray*}
T & = & \frac{\sqrt{n}(\overline{x}-\mu)/\sigma} {\sqrt{\frac{(n-1)S^2}{\sigma^2}/(n-1)}} \\ \pause
&&\\
& = & \frac{\sqrt{n}(\overline{x}-\mu)}{S} \sim t(n-1)
\end{eqnarray*}
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Copyright Information}
This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/302f17} {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f17}}
\end{frame}

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{}
%\framesubtitle{}
\begin{itemize}
\item
\item
\item
\end{itemize}
\end{frame}

{\LARGE
\begin{displaymath}

\end{displaymath}
} % End size

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Omitted slides %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Independence}
{\Large
Two random vectors $\mathbf{X}_1$ and $\mathbf{X}_2$ are independent if and only if the moment-generating function of their joint distribution is the product of their moment-generating functions.
} % End size
\end{frame}

\begin{frame}
\frametitle{Proof: Suppose $\mathbf{X}_1$ and $\mathbf{X}_2$ are independent, with}
%\framesubtitle{}
$\mathbf{X} = \left(\begin{array}{c} \mathbf{X}_1 \\ \hline \mathbf{X}_2 \end{array}\right)$ and $\mathbf{t} = \left(\begin{array}{c} \mathbf{t}_1 \\ \hline \mathbf{t}_2 \end{array}\right)$. Then
\begin{eqnarray*}
M_{\mathbf{X}}(\mathbf{t}) & = & E\left(e^{\mathbf{t}^\prime \mathbf{X}} \right) \\
& = & E\left(e^{\mathbf{t}_1^\prime \mathbf{X}_1 + \mathbf{t}_2^\prime \mathbf{X}_2} \right) = E\left(e^{\mathbf{t}_1^\prime \mathbf{X}_1}e^{\mathbf{t}_2^\prime \mathbf{X}_2} \right)\\
& = & \int \int e^{\mathbf{t}_1^\prime \mathbf{x}_1} e^{\mathbf{t}_2^\prime \mathbf{x}_2} f_{\mathbf{X}_1}(\mathbf{x}_1)f_{\mathbf{X}_2}(\mathbf{x}_2) \, d(\mathbf{x}_1) d(\mathbf{x}_2) \\
& = & \int e^{\mathbf{t}_2^\prime \mathbf{x}_2} \left(\int e^{\mathbf{t}_1^\prime \mathbf{x}_1} f_{\mathbf{X}_1}(\mathbf{x}_1)\, d(\mathbf{x}_1) \right) f_{\mathbf{X}_2}(\mathbf{x}_2) \, d(\mathbf{x}_2) \\
& = & \int e^{\mathbf{t}_2^\prime \mathbf{x}_2} M_{\mathbf{X}_1}(\mathbf{t}_1) f_{\mathbf{X}_2}(\mathbf{x}_2) \, d(\mathbf{x}_2) \\
& = & M_{\mathbf{X}_1}(\mathbf{t}_1) M_{\mathbf{X}_2}(\mathbf{t}_2)
\end{eqnarray*}
By uniqueness, it's an if and only if.
\end{frame}

\begin{frame}
\frametitle{Multivariate normal likelihood}
\framesubtitle{For reference}
{\footnotesize
\begin{eqnarray*}
L(\boldsymbol{\mu,\Sigma}) &=& \prod_{i=1}^n \frac{1}{| \Sigma|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}} \exp\left\{ -\frac{1}{2} (\mathbf{x}_i-\boldsymbol{\mu})^\prime \Sigma^{-1}(\mathbf{x}_i-\boldsymbol{\mu})\right\} \\ \pause
&&\\
&=& | \Sigma|^{-n/2} (2\pi)^{-np/2} \exp\left\{ -\frac{1}{2} \sum_{i=1}^n (\mathbf{x}_i-\boldsymbol{\mu})^\prime \Sigma^{-1}(\mathbf{x}_i-\boldsymbol{\mu})\right\} \\ \pause
&&\\
&=& | \Sigma|^{-n/2} (2\pi)^{-np/2} \exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) + (\overline{\mathbf{x}}-\boldsymbol{\mu})^\prime \Sigma^{-1} (\overline{\mathbf{x}}-\boldsymbol{\mu}) \right\},
\end{eqnarray*}
} \pause
where $\boldsymbol{\widehat{\Sigma}} = \frac{1}{n}\sum_{i=1}^n (\mathbf{x}_i-\overline{\mathbf{x}}) (\mathbf{x}_i-\overline{\mathbf{x}})^\prime $ is the sample variance-covariance matrix.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%