\documentclass[serif]{beamer} % Get Computer Modern math font.
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{Random Vectors\footnote{See last slide for copyright information.}}
\subtitle{STA 302 Fall 2013}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

\section{Definitions and Basic Results}

\begin{frame}
\frametitle{Random Vectors and Matrices}
\framesubtitle{See Chapter 3 of \emph{Linear models in statistics} for more detail.}
A \emph{random matrix} is just a matrix of random variables. The joint probability distribution of its elements is the distribution of the random matrix. Random matrices with just one column (say, $p \times 1$) may be called \emph{random vectors}.
\end{frame}

\begin{frame}
\frametitle{Expected Value}
%\framesubtitle{}
The expected value of a matrix is defined as the matrix of expected values. Denoting the $p \times c$ random matrix $\mathbf{X}$ by $[X_{i,j}]$,
\begin{displaymath}
E(\mathbf{X}) = [E(X_{i,j})].
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{Immediately we have natural properties like}
%\framesubtitle{}
\begin{eqnarray}
E(\mathbf{X}+\mathbf{Y}) &=& E([X_{i,j}]+[Y_{i,j}]) \nonumber \\
&=& [E(X_{i,j}+Y_{i,j})] \nonumber \\
&=& [E(X_{i,j})+E(Y_{i,j})] \nonumber \\
&=& [E(X_{i,j})]+[E(Y_{i,j})] \nonumber \\
&=& E(\mathbf{X})+E(\mathbf{Y}). \nonumber
\end{eqnarray}
\end{frame}

\begin{frame}
\frametitle{Moving a constant through the expected value sign}
Let $\mathbf{A} = [a_{i,j}]$ be an $r \times p$ matrix of constants, while $\mathbf{X}$ is still a $p \times c$ random matrix. Then
\begin{eqnarray}
E(\mathbf{AX}) &=& E\left(\left[\sum_{k=1}^p a_{i,k}X_{k,j}\right]\right) \nonumber \\
&=& \left[E\left(\sum_{k=1}^p a_{i,k}X_{k,j}\right)\right] \nonumber \\
&=& \left[\sum_{k=1}^p a_{i,k}E(X_{k,j})\right] \nonumber \\
&=& \mathbf{A}E(\mathbf{X}). \nonumber
\end{eqnarray}
Similar calculations yield $E(\mathbf{AXB}) = \mathbf{A}E(\mathbf{X})\mathbf{B}$.
\end{frame}
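
\begin{frame}
\frametitle{A small example of $E(\mathbf{AX}) = \mathbf{A}E(\mathbf{X})$}
%\framesubtitle{}
To see the rule in action, here is a quick check with a $2 \times 2$ matrix of constants (the particular numbers are arbitrary). Let
\begin{displaymath}
\mathbf{A} = \left( \begin{array}{c c} 1 & 2 \\ 0 & 1 \end{array} \right)
\mbox{ and }
E(\mathbf{X}) = \left( \begin{array}{c} \mu_1 \\ \mu_2 \end{array} \right).
\end{displaymath}
Then, working element by element,
\begin{displaymath}
E(\mathbf{AX}) = E\left( \begin{array}{c} X_1 + 2X_2 \\ X_2 \end{array} \right)
= \left( \begin{array}{c} \mu_1 + 2\mu_2 \\ \mu_2 \end{array} \right)
= \mathbf{A} E(\mathbf{X}).
\end{displaymath}
\end{frame}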

\begin{frame}
\frametitle{Variance-Covariance Matrices}
Let $\mathbf{X}$ be a $p \times 1$ random vector with $E(\mathbf{X}) = \boldsymbol{\mu}$. The \emph{variance-covariance matrix} of $\mathbf{X}$ (sometimes just called the \emph{covariance matrix}), denoted by $cov(\mathbf{X})$, is defined as
\begin{displaymath}
cov(\mathbf{X}) = E\left\{ (\mathbf{X}-\boldsymbol{\mu}) (\mathbf{X}-\boldsymbol{\mu})^\prime\right\}.
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{$cov(\mathbf{X}) = E\left\{ (\mathbf{X}-\boldsymbol{\mu}) (\mathbf{X}-\boldsymbol{\mu})^\prime\right\}$}
{\scriptsize
\begin{eqnarray}
cov(\mathbf{X}) &=& E\left\{ \left( \begin{array}{c} X_1-\mu_1 \\ X_2-\mu_2 \\ X_3-\mu_3 \end{array} \right)
\left( \begin{array}{c c c} X_1-\mu_1 & X_2-\mu_2 & X_3-\mu_3 \end{array} \right) \right\} \nonumber \\
&=& E\left\{ \left( \begin{array}{l l l}
(X_1-\mu_1)^2 & (X_1-\mu_1)(X_2-\mu_2) & (X_1-\mu_1)(X_3-\mu_3) \\
(X_2-\mu_2)(X_1-\mu_1) & (X_2-\mu_2)^2 & (X_2-\mu_2)(X_3-\mu_3) \\
(X_3-\mu_3)(X_1-\mu_1) & (X_3-\mu_3)(X_2-\mu_2) & (X_3-\mu_3)^2 \\
\end{array} \right) \right\} \nonumber \\ \nonumber \\
&=& \left( \begin{array}{l l l}
E\{(X_1-\mu_1)^2\} & E\{(X_1-\mu_1)(X_2-\mu_2)\} & E\{(X_1-\mu_1)(X_3-\mu_3)\} \\
E\{(X_2-\mu_2)(X_1-\mu_1)\} & E\{(X_2-\mu_2)^2\} & E\{(X_2-\mu_2)(X_3-\mu_3)\} \\
E\{(X_3-\mu_3)(X_1-\mu_1)\} & E\{(X_3-\mu_3)(X_2-\mu_2)\} & E\{(X_3-\mu_3)^2\} \\
\end{array} \right) \nonumber \\ \nonumber \\
&=& \left( \begin{array}{l l l}
Var(X_1) & Cov(X_1,X_2) & Cov(X_1,X_3) \\
Cov(X_1,X_2) & Var(X_2) & Cov(X_2,X_3) \\
Cov(X_1,X_3) & Cov(X_2,X_3) & Var(X_3) \\
\end{array} \right). \nonumber \\
\nonumber
\end{eqnarray}
So, the covariance matrix $cov(\mathbf{X})$ is a $p \times p$ symmetric matrix with variances on the main diagonal and covariances on the off-diagonals.
} % End size
\end{frame}

\begin{frame}
\frametitle{Analogous to $Var(a\,X) = a^2\,Var(X)$}
Let $\mathbf{X}$ be a $p \times 1$ random vector with $E(\mathbf{X}) = \boldsymbol{\mu}$ and $cov(\mathbf{X}) = \boldsymbol{\Sigma}$, while $\mathbf{A} = [a_{i,j}]$ is an $r \times p$ matrix of constants. Then
\begin{eqnarray*}
cov(\mathbf{AX}) &=& E\left\{ (\mathbf{AX}-\mathbf{A}\boldsymbol{\mu}) (\mathbf{AX}-\mathbf{A}\boldsymbol{\mu})^\prime \right\} \\
&=& E\left\{ \mathbf{A}(\mathbf{X}-\boldsymbol{\mu}) \left(\mathbf{A}(\mathbf{X}-\boldsymbol{\mu})\right)^\prime \right\} \\
&=& E\left\{ \mathbf{A}(\mathbf{X}-\boldsymbol{\mu}) (\mathbf{X}-\boldsymbol{\mu})^\prime \mathbf{A}^\prime \right\} \\
&=& \mathbf{A}E\{(\mathbf{X}-\boldsymbol{\mu}) (\mathbf{X}-\boldsymbol{\mu})^\prime\} \mathbf{A}^\prime \\
&=& \mathbf{A}cov(\mathbf{X}) \mathbf{A}^\prime \\
&=& \mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\prime.
\end{eqnarray*}
\end{frame}
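
\begin{frame}
\frametitle{Example: the variance of a difference}
\framesubtitle{A special case of $cov(\mathbf{AX}) = \mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\prime$}
For a quick illustration, take $p=2$ and let $\mathbf{A} = \mathbf{a}^\prime = (1, \; -1)$, a $1 \times 2$ matrix of constants, so that $\mathbf{AX} = X_1 - X_2$ and $cov(\mathbf{AX})$ is $1 \times 1$. Then
{\small
\begin{eqnarray*}
Var(X_1 - X_2) & = & \mathbf{a}^\prime \boldsymbol{\Sigma} \mathbf{a} \\
& = & \left( \begin{array}{c c} 1 & -1 \end{array} \right)
\left( \begin{array}{c c} Var(X_1) & Cov(X_1,X_2) \\ Cov(X_1,X_2) & Var(X_2) \end{array} \right)
\left( \begin{array}{c} 1 \\ -1 \end{array} \right) \\
& = & Var(X_1) + Var(X_2) - 2 \, Cov(X_1,X_2),
\end{eqnarray*}
} % End size
the familiar formula for the variance of a difference.
\end{frame}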

\begin{frame}
\frametitle{Positive definite is a natural assumption}
\framesubtitle{For covariance matrices}
\begin{itemize}
\item $cov(\mathbf{X}) = \boldsymbol{\Sigma}$
\item $\boldsymbol{\Sigma}$ positive definite means $\mathbf{a}^\prime \boldsymbol{\Sigma} \mathbf{a} > 0$ for all $\mathbf{a} \neq \mathbf{0}$.
\item $Y = \mathbf{a}^\prime \mathbf{X} = a_1X_1 + \cdots + a_p X_p$ is a scalar random variable.
\item $Var(Y) = \mathbf{a}^\prime \boldsymbol{\Sigma} \mathbf{a}$
\item $\boldsymbol{\Sigma}$ positive definite just says that the variance of any (non-trivial) linear combination is positive.
\item This is often what you want (but not always).
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Matrix of covariances between two random vectors}
Let $\mathbf{X}$ be a $p \times 1$ random vector with $E(\mathbf{X}) = \boldsymbol{\mu}_x$ and let $\mathbf{Y}$ be a $q \times 1$ random vector with $E(\mathbf{Y}) = \boldsymbol{\mu}_y$. The $p \times q$ matrix of covariances between the elements of $\mathbf{X}$ and the elements of $\mathbf{Y}$ is
\begin{displaymath}
C(\mathbf{X,Y}) = E\left\{ (\mathbf{X}-\boldsymbol{\mu}_x) (\mathbf{Y}-\boldsymbol{\mu}_y)^\prime\right\}.
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{Adding a constant has no effect}
\framesubtitle{On variances and covariances}
It's clear from the definitions
\begin{itemize}
\item $cov(\mathbf{X}) = E\left\{ (\mathbf{X}-\boldsymbol{\mu}) (\mathbf{X}-\boldsymbol{\mu})^\prime\right\}$
\item $C(\mathbf{X,Y}) = E\left\{ (\mathbf{X}-\boldsymbol{\mu}_x) (\mathbf{Y}-\boldsymbol{\mu}_y)^\prime\right\}$
\end{itemize}
that
\begin{itemize}
\item $cov(\mathbf{X} + \mathbf{a}) = cov(\mathbf{X})$
\item $C(\mathbf{X} + \mathbf{a},\mathbf{Y} + \mathbf{b}) = C(\mathbf{X},\mathbf{Y})$.
\end{itemize}
\vspace{5mm}
For example, $E(\mathbf{X} + \mathbf{a}) = \boldsymbol{\mu} + \mathbf{a}$, so
\begin{eqnarray*}
cov(\mathbf{X} + \mathbf{a}) & = & E\left\{ (\mathbf{X}+\mathbf{a}-(\boldsymbol{\mu}+\mathbf{a})) (\mathbf{X}+\mathbf{a}-(\boldsymbol{\mu}+\mathbf{a}))^\prime\right\} \\
& = & E\left\{ (\mathbf{X}-\boldsymbol{\mu}) (\mathbf{X}-\boldsymbol{\mu})^\prime\right\} \\
& = & cov(\mathbf{X}).
\end{eqnarray*}
\end{frame}

\section{Moment-generating Functions}

\begin{frame}
\frametitle{Moment-generating function}
\framesubtitle{Of a $p$-dimensional random vector $\mathbf{X}$}
{\LARGE
\begin{itemize}
\item $M_{\mathbf{X}}(\mathbf{t}) = E\left(e^{\mathbf{t}^\prime \mathbf{X}} \right)$
\item Corresponds uniquely to the probability distribution.
\end{itemize}
} % End size
\vspace{10mm}
Section 4.3 of \emph{Linear models in statistics} has some material on moment-generating functions.
\end{frame}

\begin{frame}
\frametitle{$M_{\mathbf{AX}}(\mathbf{t}) = M_{\mathbf{X}}(\mathbf{A}^\prime\mathbf{t})$}
\framesubtitle{Analogue of $M_{aX}(t) = M_{X}(at)$}
{\LARGE
\begin{eqnarray*}
M_{\mathbf{AX}}(\mathbf{t}) & = & E\left(e^{\mathbf{t}^\prime \mathbf{AX}} \right) \\
& = & E\left(e^{\left(\mathbf{A}^\prime\mathbf{t}\right)^\prime \mathbf{X}} \right) \\
& = & M_{\mathbf{X}}(\mathbf{A}^\prime\mathbf{t})
\end{eqnarray*}
} % End size
\vspace{3mm}
Note that $\mathbf{t}$ is the same length as $\mathbf{Y} = \mathbf{AX}$: the number of rows in $\mathbf{A}$.
\end{frame}

\begin{frame}
\frametitle{$M_{\mathbf{X}+\mathbf{c}}(\mathbf{t}) = e^{\mathbf{t}^\prime \mathbf{c} } M_{\mathbf{X}}(\mathbf{t})$}
\framesubtitle{Analogue of $M_{X+c}(t) = e^{ct}M_{X}(t)$}
{\LARGE
\begin{eqnarray*}
M_{\mathbf{X}+\mathbf{c}}(\mathbf{t}) & = & E\left(e^{\mathbf{t}^\prime (\mathbf{X}+\mathbf{c})} \right) \\
& = & E\left(e^{\mathbf{t}^\prime\mathbf{X}+\mathbf{t}^\prime\mathbf{c}} \right) \\
& = & e^{\mathbf{t}^\prime\mathbf{c}} \, E\left(e^{\mathbf{t}^\prime \mathbf{X}} \right) \\
& = & e^{\mathbf{t}^\prime \mathbf{c} } M_{\mathbf{X}}(\mathbf{t})
\end{eqnarray*}
} % End size
\end{frame}
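
\begin{frame}
\frametitle{Combining the last two results}
\framesubtitle{Analogue of $M_{aX+c}(t) = e^{ct}M_{X}(at)$}
Putting the two rules together gives the moment-generating function of a general linear transformation $\mathbf{AX}+\mathbf{c}$:
{\LARGE
\begin{eqnarray*}
M_{\mathbf{AX}+\mathbf{c}}(\mathbf{t}) & = & E\left(e^{\mathbf{t}^\prime (\mathbf{AX}+\mathbf{c})} \right) \\
& = & e^{\mathbf{t}^\prime\mathbf{c}} \, E\left(e^{\left(\mathbf{A}^\prime\mathbf{t}\right)^\prime \mathbf{X}} \right) \\
& = & e^{\mathbf{t}^\prime \mathbf{c} } M_{\mathbf{X}}(\mathbf{A}^\prime\mathbf{t})
\end{eqnarray*}
} % End size
\end{frame}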

\begin{frame}
\frametitle{Independence}
{\Large
Two random vectors $\mathbf{X}_1$ and $\mathbf{X}_2$ are independent if and only if the moment-generating function of their joint distribution is the product of their moment-generating functions.
} % End size
\end{frame}

\begin{frame}
\frametitle{Proof: Suppose $\mathbf{X}_1$ and $\mathbf{X}_2$ are independent, with}
%\framesubtitle{}
$\mathbf{X} = \left(\begin{array}{c} \mathbf{X}_1 \\ \hline \mathbf{X}_2 \end{array}\right)$ and
$\mathbf{t} = \left(\begin{array}{c} \mathbf{t}_1 \\ \hline \mathbf{t}_2 \end{array}\right)$. Then
\begin{eqnarray*}
M_{\mathbf{X}}(\mathbf{t}) & = & E\left(e^{\mathbf{t}^\prime \mathbf{X}} \right) \\
& = & E\left(e^{\mathbf{t}_1^\prime \mathbf{X}_1 + \mathbf{t}_2^\prime \mathbf{X}_2} \right) = E\left(e^{\mathbf{t}_1^\prime \mathbf{X}_1}e^{\mathbf{t}_2^\prime \mathbf{X}_2} \right)\\
& = & \int \int e^{\mathbf{t}_1^\prime \mathbf{x}_1} e^{\mathbf{t}_2^\prime \mathbf{x}_2} f_{\mathbf{X}_1}(\mathbf{x}_1)f_{\mathbf{X}_2}(\mathbf{x}_2) \, d(\mathbf{x}_1) d(\mathbf{x}_2) \\
& = & \int e^{\mathbf{t}_2^\prime \mathbf{x}_2} \left(\int e^{\mathbf{t}_1^\prime \mathbf{x}_1} f_{\mathbf{X}_1}(\mathbf{x}_1)\, d(\mathbf{x}_1) \right) f_{\mathbf{X}_2}(\mathbf{x}_2) \, d(\mathbf{x}_2) \\
& = & \int e^{\mathbf{t}_2^\prime \mathbf{x}_2} M_{\mathbf{X}_1}(\mathbf{t}_1) f_{\mathbf{X}_2}(\mathbf{x}_2) \, d(\mathbf{x}_2) \\
& = & M_{\mathbf{X}_1}(\mathbf{t}_1) M_{\mathbf{X}_2}(\mathbf{t}_2).
\end{eqnarray*}
For the converse, if the joint moment-generating function factors in this way, then by uniqueness the joint distribution must be the one under which $\mathbf{X}_1$ and $\mathbf{X}_2$ are independent. So it's an if and only if.
\end{frame}

\begin{frame}
\frametitle{$\mathbf{X}_1$ and $\mathbf{X}_2$ independent implies that $\mathbf{Y}_1=g_1(\mathbf{X}_1)$ and $\mathbf{Y}_2=g_2(\mathbf{X}_2)$ are independent.}
Let
\begin{displaymath}
\mathbf{Y} = \left(\begin{array}{c} \mathbf{Y}_1 \\ \hline \mathbf{Y}_2 \end{array}\right) = \left(\begin{array}{c} g_1(\mathbf{X}_1) \\ \hline g_2(\mathbf{X}_2) \end{array}\right)
\mbox{ and } \mathbf{t} = \left(\begin{array}{c} \mathbf{t}_1 \\ \hline \mathbf{t}_2 \end{array}\right). \mbox{ Then}
\end{displaymath}
\begin{eqnarray*}
M_{\mathbf{Y}}(\mathbf{t}) & = & E\left(e^{\mathbf{t}^\prime \mathbf{Y}} \right) \\
& = & E\left(e^{\mathbf{t}_1^\prime \mathbf{Y}_1 + \mathbf{t}_2^\prime \mathbf{Y}_2} \right) = E\left(e^{\mathbf{t}_1^\prime \mathbf{Y}_1}e^{\mathbf{t}_2^\prime \mathbf{Y}_2} \right)\\
& = & E\left(e^{\mathbf{t}_1^\prime g_1(\mathbf{X}_1)} e^{\mathbf{t}_2^\prime g_2(\mathbf{X}_2)} \right)\\
& = & \int \int e^{\mathbf{t}_1^\prime g_1(\mathbf{x}_1)} e^{\mathbf{t}_2^\prime g_2(\mathbf{x}_2)} f_{\mathbf{X}_1}(\mathbf{x}_1)f_{\mathbf{X}_2}(\mathbf{x}_2) \, d(\mathbf{x}_1) d(\mathbf{x}_2) \\
& = & M_{g_1(\mathbf{X}_1)}(\mathbf{t}_1) M_{g_2(\mathbf{X}_2)}(\mathbf{t}_2) \\
& = & M_{\mathbf{Y}_1}(\mathbf{t}_1) M_{\mathbf{Y}_2}(\mathbf{t}_2)
\end{eqnarray*}
So $\mathbf{Y}_1$ and $\mathbf{Y}_2$ are independent.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Copyright Information}
This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/302f13}{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/302f13}}
\end{frame}

\end{document}