% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]

\title{Background: Matrices and Random Vectors\footnote{See last slide for copyright information.}}
\subtitle{STA431 Spring 2017}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

\section{Matrices}

\begin{frame}
\frametitle{Matrices}
\begin{itemize}
\item $\mathbf{A} = [a_{ij}]$ % \pause
\item Transpose: $\mathbf{A}^\top = [a_{ji}]$ % \pause
\item Multiplication: in general, $\mathbf{AB} \neq \mathbf{BA}$ % \pause
\item $(\mathbf{AB})^\top = \mathbf{B}^\top\mathbf{A}^\top$ \pause
\item Inverse of a \emph{square} matrix: $\mathbf{A}^{-1}\mathbf{A} = \mathbf{AA}^{-1} = \mathbf{I}$. \pause
(Only need to show it in one direction.) \pause
% Suppose $\mathbf{A}$ and $\mathbf{B}$ are square matrices with $\mathbf{AB}=\mathbf{I}$. Seek to show $\mathbf{BA}=\mathbf{I}$.
% If $\mathbf{B}^{-1}$ did not exist, the columns of $\mathbf{B}$ would be linearly dependent, and there would be a non-zero vector $\mathbf{v}$ with $\mathbf{Bv} = \mathbf{0}$.
% But then $\mathbf{v} = \mathbf{Iv} = \mathbf{ABv} = \mathbf{A}(\mathbf{Bv}) = \mathbf{A0} = \mathbf{0}$, contradicting $\mathbf{v} \neq \mathbf{0}$. So $\mathbf{B}^{-1}$ exists.
% Then $\mathbf{AB}=\mathbf{I} \Rightarrow \mathbf{ABB}^{-1}=\mathbf{IB}^{-1} \Rightarrow \mathbf{A}=\mathbf{B}^{-1} \Rightarrow \mathbf{BA} = \mathbf{BB}^{-1} = \mathbf{I}$.
\item $(\mathbf{A}^{-1})^\top = (\mathbf{A}^\top)^{-1}$ % \pause
% \item Positive definite: $\mathbf{v}^\top \mathbf{A} \mathbf{v} > 0$ for all $p \times 1$ vectors $\mathbf{v} \neq \mathbf{0}$.
\end{itemize}
\end{frame}
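% A short verification of $(\mathbf{A}^{-1})^\top = (\mathbf{A}^\top)^{-1}$ from the slide above,
% using only $(\mathbf{AB})^\top = \mathbf{B}^\top\mathbf{A}^\top$ and the one-direction criterion for inverses.
\begin{frame}
\frametitle{Why $(\mathbf{A}^{-1})^\top = (\mathbf{A}^\top)^{-1}$}
\framesubtitle{A one-line sketch using $(\mathbf{AB})^\top = \mathbf{B}^\top\mathbf{A}^\top$}
\pause
Let $\mathbf{A}$ be square and invertible. Transposing both sides of $\mathbf{A}^{-1}\mathbf{A} = \mathbf{I}$,
\begin{displaymath}
\mathbf{A}^\top (\mathbf{A}^{-1})^\top = \mathbf{I}^\top = \mathbf{I},
\end{displaymath}
\pause
so $(\mathbf{A}^{-1})^\top$ is an inverse of $\mathbf{A}^\top$. \pause
Since an inverse only needs to be verified in one direction, $(\mathbf{A}^{-1})^\top = (\mathbf{A}^\top)^{-1}$.
\end{frame}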
\begin{frame}
\frametitle{Trace of a square matrix: Sum of the diagonal elements}
\begin{displaymath}
tr(\mathbf{A}) = \sum_{i=1}^n a_{i,i}
\end{displaymath}
\pause
\vspace{10mm}
\begin{itemize}
\item Of course $tr(\mathbf{A}+\mathbf{B}) = tr(\mathbf{A}) + tr(\mathbf{B})$, \pause
\item $tr(\mathbf{A}) = tr(\mathbf{A}^\top)$, etc. \pause
\item But less obviously, even though $\mathbf{AB} \neq \mathbf{BA}$, \pause
\item $tr(\mathbf{AB}) = tr(\mathbf{BA})$
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Proof of $tr(\mathbf{AB}) = tr(\mathbf{BA})$}
\framesubtitle{Using $\mathbf{AB} = \mathbf{C} = [c_{i,j}]$, where $c_{i,j} = \sum_{k} a_{i,k}b_{k,j}$}
\pause
Let $\mathbf{A}$ be an $r \times p$ matrix and $\mathbf{B}$ be a $p \times r$ matrix, so that the product matrices $\mathbf{AB}$ and $\mathbf{BA}$ are both defined.
\pause
\begin{eqnarray*}
tr(\mathbf{AB}) &=& \sum_{i=1}^r \left(\sum_{k=1}^p a_{i,k}b_{k,i} \right) \\ \pause
&=& \sum_{k=1}^p \left(\sum_{i=1}^r b_{k,i}a_{i,k} \right) \\ \pause
&=& tr(\mathbf{BA})
\end{eqnarray*}
\end{frame}

\section{Random Vectors}

\begin{frame}
\frametitle{Random vectors}
\framesubtitle{Expected values and variance-covariance matrices}
\pause
\begin{itemize}
\item $E(\mathbf{X}) = [E(X_{i,j})]$ % \pause
\item $E(\mathbf{X}+\mathbf{Y}) = E(\mathbf{X})+E(\mathbf{Y})$ % \pause
\item $E(\mathbf{AXB}) = \mathbf{A}E(\mathbf{X})\mathbf{B}$ % \pause
\item $cov(\mathbf{X}) = E\left\{ (\mathbf{X}-\boldsymbol{\mu}) (\mathbf{X}-\boldsymbol{\mu})^\top\right\}$ \pause
\item $cov(\mathbf{AX}) = \mathbf{A}cov(\mathbf{X})\mathbf{A}^\top$ \pause
\item $cov(\mathbf{X,Y}) = E\left\{ (\mathbf{X}-\boldsymbol{\mu}_x) (\mathbf{Y}-\boldsymbol{\mu}_y)^\top\right\}$ \pause
\item $cov(\mathbf{X+a}) = cov(\mathbf{X})$ \pause
\item $cov(\mathbf{X+a,Y+b}) = cov(\mathbf{X,Y})$
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{The Centering Rule}
\framesubtitle{Based on $cov(\mathbf{X} + \mathbf{a}) = cov(\mathbf{X})$}
\pause
Often, variance and covariance calculations can be simplified by subtracting off constants first. \pause
Denote the \emph{centered} version of $\mathbf{X}$ by $\stackrel{c}{\mathbf{X}} = \mathbf{X} - E(\mathbf{X})$, \pause so that
\begin{itemize}
\item $E(\stackrel{c}{\mathbf{X}})=\mathbf{0}$ \pause and
\item $cov(\stackrel{c}{\mathbf{X}}) = E(\stackrel{c}{\mathbf{X}}\stackrel{c}{\mathbf{X}} \stackrel{\top}{\vphantom{r}}) = cov(\mathbf{X})$
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Linear combinations}
\framesubtitle{These are matrices, but they could be scalars}
\begin{eqnarray*}
\mathbf{L}_{~} & = & \mathbf{A}_1\mathbf{X}_1 + \cdots + \mathbf{A}_m\mathbf{X}_m + \mathbf{b} \\ \pause
\stackrel{c}{\mathbf{L}}_{~} & = & \mathbf{A}_1 \stackrel{c}{\mathbf{X}}_1 + \cdots + \mathbf{A}_m \stackrel{c}{\mathbf{X}}_m,\mbox{ where} \\ \pause
\stackrel{c}{\mathbf{X}}_j & = & \mathbf{X}_j - E(\mathbf{X}_j) \mbox{ for } j=1,\ldots,m.
\end{eqnarray*}
\pause
% \vspace{5mm}
The centering rule says \pause
\begin{eqnarray*}
cov(\mathbf{L}) & = & E(\stackrel{c}{\mathbf{L}}\stackrel{c}{\mathbf{L}} \stackrel{\top}{\vphantom{r}}) \\ \pause
cov(\mathbf{L}_1,\mathbf{L}_2) & = & E(\stackrel{c}{\mathbf{L}}_1\,\stackrel{c}{\mathbf{L}} \stackrel{\top}{\vphantom{r}_2}) \pause
\end{eqnarray*}
\vspace{5mm}
In words: to calculate variances and covariances of linear combinations, one may simply discard added constants, center all the random vectors, and take expected values of products.
\end{frame}
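% A short worked example in the same spirit as the next slide: verifying the rule
% $cov(\mathbf{AX}) = \mathbf{A}cov(\mathbf{X})\mathbf{A}^\top$ with the centering rule and $E(\mathbf{AXB}) = \mathbf{A}E(\mathbf{X})\mathbf{B}$.
\begin{frame}
\frametitle{Example: $cov(\mathbf{AX})$}
\framesubtitle{A sketch using the centering rule and $E(\mathbf{AXB}) = \mathbf{A}E(\mathbf{X})\mathbf{B}$}
\pause
The centered version of $\mathbf{AX}$ is $\mathbf{A}\stackrel{c}{\mathbf{X}}$, so
\begin{eqnarray*}
cov(\mathbf{AX}) & = & E\left\{ \mathbf{A}\stackrel{c}{\mathbf{X}} (\mathbf{A}\stackrel{c}{\mathbf{X}})^\top \right\} \\ \pause
& = & E\left\{ \mathbf{A}\stackrel{c}{\mathbf{X}}\stackrel{c}{\mathbf{X}} \stackrel{\top}{\vphantom{r}} \mathbf{A}^\top \right\} \\ \pause
& = & \mathbf{A} \, E(\stackrel{c}{\mathbf{X}}\stackrel{c}{\mathbf{X}} \stackrel{\top}{\vphantom{r}}) \, \mathbf{A}^\top \\ \pause
& = & \mathbf{A} \, cov(\mathbf{X}) \, \mathbf{A}^\top
\end{eqnarray*}
\end{frame}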
\begin{frame}
\frametitle{Example: $cov(\mathbf{X}+\mathbf{Y})$}
\framesubtitle{Using the centering rule}
\pause
\begin{eqnarray*}
cov(\mathbf{X}+\mathbf{Y}) & = & E\left\{ (\stackrel{c}{\mathbf{X}}+\stackrel{c}{\mathbf{Y}}) (\stackrel{c}{\mathbf{X}}+\stackrel{c}{\mathbf{Y}})^\top \right\} \\ \pause
& = & E\left\{ (\stackrel{c}{\mathbf{X}}+\stackrel{c}{\mathbf{Y}}) (\stackrel{c}{\mathbf{X}}\stackrel{\top}{\vphantom{~}} + \stackrel{c}{\mathbf{Y}}\stackrel{\top}{\vphantom{~}}) \right\} \\ \pause
& = & E(\stackrel{c}{\mathbf{X}}\stackrel{c}{\mathbf{X}} \stackrel{\top}{\vphantom{~}}) + E(\stackrel{c}{\mathbf{Y}}\stackrel{c}{\mathbf{Y}} \stackrel{\top}{\vphantom{~}}) + E(\stackrel{c}{\mathbf{X}}\stackrel{c}{\mathbf{Y}} \stackrel{\top}{\vphantom{~}}) + E(\stackrel{c}{\mathbf{Y}}\stackrel{c}{\mathbf{X}} \stackrel{\top}{\vphantom{~}}) \\ \pause
& = & cov(\mathbf{X}) + cov(\mathbf{Y}) + cov(\mathbf{X},\mathbf{Y}) + cov(\mathbf{Y},\mathbf{X})
\end{eqnarray*}
\pause
\begin{itemize}
\item Does $cov(\mathbf{Y},\mathbf{X}) = cov(\mathbf{X},\mathbf{Y})$? \pause
\item Does $cov(\mathbf{Y},\mathbf{X}) = cov(\mathbf{X},\mathbf{Y})^\top$? \pause
\end{itemize}
Use $cov(\mathbf{X,Y}) = E\left\{ (\mathbf{X}-\boldsymbol{\mu}_x) (\mathbf{Y}-\boldsymbol{\mu}_y)^\top\right\}$
\end{frame}

\section{Multivariate Normal}

\begin{frame}
\frametitle{The Multivariate Normal Distribution}
The $p \times 1$ random vector $\mathbf{X}$ is said to have a \emph{multivariate normal distribution}, and we write $\mathbf{X} \sim N(\boldsymbol{\mu},\boldsymbol{\Sigma})$, if $\mathbf{X}$ has (joint) density
\begin{displaymath}
f(\mathbf{x}) = \frac{1}{|\boldsymbol{\Sigma}|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}}
\exp\left\{ -\frac{1}{2} (\mathbf{x}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}-\boldsymbol{\mu})\right\},
\end{displaymath}
where $\boldsymbol{\mu}$ is $p \times 1$ and $\boldsymbol{\Sigma}$ is $p \times p$ symmetric and positive definite.
\end{frame}

\begin{frame}
\frametitle{The Bivariate Normal Density}
\framesubtitle{Multivariate normal with $p=2$ variables}
\begin{center}
\includegraphics[width=3in]{BivariateNormal}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Analogies}
% \framesubtitle{()}
The multivariate normal reduces to the univariate normal when $p=1$.
\vspace{4mm}
\begin{itemize} \pause
\item Univariate Normal
\begin{itemize}
\item $f(x) = \frac{1}{\sigma \sqrt{2\pi}} \exp \left\{-\frac{1}{2}\frac{(x-\mu)^2}{\sigma^2}\right\}$ \pause
\item $E(X)=\mu$, $Var(X) = \sigma^2$ \pause
\item $\frac{(X-\mu)^2}{\sigma^2} \sim \chi^2 (1)$ \pause
\end{itemize}
\vspace{3mm}
\item Multivariate Normal
\begin{itemize}
\item $f(\mathbf{x}) = \frac{1}{|\boldsymbol{\Sigma}|^{\frac{1}{2}} (2 \pi)^{\frac{p}{2}}} \exp\left\{ -\frac{1}{2} (\mathbf{x}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{x}-\boldsymbol{\mu})\right\}$ \pause
\item $E(\mathbf{X})= \boldsymbol{\mu}$, $cov(\mathbf{X}) = \boldsymbol{\Sigma}$ \pause
\item $(\mathbf{X}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{X}-\boldsymbol{\mu}) \sim \chi^2 (p)$
\end{itemize}
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{More properties of the multivariate normal}
%
\begin{itemize}
\item If $\mathbf{c}$ is a vector of constants, $\mathbf{X}+\mathbf{c} \sim N(\mathbf{c}+\boldsymbol{\mu},\boldsymbol{\Sigma})$ \pause
\item If $\mathbf{A}$ is a matrix of constants, $\mathbf{AX} \sim N(\mathbf{A}\boldsymbol{\mu},\mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top)$ \pause
\item Linear combinations of multivariate normals are multivariate normal. \pause
\item All the marginals (dimension less than $p$) of $\mathbf{X}$ are (multivariate) normal. \pause
\item For the multivariate normal, zero covariance implies independence. The multivariate normal is the only continuous distribution with this property.
\end{itemize}
\end{frame}
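% A sketch of where the chi-squared fact on the Analogies slide comes from, using the
% linear transformation property $\mathbf{AX} \sim N(\mathbf{A}\boldsymbol{\mu},\mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top)$ from the slide above.
\begin{frame}
\frametitle{Where the chi-squared fact comes from}
\framesubtitle{A sketch using $\mathbf{AX} \sim N(\mathbf{A}\boldsymbol{\mu},\mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top)$}
\pause
Because $\boldsymbol{\Sigma}$ is symmetric and positive definite, it has a symmetric inverse square root $\boldsymbol{\Sigma}^{-1/2}$, with $\boldsymbol{\Sigma}^{-1/2}\boldsymbol{\Sigma}^{-1/2} = \boldsymbol{\Sigma}^{-1}$. \pause
Let $\mathbf{Z} = \boldsymbol{\Sigma}^{-1/2}(\mathbf{X}-\boldsymbol{\mu})$. Then
$\mathbf{Z} \sim N(\mathbf{0},\, \boldsymbol{\Sigma}^{-1/2}\boldsymbol{\Sigma}\boldsymbol{\Sigma}^{-1/2}) = N(\mathbf{0},\mathbf{I})$, \pause
so $Z_1, \ldots, Z_p$ are independent standard normals. \pause
\begin{eqnarray*}
(\mathbf{X}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{X}-\boldsymbol{\mu})
& = & (\mathbf{X}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1/2} \, \boldsymbol{\Sigma}^{-1/2}(\mathbf{X}-\boldsymbol{\mu}) \\ \pause
& = & \mathbf{Z}^\top\mathbf{Z} \;=\; \sum_{j=1}^p Z_j^2 \;\sim\; \chi^2 (p)
\end{eqnarray*}
\end{frame}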
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. Except for the picture taken from Carroll et al.'s \emph{Measurement error in non-linear models}, it is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:

\vspace{3mm}

\href{http://www.utstat.toronto.edu/~brunner/oldclass/431s17}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/431s17}}

\end{frame}

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Perspective Plot of MVN: R code used to produce the BivariateNormal figure.
# Here X ~ N(0,1) and Y|X ~ N(beta*x, 1), so (X,Y) is bivariate normal.
rm(list=ls())
beta = 1
x = y = seq(from=-5.6, to=5.6, by=0.2); n = length(x)
Density = matrix(nrow=n, ncol=n)
for(i in 1:n)
  {
  mu = beta*x[i]
  # Joint density f(x,y) = f(y|x) f(x); persp expects Density[i,j] = f(x[i], y[j]).
  Density[i,] = dnorm(y, mean=mu) * dnorm(x[i])
  }
persp(x, y, Density)
# title("Bivariate Normal Density")

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
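# A quick numerical sanity check, in R, of two identities from the slides:
# tr(AB) = tr(BA), and cov(MX) = M cov(X) M' (checked by simulation).
# The particular matrices, seed and sample size below are arbitrary illustrations.
set.seed(431)
A = matrix(rnorm(6), nrow=2)            # A is 2 x 3
B = matrix(rnorm(6), nrow=3)            # B is 3 x 2
sum(diag(A %*% B)); sum(diag(B %*% A))  # tr(AB) and tr(BA): identical

Sigma = matrix(c(2,1,
                 1,3), nrow=2, byrow=TRUE)   # cov(X)
Z = matrix(rnorm(2*100000), ncol=2)          # Rows are iid N(0,I) vectors
X = Z %*% chol(Sigma)                        # Rows now have covariance matrix Sigma
M = matrix(c(1,0,
             2,-1), nrow=2, byrow=TRUE)      # Matrix of constants
var(X %*% t(M))                              # Sample covariance matrix of MX ...
M %*% Sigma %*% t(M)                         # ... is approximately M Sigma M'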