\documentclass[serif]{beamer} % Get Computer Modern math font.
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{More Linear Algebra\footnote{See last slide for copyright information.}}
\subtitle{STA431 Winter/Spring 2013}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}

\section{Things you already know}

\begin{frame}
\frametitle{You already know about}
\begin{itemize}
\item Matrices $\mathbf{A} = [a_{ij}]$
\item Matrix addition and subtraction $\mathbf{A}+\mathbf{B} = [a_{ij}+b_{ij}]$
\item Scalar multiplication $a \mathbf{B} = [a\,b_{ij}]$
\item Matrix multiplication $\mathbf{AB} = \left[ \displaystyle{\sum_k}a_{ik}b_{kj} \right]$
\item Inverse $\mathbf{A}^{-1}\mathbf{A} = \mathbf{AA}^{-1} = \mathbf{I}$
\item Transpose $\mathbf{A}^\prime = [a_{ji}]$
\item Symmetric matrices $\mathbf{A} = \mathbf{A}^\prime$
\item Determinants
\item Linear independence
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Linear independence}
%\framesubtitle{}
Let $\mathbf{X}$ be an $n \times p$ matrix of constants. The columns of $\mathbf{X}$ are said to be \emph{linearly dependent} if there exists $\mathbf{v} \neq \mathbf{0}$ with $\mathbf{Xv} = \mathbf{0}$. We will say that the columns of $\mathbf{X}$ are linearly \emph{independent} if $\mathbf{Xv} = \mathbf{0}$ implies $\mathbf{v} = \mathbf{0}$.

\vspace{10mm}

For example, show that if $\mathbf{A}^{-1}$ exists, then the columns of $\mathbf{A}$ are linearly independent:
\begin{displaymath}
\mathbf{Av} = \mathbf{0} \Rightarrow \mathbf{A}^{-1}\mathbf{Av} = \mathbf{A}^{-1}\mathbf{0}
\Rightarrow \mathbf{v} = \mathbf{0}
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{How to show $\mathbf{A}^{-1\prime}= \mathbf{A}^{\prime-1}$}
Suppose $\mathbf{B} = \mathbf{A}^{-1}$, meaning $\mathbf{AB} = \mathbf{BA} = \mathbf{I}$.
Must show two things: $\mathbf{B}^\prime\mathbf{A}^\prime = \mathbf{I}$ and $\mathbf{A}^\prime\mathbf{B}^\prime = \mathbf{I}$.
\begin{eqnarray*}
\mathbf{AB} = \mathbf{I} & \Rightarrow & \mathbf{B}^\prime\mathbf{A}^\prime = \mathbf{I}^\prime = \mathbf{I} \\
\mathbf{BA} = \mathbf{I} & \Rightarrow & \mathbf{A}^\prime\mathbf{B}^\prime = \mathbf{I}^\prime = \mathbf{I}
\end{eqnarray*}
$\blacksquare$
\end{frame}

\section{Trace}

\begin{frame}
\frametitle{Trace of a square matrix: Sum of the diagonal elements}
\begin{displaymath}
tr(\mathbf{A}) = \sum_{i=1}^n a_{i,i}.
\end{displaymath}
\vspace{10mm}
\begin{itemize}
\item Of course $tr(\mathbf{A}+\mathbf{B}) = tr(\mathbf{A}) + tr(\mathbf{B})$, etc.
\item But less obviously, even though $\mathbf{AB} \neq \mathbf{BA}$,
\item $tr(\mathbf{AB}) = tr(\mathbf{BA})$
\end{itemize}
\end{frame}
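% A small numerical illustration of $tr(\mathbf{AB}) = tr(\mathbf{BA})$ before the
% general proof. The two $2 \times 2$ matrices below are arbitrary illustrative
% choices; they show that the traces agree even though $\mathbf{AB} \neq \mathbf{BA}$.
\begin{frame}
\frametitle{$tr(\mathbf{AB}) = tr(\mathbf{BA})$: a small numerical check}
Take
\begin{displaymath}
\mathbf{A} = \left( \begin{array}{c c} 1 & 2 \\ 3 & 4 \end{array} \right)
~~\mbox{ and }~~
\mathbf{B} = \left( \begin{array}{c c} 0 & 1 \\ 1 & 0 \end{array} \right).
\end{displaymath}
Then
\begin{displaymath}
\mathbf{AB} = \left( \begin{array}{c c} 2 & 1 \\ 4 & 3 \end{array} \right)
~~\mbox{ and }~~
\mathbf{BA} = \left( \begin{array}{c c} 3 & 4 \\ 1 & 2 \end{array} \right),
\end{displaymath}
so $\mathbf{AB} \neq \mathbf{BA}$, but $tr(\mathbf{AB}) = 2+3 = 5 = 3+2 = tr(\mathbf{BA})$.
\end{frame}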
\begin{frame}
\frametitle{$tr(\mathbf{AB}) = tr(\mathbf{BA})$}
Let $\mathbf{A}$ be an $r \times p$ matrix and $\mathbf{B}$ be a $p \times r$ matrix, so that the product matrices $\mathbf{AB}$ and $\mathbf{BA}$ are both defined.
{\small
\begin{eqnarray*}
tr(\mathbf{AB}) &=& tr\left( \left[ \sum_{k=1}^p a_{i,k}b_{k,j} \right] \right) \\
&=& \sum_{i=1}^r \sum_{k=1}^p a_{i,k}b_{k,i} \\
&=& \sum_{k=1}^p \sum_{i=1}^r b_{k,i}a_{i,k} \\
&=& \sum_{i=1}^p \sum_{k=1}^r b_{i,k}a_{k,i} ~~~\mbox{ (Switching }i\mbox{ and } k) \\
&=& tr\left( \left[ \sum_{k=1}^r b_{i,k}a_{k,j} \right] \right) \\
&=& tr(\mathbf{BA})
\end{eqnarray*}
} % End size
\end{frame}

\begin{frame}
\frametitle{Eigenvalues and eigenvectors}
%\framesubtitle{}
Let $\mathbf{A} = [a_{i,j}]$ be an $n \times n$ matrix, so that the following applies to square matrices. $\mathbf{A}$ is said to have an \emph{eigenvalue} $\lambda$ and (non-zero) \emph{eigenvector} $\mathbf{x}$ corresponding to $\lambda$ if
\begin{displaymath}
\mathbf{Ax} = \lambda\mathbf{x}.
\end{displaymath}
\begin{itemize}
\item Eigenvalues are the $\lambda$ values that solve the determinantal equation $|\mathbf{A}-\lambda\mathbf{I}| = 0$.
\item The determinant is the product of the eigenvalues: $|\mathbf{A}| = \prod_{i=1}^n \lambda_i$
\end{itemize}
\end{frame}

\section{Spectral decomposition}

\begin{frame}
\frametitle{Spectral decomposition of symmetric matrices}
%\framesubtitle{}
The \emph{Spectral decomposition theorem} says that every square and symmetric matrix $\mathbf{A} = [a_{i,j}]$ may be written
\begin{equation*}
\mathbf{A} = \mathbf{P}\boldsymbol{\Lambda} \mathbf{P}^\prime,
\end{equation*}
where the columns of $\mathbf{P}$ (which may also be denoted $\mathbf{x}_1, \ldots, \mathbf{x}_n$) are the eigenvectors of $\mathbf{A}$, and the diagonal matrix $\boldsymbol{\Lambda}$ contains the corresponding eigenvalues.
\begin{displaymath}
\boldsymbol{\Lambda} = \left[ \begin{array}{c c c c }
\lambda_1 & 0 & \cdots & 0 \\
0 & \lambda_2 & \cdots & 0 \\
\vdots & \vdots & \ddots & \vdots \\
0 & 0 & \cdots & \lambda_n \\
\end{array} \right]
\end{displaymath}
Because the eigenvectors are orthonormal, $\mathbf{P}$ is an orthogonal matrix; that is, $\mathbf{PP}^\prime = \mathbf{P}^\prime\mathbf{P} = \mathbf{I}$.
\end{frame}

\section{Positive definite matrices}

\begin{frame}
\frametitle{Positive definite matrices}
%\framesubtitle{}
The $n \times n$ matrix $\mathbf{A}$ is said to be \emph{positive definite} if
\begin{displaymath}
\mathbf{y}^\prime \mathbf{A} \mathbf{y} > 0
\end{displaymath}
for \emph{all} $n \times 1$ vectors $\mathbf{y} \neq \mathbf{0}$. It is called \emph{non-negative definite} (or sometimes positive semi-definite) if $\mathbf{y}^\prime \mathbf{A} \mathbf{y} \geq 0$ for all such $\mathbf{y}$.
\end{frame}
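% A small illustration of the definition. The $2 \times 2$ matrix below is an
% arbitrary illustrative choice; it shows how positive definiteness can be checked
% directly from $\mathbf{y}^\prime \mathbf{A} \mathbf{y} > 0$ by completing the square.
\begin{frame}
\frametitle{Example: A $2 \times 2$ positive definite matrix}
Let $\mathbf{A} = \left( \begin{array}{c c} 2 & 1 \\ 1 & 2 \end{array} \right)$
and $\mathbf{y} = \left( \begin{array}{c} y_1 \\ y_2 \end{array} \right)$. Then
\begin{eqnarray*}
\mathbf{y}^\prime \mathbf{A} \mathbf{y} & = & 2y_1^2 + 2y_1y_2 + 2y_2^2 \\
& = & (y_1+y_2)^2 + y_1^2 + y_2^2,
\end{eqnarray*}
which is strictly positive unless $y_1 = y_2 = 0$. So $\mathbf{A}$ is positive definite.
\end{frame}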
\begin{frame}
\frametitle{Example: Show $\mathbf{X}^\prime\mathbf{X}$ non-negative definite}
Let $\mathbf{X}$ be an $n \times p$ matrix of real constants and $\mathbf{y}$ be $p \times 1$.
Then $\mathbf{Z} = \mathbf{Xy}$ is $n \times 1$, and
\begin{eqnarray*}
& & \mathbf{y}^\prime \, (\mathbf{X}^\prime\mathbf{X}) \, \mathbf{y} \\
& = & (\mathbf{Xy})^\prime (\mathbf{Xy}) \\
& = & \mathbf{Z}^\prime \mathbf{Z} \\
& = & \sum_{i=1}^n Z_i^2 \geq 0
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Some properties of symmetric positive definite matrices}
\framesubtitle{Variance-covariance matrices are often assumed positive definite.}
\begin{itemize}
\item[] Positive definite
\item[$\Rightarrow$] All eigenvalues positive $\Leftrightarrow$ Determinant positive
\item[$\Rightarrow$] Inverse exists $\Leftrightarrow$ Columns (rows) linearly independent
\end{itemize}
\vspace{5mm}
If a real symmetric matrix is also non-negative definite, as a variance-covariance matrix \emph{must} be, Inverse exists $\Rightarrow$ Positive definite
\end{frame}

\begin{frame}
\frametitle{Showing Positive definite $\Rightarrow$ Eigenvalues positive}
\framesubtitle{For example}
Let $\mathbf{A}$ be square and symmetric as well as positive definite.
\begin{itemize}
\item Spectral decomposition says $\mathbf{A} = \mathbf{P\Lambda P}^\prime$.
\item Using $\mathbf{y}^\prime \mathbf{A} \mathbf{y} > 0$, let $\mathbf{y}$ be an eigenvector, say the third one.
\item Because eigenvectors are orthonormal,
\end{itemize}
\begin{eqnarray*}
\mathbf{y}^\prime \mathbf{A} \mathbf{y} & = & \mathbf{y}^\prime \mathbf{P\Lambda P}^\prime \mathbf{y} \\
& = & ( \begin{array}{ccccc} 0 & 0 & 1 & \cdots & 0 \end{array} )
\left( \begin{array}{c c c c }
\lambda_1 & 0 & \cdots & 0 \\
0 & \lambda_2 & \cdots & 0 \\
\vdots & \vdots & \ddots & \vdots \\
0 & 0 & \cdots & \lambda_n \\
\end{array} \right)
\left( \begin{array}{c} 0 \\ 0 \\ 1 \\ \vdots \\ 0 \end{array} \right) \\
& = & \lambda_3 \\
& > & 0
\end{eqnarray*}
\end{frame}

\section{Square root matrices}

\begin{frame}
\frametitle{Square root matrices}
% \framesubtitle{For symmetric matrices}
% Real if non-neg def
{\small
Define
\begin{displaymath}
\boldsymbol{\Lambda}^{1/2} = \left( \begin{array}{c c c c }
\sqrt{\lambda_1} & 0 & \cdots & 0 \\
0 & \sqrt{\lambda_2} & \cdots & 0 \\
\vdots & \vdots & \ddots & \vdots \\
0 & 0 & \cdots & \sqrt{\lambda_n} \\
\end{array} \right)
\end{displaymath}
So that
\begin{eqnarray*}
\boldsymbol{\Lambda}^{1/2} \boldsymbol{\Lambda}^{1/2} & = &
\left( \begin{array}{c c c c }
\sqrt{\lambda_1} & 0 & \cdots & 0 \\
0 & \sqrt{\lambda_2} & \cdots & 0 \\
\vdots & \vdots & \ddots & \vdots \\
0 & 0 & \cdots & \sqrt{\lambda_n} \\
\end{array} \right)
\left( \begin{array}{c c c c }
\sqrt{\lambda_1} & 0 & \cdots & 0 \\
0 & \sqrt{\lambda_2} & \cdots & 0 \\
\vdots & \vdots & \ddots & \vdots \\
0 & 0 & \cdots & \sqrt{\lambda_n} \\
\end{array} \right) \\
& = & \left( \begin{array}{c c c c }
\lambda_1 & 0 & \cdots & 0 \\
0 & \lambda_2 & \cdots & 0 \\
\vdots & \vdots & \ddots & \vdots \\
0 & 0 & \cdots & \lambda_n \\
\end{array} \right) = \boldsymbol{\Lambda}
\end{eqnarray*}
} % End size
\end{frame}

\begin{frame}
\frametitle{For a general symmetric matrix $\mathbf{A}$}
Define
\begin{displaymath}
\mathbf{A}^{1/2} = \mathbf{P} \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\prime
\end{displaymath}
So that
\begin{eqnarray*}
\mathbf{A}^{1/2}\mathbf{A}^{1/2} & = & \mathbf{P} \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\prime \mathbf{P} \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\prime \\
& = & \mathbf{P} \boldsymbol{\Lambda}^{1/2} \, \mathbf{I} \, \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\prime \\
& = & \mathbf{P} \boldsymbol{\Lambda}^{1/2} \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\prime \\
& = & \mathbf{P} \boldsymbol{\Lambda} \mathbf{P}^\prime \\
& = & \mathbf{A}
\end{eqnarray*}
\end{frame}
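% A worked $2 \times 2$ example of the square root matrix. The matrix is an
% arbitrary illustrative choice (the same one used in the earlier positive definite
% example); its spectral decomposition and square root can be verified by direct
% multiplication.
\begin{frame}
\frametitle{Example: A $2 \times 2$ square root matrix}
The matrix $\mathbf{A} = \left( \begin{array}{c c} 2 & 1 \\ 1 & 2 \end{array} \right)$
has eigenvalues $\lambda_1 = 3$ and $\lambda_2 = 1$, with
\begin{displaymath}
\mathbf{P} = \frac{1}{\sqrt{2}} \left( \begin{array}{r r} 1 & 1 \\ 1 & -1 \end{array} \right)
~~\mbox{ and }~~
\boldsymbol{\Lambda} = \left( \begin{array}{c c} 3 & 0 \\ 0 & 1 \end{array} \right).
\end{displaymath}
Then
\begin{displaymath}
\mathbf{A}^{1/2} = \mathbf{P} \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\prime
= \frac{1}{2} \left( \begin{array}{c c}
\sqrt{3}+1 & \sqrt{3}-1 \\ \sqrt{3}-1 & \sqrt{3}+1 \end{array} \right),
\end{displaymath}
and multiplying confirms $\mathbf{A}^{1/2}\mathbf{A}^{1/2} = \mathbf{A}$.
\end{frame}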
\begin{frame}
\frametitle{More about symmetric positive definite matrices}
\framesubtitle{Show as exercises}
Let $\mathbf{A}$ be symmetric and positive definite. Then $\mathbf{A}^{-1} = \mathbf{P}\boldsymbol{\Lambda}^{-1} \mathbf{P}^\prime $.

\vspace{5mm}

Letting $\mathbf{B} = \mathbf{P}\boldsymbol{\Lambda}^{-1/2} \mathbf{P}^\prime $,
\begin{eqnarray*}
\mathbf{B} & = & \left( \mathbf{A}^{-1} \right)^{1/2} \\
\mathbf{B} & = & \left( \mathbf{A}^{1/2} \right)^{-1}
\end{eqnarray*}
This justifies saying $\mathbf{A}^{-1/2} = \mathbf{P}\boldsymbol{\Lambda}^{-1/2} \mathbf{P}^\prime $.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Copyright Information}
This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/431s13}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/431s13}}
\end{frame}

\end{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%