% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
% \usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usetheme{AnnArbor} % CambridgeUS % I'm using this one (yellow) just to be different from Dehan.
\usepackage{comment}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{tikz}
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{Rotating the Principal Components\footnote{See last slide for copyright information.}}
\subtitle{STA431 Spring 2023}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Summary}
%\framesubtitle{}
\begin{itemize}
\item Rotation is what makes exploratory factor analysis results understandable.
\item R's stand-alone \texttt{varimax} function can also be used to rotate principal components.
\item The result is a set of uncorrelated linear combinations of the variables that explain exactly the same amount of variance as the original components, but are easier to interpret.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Setting}
%\framesubtitle{}
\begin{itemize}
\item Standardized data vector $\mathbf{z}$ is $k \times 1$
\item $cov(\mathbf{z}) = \boldsymbol{\Sigma}$
\item $\boldsymbol{\Sigma} = \mathbf{CDC}^\top$
\item $\mathbf{y} = \mathbf{C}^\top\mathbf{z} \iff \mathbf{z} = \mathbf{Cy}$
% \item $\mathbf{y} = \mathbf{C}^\top\mathbf{z}$
\item $cov(\mathbf{y}) = \mathbf{D}$
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Standardize and Select Principal Components}
\framesubtitle{Standardize first for convenience}
\pause
\begin{columns}
\column{0.3\textwidth}
\begin{itemize}
\item $cov(\mathbf{y}) = \mathbf{D}$
\item Let $\mathbf{y}_2 = \mathbf{D}^{-\frac{1}{2}}\mathbf{y}$
\item $cov(\mathbf{y}_2) = \mathbf{I}_k$
\end{itemize}
\pause
\column{0.7\textwidth}
\begin{eqnarray*}
cor(\mathbf{z},\mathbf{y}) & = & cov(\mathbf{z},\mathbf{y}_2) \\ \pause
& = & cov(\mathbf{z},\mathbf{D}^{-\frac{1}{2}}\mathbf{y}) \\ \pause
& = & cov(\mathbf{z},\mathbf{D}^{-\frac{1}{2}}\mathbf{C}^\top\mathbf{z}) \\ \pause
& = & cov(\mathbf{z},\mathbf{z}) \left( \mathbf{D}^{-\frac{1}{2}}\mathbf{C}^\top \right)^\top \\ \pause
& = & \boldsymbol{\Sigma} \mathbf{CD}^{-\frac{1}{2}} \\ \pause
& = & \mathbf{CDC}^\top \mathbf{CD}^{-\frac{1}{2}} \\ \pause
& = & \mathbf{CD}^{\frac{1}{2}}
\end{eqnarray*}
\end{columns}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$cor(\mathbf{z},\mathbf{y}) = \mathbf{CD}^{\frac{1}{2}}$ is a good matrix}
%\framesubtitle{}
\begin{itemize}
\item Square all the elements and get components of variance.
\item Squared correlations add to one for each row.
\item Squared correlations add to eigenvalues for each column.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Squared correlations add to one for each row.}
\framesubtitle{$cor(\mathbf{z},\mathbf{y}) = \mathbf{CD}^{\frac{1}{2}}$}
Look at diagonal elements of
{\LARGE
\begin{eqnarray*}
\mathbf{CD}^{\frac{1}{2}} \left( \mathbf{CD}^{\frac{1}{2}} \right)^\top \pause
& = & \mathbf{CD}^{\frac{1}{2}} \mathbf{D}^{\frac{1}{2}}\mathbf{C}^\top \\ \pause
& = & \mathbf{CDC}^\top \\ \pause
& = & \boldsymbol{\Sigma} = cov(\mathbf{z})
\end{eqnarray*}
} % End size
Diagonal elements are all ones.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Squared correlations add to eigenvalues for each column}
\framesubtitle{$cor(\mathbf{z},\mathbf{y}) = \mathbf{CD}^{\frac{1}{2}}$}
Look at diagonal elements of
{\LARGE
\begin{eqnarray*}
\left( \mathbf{CD}^{\frac{1}{2}} \right)^\top \mathbf{CD}^{\frac{1}{2}} \pause
& = & \mathbf{D}^{\frac{1}{2}}\mathbf{C}^\top \mathbf{CD}^{\frac{1}{2}} \\ \pause
& = & \mathbf{D}^{\frac{1}{2}} \mathbf{D}^{\frac{1}{2}} \\ \pause
& = & \mathbf{D}
\end{eqnarray*}
} % End size
Eigenvalues.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Select First $p$ Principal Components}
\framesubtitle{Probably those with eigenvalues greater than one}
% Recalling that the standardized components are $\mathbf{y}_2 = \mathbf{D}^{-\frac{1}{2}}\mathbf{y}$,
\pause
\begin{eqnarray*}
\mathbf{z} & = & \mathbf{Cy} \\ \pause
& = & \mathbf{CD}^{\frac{1}{2}} \mathbf{D}^{-\frac{1}{2}} \, \mathbf{y} \\ \pause
& = & \underbrace{\mathbf{CD}^{\frac{1}{2}}}_{k \times k} \, \underbrace{\mathbf{y}_2}_{k \times 1} \\ \pause
& = & ( \underbrace{\mathbf{L}}_{k \times p} ~ | \underbrace{\mathbf{M}}_{k \times (k-p)} )
\left( \begin{array}{l} \mathbf{f} \\ \hline \mathbf{g} \end{array} \right)
\begin{array}{l}
\mbox{\scriptsize $\leftarrow p \times 1$} \\
\mbox{\scriptsize $\leftarrow (k-p) \times 1$}
\end{array} \\ \pause
& = & \mathbf{Lf} + \mathbf{M}\mathbf{g} \\ \pause
& = & \mathbf{Lf} + \mathbf{e}
\end{eqnarray*}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$\mathbf{z} = \mathbf{Lf} + \mathbf{e}$}
%\framesubtitle{}
\begin{itemize}
\item It looks like a factor analysis model. %, except that the ``factors" in $\mathbf{f}$ and the error terms in $\mathbf{e}$ are observable. (Well \ldots)
\pause
\item $\mathbf{f}$ contains the first $p$ principal components, standardized.
\pause
\item $cov(\mathbf{f}) = \mathbf{I}_p$.
\item $\mathbf{L}$ contains the first $p$ columns of $cor(\mathbf{z},\mathbf{y}) = \mathbf{CD}^{\frac{1}{2}}$.
\pause
\item Recalling
\begin{eqnarray*}
\mathbf{z} & = & ( \mathbf{L} ~ | ~ \mathbf{M} ) \left( \begin{array}{l} \mathbf{f} \\ \hline \mathbf{g} \end{array} \right) \\ \pause
& = & \mathbf{Lf} + \mathbf{M}\mathbf{g} \\
& = & \mathbf{Lf} + \mathbf{e} ,
\end{eqnarray*}
have $cov(\mathbf{f},\mathbf{e}) = \mathbf{O}$.
\pause
\vspace{2mm}
\item Results for factor analysis apply:
\begin{itemize}
\item Components of variance explained by $\mathbf{f}$ are squared correlations.
\item Communalities (explained variance of each variable) are not affected by rotation.
\end{itemize}
\end{itemize}
\end{frame} % commonality.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Rotate}
%\framesubtitle{}
{\LARGE
\begin{eqnarray*}
\mathbf{z} & = & \mathbf{Lf} + \mathbf{e} \\ \pause
& = & \mathbf{L \, R}^\top \mathbf{R} \, \mathbf{f} + \mathbf{e} \\ \pause
& = & (\mathbf{LR}^\top) \, (\mathbf{R} \mathbf{f}) + \mathbf{e} \\ \pause
& = & \mathbf{L}_2 \mathbf{f}^\prime + \mathbf{e},
\end{eqnarray*}
} % End size
\noindent where $\mathbf{L}_2$ is the ``rotated factor matrix,'' and $\mathbf{f}^\prime$ are the rotated principal components.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$\mathbf{z} = \mathbf{L}_2 \mathbf{f}^\prime + \mathbf{e}$}
%\framesubtitle{}
\begin{itemize}
\item $cov(\mathbf{f}^\prime) = \mathbf{I}_p$, so the rotated components are still uncorrelated.
\item $cov(\mathbf{z}, \mathbf{f}^\prime) = \mathbf{L}_2$ is a matrix of correlations.
\item You can examine $\widehat{\mathbf{L}}_2$ to determine what the rotated factors \emph{mean} in terms of the original variables.
\pause
\item Rotation affects how much variance each component explains, but not the total amount of variance explained.
\item Rotation does \emph{not} affect the amount of explained variance for each variable.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{In Practice it's Very Simple}
%\framesubtitle{}
\begin{itemize}
\item Extract sample principal components and decide how many to keep.
\item Put the ones you decide to keep in $\mathbf{Y}_{n \times p}$.
\pause
\item Apply a varimax rotation to estimated $cor(\mathbf{Z},\mathbf{Y})$. This is $\widehat{\mathbf{L}}_2$.
\pause
\item If you like the result,
\begin{itemize}
\item Standardize the principal components in $\mathbf{Y}$, using $n$ in the denominator. Call the result $\mathbf{W}$. The rows of $\mathbf{W}$ are approximately $\mathbf{f}_1, \ldots, \mathbf{f}_n$.
\pause
\item Compute $\mathbf{W} \mathbf{R}^\top$, where $\mathbf{R}^\top$ is the rotation matrix located by \texttt{varimax}.
\item The rows are the rotated sample principal components.
\end{itemize}
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Copyright Information}
This slide show was prepared by
\href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistical Sciences, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/431s23}
{\small\texttt{http://www.utstat.toronto.edu/\textasciitilde{}brunner/oldclass/431s23}}
\end{frame}

\end{document}