% This version is an improvement on STA431s17
% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
% \usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
% \usetheme{Berkeley}
\usetheme{AnnArbor} % CambridgeUS
% I'm using this one (yellow) just to be different from Dehan.
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{Confirmatory Factor Analysis: Part One\footnote{See last slide for copyright information.}}
\subtitle{STA431 Spring 2023}
\date{} % To suppress date

% Earlier versions preserved standardized observed variables, left over from exploratory factor analysis. In this edition, just drop that feature.

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{A confirmatory factor analysis model}
\framesubtitle{One Factor: Starting simply}
% Put model equations and path diagram side by side.
\begin{tabular}{cc}
\raisebox{.45in}{\parbox{2in}{
\begin{eqnarray*}
d_1 &=& \lambda_1 F + e_1 \\
d_2 &=& \lambda_2 F + e_2 \\
d_3 &=& \lambda_3 F + e_3
\end{eqnarray*}
}} % End parbox and then raisebox
&
\includegraphics[width=1.5in]{OneFactor}
\end{tabular}
\pause
\begin{itemize}
\item $Var(F)=1$
\item $Var(e_j)=\omega_j$
\item $F,e_1,e_2,e_3$ all independent.
\pause
\vspace{2mm}
\end{itemize}
This is also an exploratory factor analysis model with one factor.
\end{frame}

\begin{frame}
\frametitle{Calculate $\boldsymbol{\Sigma}$}
%\framesubtitle{}
\begin{columns}
\column{0.3\textwidth}
\begin{eqnarray*}
d_1 &=& \lambda_1 F + e_1 \\
d_2 &=& \lambda_2 F + e_2 \\
d_3 &=& \lambda_3 F + e_3
\end{eqnarray*}
\column{0.7\textwidth}
\begin{displaymath}
{\Large \boldsymbol{\Sigma} } ~~~=~~~
\begin{array}{c|ccc}
    & d_1 & d_2 & d_3 \\ \hline
d_1 & \lambda_1^2 + \omega_1 & \lambda_1\lambda_2     & \lambda_1\lambda_3 \\
d_2 &                        & \lambda_2^2 + \omega_2 & \lambda_2\lambda_3 \\
d_3 &                        &                        & \lambda_3^2 + \omega_3
\end{array}
\end{displaymath}
\pause
\end{columns}
\vspace{10mm}
Are the parameters identifiable? \pause What if just one $\lambda$ is zero?
\end{frame}
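
% Optional worked-example slide (a sketch): one covariance and one variance calculated explicitly,
% using only the model equations and the independence assumptions from the preceding slides.
\begin{frame}
\frametitle{Calculating the elements of $\boldsymbol{\Sigma}$}
\framesubtitle{A sample calculation}
Using $Var(F)=1$, $Var(e_j)=\omega_j$, and independence of $F, e_1, e_2, e_3$,
\begin{eqnarray*}
\sigma_{12} = Cov(d_1,d_2) & = & Cov(\lambda_1 F + e_1, \, \lambda_2 F + e_2) \\ \pause
 & = & \lambda_1\lambda_2 Var(F) + \lambda_1 Cov(F,e_2) + \lambda_2 Cov(e_1,F) + Cov(e_1,e_2) \\ \pause
 & = & \lambda_1\lambda_2, \mbox{ and} \\ \pause
\sigma_{11} = Var(d_1) & = & \lambda_1^2 Var(F) + 2\lambda_1 Cov(F,e_1) + Var(e_1)
 \pause ~=~ \lambda_1^2 + \omega_1.
\end{eqnarray*}
\pause
The other elements of $\boldsymbol{\Sigma}$ follow the same pattern.
\end{frame}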
\begin{frame}
\frametitle{Suppose no factor loadings equal zero}
%\framesubtitle{}
{\footnotesize
\begin{displaymath}
\boldsymbol{\Sigma} =
\left(\begin{array}{ccc}
\sigma_{11} & \sigma_{12} & \sigma_{13} \\
            & \sigma_{22} & \sigma_{23} \\
            &             & \sigma_{33}
\end{array}\right) =
\left(\begin{array}{ccc}
\lambda_1^2 + \omega_1 & \lambda_1\lambda_2     & \lambda_1\lambda_3 \\
                       & \lambda_2^2 + \omega_2 & \lambda_2\lambda_3 \\
                       &                        & \lambda_3^2 + \omega_3
\end{array}\right)
\end{displaymath}
\pause
} % End size
\begin{eqnarray*}
\lambda_1^2 & = & \frac{\sigma_{12}\sigma_{13}}{\sigma_{23}} \pause
              =   \frac{\lambda_1\lambda_2 \, \lambda_1\lambda_3}{\lambda_2\lambda_3} \\ \pause
 && \\
\lambda_2^2 & = & \frac{\sigma_{12}\sigma_{23}}{\sigma_{13}} \\
 && \\
\lambda_3^2 & = & \frac{\sigma_{13}\sigma_{23}}{\sigma_{12}}
\end{eqnarray*}
\pause
\begin{itemize}
\item Squared factor loadings are identifiable, but not the loadings.
\item Replace all $\lambda_j$ with $-\lambda_j$ and get the same $\boldsymbol{\Sigma}$. \pause
\item The likelihood function will have two maxima of the same height.
\item Which one you find depends on where you start.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Solution: Decide on the sign of one loading}
\framesubtitle{Based on \emph{meaning}. Suppose these are math tests.}
\begin{center}
\includegraphics[width=2.5in]{OneFactor}
\end{center}
\pause
\begin{itemize}
\item Is $F$ math ability or math \emph{inability}? You decide. \pause
\item It's just a matter of naming the factors.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{If $\lambda_1>0$}
%\framesubtitle{}
%{\footnotesize
\begin{displaymath}
\boldsymbol{\Sigma} =
\left(\begin{array}{ccc}
\sigma_{11} & \sigma_{12} & \sigma_{13} \\
            & \sigma_{22} & \sigma_{23} \\
            &             & \sigma_{33}
\end{array}\right) =
\left(\begin{array}{ccc}
\lambda_1^2 + \omega_1 & \lambda_1\lambda_2     & \lambda_1\lambda_3 \\
                       & \lambda_2^2 + \omega_2 & \lambda_2\lambda_3 \\
                       &                        & \lambda_3^2 + \omega_3
\end{array}\right)
\end{displaymath}
\pause
%} % End size
\vspace{10mm}
\begin{itemize}
\item Signs of $\lambda_2$ and $\lambda_3$ can be recovered right away from $\boldsymbol{\Sigma}$. \pause
\item And all the parameters are identified (explicit formulas on the next slide).
\end{itemize}
\end{frame}
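
% Optional worked-solution slide (a sketch): explicit formulas for all six parameters,
% assuming lambda_1 > 0, using only quantities defined on the preceding slides.
\begin{frame}
\frametitle{Explicit solution when $\lambda_1>0$}
\framesubtitle{A worked version of the preceding slide}
\begin{eqnarray*}
\lambda_1 & = & +\sqrt{\frac{\sigma_{12}\sigma_{13}}{\sigma_{23}}} \\ \pause
\lambda_2 & = & \sigma_{12}/\lambda_1 \hspace{15mm} \lambda_3 ~=~ \sigma_{13}/\lambda_1 \\ \pause
\omega_j  & = & \sigma_{jj} - \lambda_j^2 \mbox{ for } j = 1, 2, 3
\end{eqnarray*}
\pause
Every parameter is a function of $\boldsymbol{\Sigma}$, so the whole parameter vector is identifiable.
\end{frame}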
\begin{frame}
\frametitle{Add another variable: $d_4 = \lambda_4 F + e_4$} \pause
%\framesubtitle{}
{\footnotesize
\begin{displaymath}
\boldsymbol{\Sigma} =
\left(\begin{array}{cccc}
\lambda_1^2 + \omega_1 & \lambda_1\lambda_2     & \lambda_1\lambda_3     & \lambda_1\lambda_4 \\
                       & \lambda_2^2 + \omega_2 & \lambda_2\lambda_3     & \lambda_2\lambda_4 \\
                       &                        & \lambda_3^2 + \omega_3 & \lambda_3\lambda_4 \\
                       &                        &                        & \lambda_4^2 + \omega_4
\end{array}\right)
\end{displaymath}
\pause
} % End size
\begin{itemize}
\item Parameters will all be identifiable as long as 3 out of 4 loadings are non-zero, and one sign is known. \pause
\item For example, if only $\lambda_1=0$, then the covariances in the top row all equal zero, \pause and you can get $\lambda_2, \lambda_3, \lambda_4$ as before. \pause
\item For 5 variables, two loadings can be zero, etc. \pause
\item How many equality restrictions with 4 variables? Six covariances are functions of four factor loadings, so $6-4=2$. % \pause
\item Inequality restrictions? It's like an Easter egg hunt.
\end{itemize}
\end{frame}

% Going back to the covariance matrix, we can use those additional covariances.

\begin{frame}
\frametitle{Four Observable Variables}
% \framesubtitle{}
\begin{center}
\includegraphics[width=3in]{OneFactor4vars-a}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Four Observable Variables}
% \framesubtitle{}
\begin{center}
\includegraphics[width=3in]{OneFactor4vars-b}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Identifying the covariances between error terms}
% \framesubtitle{}
\begin{center}
\includegraphics[width=1.75in]{OneFactor4vars-b}
\end{center}
{\small
\begin{eqnarray*}
d_2 &=& \lambda_2 F + e_2 \\
d_4 &=& \lambda_4 F + e_4 \\ \pause
\sigma_{24}=Cov(d_2,d_4) &=& \lambda_2\lambda_4 Var(F) + Cov(e_2,e_4) \\ \pause
 &=& \lambda_2\lambda_4 + \omega_{24} \\ \pause
 &\Rightarrow& \omega_{24} = \sigma_{24} - \lambda_2\lambda_4
\end{eqnarray*}
} % End size
\end{frame}

\begin{frame}
\frametitle{Now add another factor}
\framesubtitle{$Var(F_1)=Var(F_2) = 1$}
\begin{center}
\includegraphics[width=3in]{TwoFactors}
\end{center}
%{\footnotesize
\begin{eqnarray*}
d_1 & = & \lambda_1 F_1 + e_1 \\
    & \vdots & \\
d_6 & = & \lambda_6 F_2 + e_6
\end{eqnarray*}
%} % End size
\end{frame}

\begin{frame}
\frametitle{Covariance matrix of observable variables}
%\framesubtitle{}
\begin{displaymath}
\boldsymbol{\Sigma} =
\left(\begin{array}{rrrrrr}
\lambda_1^2 + \omega_1 & \lambda_{1} \lambda_{2} & \lambda_{1} \lambda_{3} & \lambda_{1} \lambda_{4} \phi_{12} & \lambda_{1} \lambda_{5} \phi_{12} & \lambda_{1} \lambda_{6} \phi_{12} \\
 & \lambda_2^2 + \omega_2 & \lambda_{2} \lambda_{3} & \lambda_{2} \lambda_{4} \phi_{12} & \lambda_{2} \lambda_{5} \phi_{12} & \lambda_{2} \lambda_{6} \phi_{12} \\
 & & \lambda_3^2 + \omega_3 & \lambda_{3} \lambda_{4} \phi_{12} & \lambda_{3} \lambda_{5} \phi_{12} & \lambda_{3} \lambda_{6} \phi_{12} \\
 & & & \lambda_4^2 + \omega_4 & \lambda_{4} \lambda_{5} & \lambda_{4} \lambda_{6} \\
 & & & & \lambda_5^2 + \omega_5 & \lambda_{5} \lambda_{6} \\
 & & & & & \lambda_6^2 + \omega_6
\end{array}\right)
\end{displaymath}
\pause
\begin{itemize}
\item Identify $\lambda_1, \lambda_2, \lambda_3$ from set one (upper left), assuming one sign is known. \pause
\item Identify $\lambda_4, \lambda_5, \lambda_6$ from set two (lower right). \pause
\item Identify $\phi_{12}$ from any unused covariance. \pause
\item What if you added more variables?
\item What if you added more factors?
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Three-variable identification rule}
\framesubtitle{For standardized factors}
For a factor analysis model, the parameters will be identifiable provided \pause
\begin{itemize}
\item Errors are independent of one another and of the factors.
\item Variances of all factors equal one.
\item Each observed variable is a function of only one factor.
\item There are at least three observable variables with non-zero loadings per factor.
\item The sign of one non-zero loading is known for each factor.
\end{itemize}
\pause
Maybe we can do better.
\end{frame}

\begin{frame}
\frametitle{Reference Variable Definition}
%\framesubtitle{}
A \emph{reference variable} for a latent variable is an observable variable that is a function only of that latent variable and an error term. The factor loading is non-zero.
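
\vspace{3mm} \pause
For example, in the two-factor model presented earlier, $d_1$, $d_2$ and $d_3$ are reference variables for $F_1$, and $d_4$, $d_5$ and $d_6$ are reference variables for $F_2$, provided the factor loadings are non-zero.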
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Three-variable identification rule for standardized factors: Version Two} \pause
\framesubtitle{Notice this is for a single-factor model}
\begin{itemize}
\item For a factor analysis model with a single standardized factor and three reference variables, the parameters will be identifiable provided that the errors are independent of one another and of the factor, and that the sign of one factor loading is known. \pause
\item Additional observed variables may be added to the model, provided that for each additional variable, the error term of the additional variable has zero covariance with the error term of at least one reference variable. \pause
\item The error terms of the additional variables may have non-zero covariance with one another.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Factor Model Combination Rule}
%\framesubtitle{}
\begin{itemize}
\item Suppose there are two factor analysis models $A$ and $B$ with identifiable parameters.
\item Every factor has at least one reference variable. \pause
\item If the error terms of $A$ have zero covariance with the error terms of $B$, the only additional parameters to be identified are the covariances between the $A$ factors and the $B$ factors. \pause
\item These may be identified from the covariances between the $A$ reference variables and the $B$ reference variables (one per factor); see the example on the next slide. \pause
\item What covariances between error terms are permitted? \pause Covariances between the error terms of the ``other'' (non-reference) variables in each set are allowed.
% \item Alternatively \ldots
% I think in 2053 I had a big handwritten segment here under "Alternatively". Oh yeah, it was about straight arrows in place of covariances between error terms. I believe this is taken care of in unit 2.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
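
% Optional worked-example slide (a sketch): the combination rule applied to the two-factor model
% from earlier in this slide set, with d_1 and d_4 taken as the reference variables.
\begin{frame}
\frametitle{Combination rule example}
\framesubtitle{Combining two single-factor models}
Let model $A$ be the single-factor model for $d_1, d_2, d_3$ (factor $F_1$) and model $B$ the single-factor model for $d_4, d_5, d_6$ (factor $F_2$), each with identifiable parameters. \pause
The only new parameter is $\phi_{12} = Cov(F_1,F_2)$. Using the reference variables $d_1$ and $d_4$,
\begin{eqnarray*}
\sigma_{14} = Cov(d_1,d_4) & = & Cov(\lambda_1 F_1 + e_1, \, \lambda_4 F_2 + e_4) \\ \pause
 & = & \lambda_1\lambda_4 \, Cov(F_1,F_2) ~=~ \lambda_1\lambda_4\phi_{12} \\ \pause
 & \Rightarrow & \phi_{12} = \frac{\sigma_{14}}{\lambda_1\lambda_4}.
\end{eqnarray*}
\end{frame}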
\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely.

The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/brunner/oldclass/431s23} {\small\texttt{http://www.utstat.toronto.edu/brunner/oldclass/431s23}}

\end{frame}

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Frame Title}
%\framesubtitle{}
\begin{itemize}
\item
\item
\item
\end{itemize}
\end{frame}

{\LARGE
\begin{displaymath}

\end{displaymath}
} % End size

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Inequality constraints}
\framesubtitle{Slippery devils}
{\footnotesize
\begin{displaymath}
\boldsymbol{\Sigma} =
\left(\begin{array}{ccc}
1 & \sigma_{12} & \sigma_{13} \\
  & 1           & \sigma_{23} \\
  &             & 1
\end{array}\right) =
\left(\begin{array}{ccc}
1 & \lambda_1\lambda_2 & \lambda_1\lambda_3 \\
  & 1                  & \lambda_2\lambda_3 \\
  &                    & 1
\end{array}\right)
\end{displaymath}
} % End size
\begin{itemize}
\item $\sigma_{12}\sigma_{13}\sigma_{23} = \lambda_1^2\lambda_2^2\lambda_3^2$, so $0 \leq \sigma_{12}\sigma_{13}\sigma_{23} < 1$.
\item But $\sigma_{12}\sigma_{13}\sigma_{23} < 1$ is true of \emph{any} correlation matrix, so that's one inequality, not two.
\item How about $\lambda_1^2 = \frac{\sigma_{12}\sigma_{13}}{\sigma_{23}}$, so $0 \leq \frac{\sigma_{12}\sigma_{13}}{\sigma_{23}} < 1$?
\end{itemize}
\begin{eqnarray*}
 & & \frac{\sigma_{12}\sigma_{13}}{\sigma_{23}} \, \frac{\sigma_{23}}{\sigma_{23}}
     ~=~ \frac{\sigma_{12}\sigma_{13}\sigma_{23}}{\sigma_{23}^2} < 1 \\
 & \Rightarrow & 0 \leq \sigma_{12}\sigma_{13}\sigma_{23} < \sigma_{23}^2
\end{eqnarray*}
% Do this with the other two, get something geometrically complicated ...
\end{frame}

Sage work for inequality constraints -- Basically came up empty.

sem = 'http://www.utstat.toronto.edu/~brunner/openSEM/sage/sem.sage'
load(sem)
var('lambda1 lambda2 lambda3')
S = SymmetricMatrix(3,'sigma',corr=True); S
S.eigenvalues() # Terrifying. Sage does not know sigma_ij is even real.
d = S.determinant(); d
factor(expand(d)) # Nothing good
Sigma = S(sigma12=lambda1*lambda2, sigma13=lambda1*lambda3, sigma23=lambda2*lambda3)
Sigma
Sigma.determinant()
expand(_)
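
Possible follow-up (a sketch, untested): verify the one-factor identification formulas with plain Sage,
building the model-implied covariance matrix directly from the loadings and error variances, with no
need for the sem.sage package.

# Sketch: one standardized factor, three observed variables.
var('lambda1 lambda2 lambda3 omega1 omega2 omega3')
L = matrix([[lambda1], [lambda2], [lambda3]])      # Column of factor loadings
Omega = diagonal_matrix([omega1, omega2, omega3])  # Error variances
Sigma = L * L.transpose() + Omega                  # Model-implied covariance matrix, Var(F) = 1
# sigma12*sigma13/sigma23 should reduce to lambda1^2:
(Sigma[0,1] * Sigma[0,2] / Sigma[1,2]).simplify_full()
# sigma11 - sigma12*sigma13/sigma23 should reduce to omega1:
(Sigma[0,0] - Sigma[0,1] * Sigma[0,2] / Sigma[1,2]).simplify_full()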