\documentclass[12pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue,
citecolor=blue, urlcolor=blue]{hyperref} % For links
% \usepackage{fullpage}
%\pagestyle{empty} % No page numbers
% To use more of the top and bottom margins than fullpage
\oddsidemargin=-.2in % Good for US Letter paper
\evensidemargin=-.2in
\textwidth=6.6in
\topmargin=-1.1in
\headheight=0.2in
\headsep=0.5in
\textheight=9.4in

\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{STA 2053 Assignment 3 (Random vectors and measurement
error)}}\footnote{This assignment was prepared by
\href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of
Statistical Sciences, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any
part of it as you like and share the result freely. The \LaTeX~source code
is available from the course website:
\href{http://www.utstat.toronto.edu/brunner/oldclass/2053f22}
{\texttt{http://www.utstat.toronto.edu/brunner/oldclass/2053f22}}}
\vspace{1 mm}
\end{center}

\begin{center}
\begin{tabular}{ll}
$cov(\mathbf{x}) = E\left\{(\mathbf{x}-\boldsymbol{\mu}_x)
(\mathbf{x}-\boldsymbol{\mu}_x)^\top\right\}$ &
$cov(\mathbf{x},\mathbf{y}) = E\left\{ (\mathbf{x}-\boldsymbol{\mu}_x)
(\mathbf{y}-\boldsymbol{\mu}_y)^\top\right\}$
\end{tabular}
\end{center}
\vspace{1 mm}

\noindent These questions are not to be handed in. They are practice for the
quiz on October 31st.

\begin{enumerate}

\item Let $\mathbf{x}$ be a random vector with $cov(\mathbf{x}) =
\boldsymbol{\Sigma}_x$, and let $\mathbf{A}$ and $\mathbf{B}$ be matrices of
constants (of the right dimensions).
\begin{enumerate}
\item Show $cov(\mathbf{Ax}) = \mathbf{A}\boldsymbol{\Sigma}_x\mathbf{A}^\top$.
\item Show $cov(\mathbf{Ax},\mathbf{Bx}) =
\mathbf{A} \boldsymbol{\Sigma}_x \mathbf{B}^\top$.
\end{enumerate}

\item Let $\mathbf{x}$ be a $p \times 1$ random vector and let $\mathbf{y}$
and $\mathbf{z}$ be $q \times 1$ random vectors. Show that
$cov(\mathbf{x},\mathbf{y}+\mathbf{z}) =
cov(\mathbf{x},\mathbf{y}) + cov(\mathbf{x},\mathbf{z})$.

\item Let
\begin{displaymath}
\mathbf{x}_i = \left(\begin{array}{c}
x_{i,1} \\ \vdots \\ x_{i,p}
\end{array}\right)
\mbox{ and }
\overline{\mathbf{x}} = \frac{1}{n} \sum_{i=1}^n \mathbf{x}_i =
\left(\begin{array}{c}
\overline{x}_1 \\ \vdots \\ \overline{x}_p
\end{array}\right).
\end{displaymath}
Let the $p \times p$ matrix $\boldsymbol{\widehat{\Sigma}} =
\frac{1}{n}\sum_{i=1}^n (\mathbf{x}_i-\overline{\mathbf{x}})
(\mathbf{x}_i-\overline{\mathbf{x}})^\top$. Give a \emph{scalar} formula for
element $(2,3)$ of $\boldsymbol{\widehat{\Sigma}}$. If you get stuck, an
example with $p=3$ should help.

\item Let $\mathbf{x}$ and $\mathbf{y}$ be random vectors, and let
$\mathbf{A}$ and $\mathbf{B}$ be matrices of constants. Starting from the
definition, find $cov(\mathbf{Ax},\mathbf{By})$. Show your work. Of course we
are assuming that the matrices are the right size.

\item \label{centeringrule} Denote the centered version of a general random
vector $\mathbf{y}$ by $\stackrel{c}{\mathbf{y}} = \mathbf{y} -
\boldsymbol{\mu}_y$. Let $\mathbf{L} = \mathbf{A}_1\mathbf{x}_1 + \cdots +
\mathbf{A}_m\mathbf{x}_m + \mathbf{b}$, where the $\mathbf{A}_j$ are matrices
of constants, and $\mathbf{b}$ is a vector of constants.
Show $\stackrel{c}{\mathbf{L}} = \mathbf{A}_1 \stackrel{c}{\mathbf{x}}_1 +
\cdots + \mathbf{A}_m \stackrel{c}{\mathbf{x}}_m$.

\item Let $\mathbf{x}$ and $\mathbf{y}$ be $p \times 1$ random vectors. State
whether the following is true or false, and show your work:
$cov(\mathbf{x} + \mathbf{y}) = cov(\mathbf{x}) + cov(\mathbf{y}) +
2cov(\mathbf{x}, \mathbf{y})$.

\vspace{3mm} \hrule \vspace{2mm}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\item\label{testlength} Suppose we have two equivalent measurements with
uncorrelated measurement error:
\begin{eqnarray*}
W_1 & = & X + e_1 \\
W_2 & = & X + e_2,
\end{eqnarray*}
where $E(X)=\mu_x$, $Var(X)=\sigma^2_x$, $E(e_1)=E(e_2)=0$,
$Var(e_1)=Var(e_2)=\sigma^2_e$, and $X$, $e_1$ and $e_2$ are all independent.
What if we were to measure the true score $X$ by adding the two imperfect
measurements together? Would the result be more reliable?
\begin{enumerate}
\item Let $S=W_1+W_2$. Calculate the reliability of $S$. Recall that
reliability is defined as the squared correlation between the true score and
the observed measurement.
\item Suppose you take $n$ independent measurements (in psychometric theory,
these would be called equivalent test items). What is the reliability of
$S=\sum_{i=1}^n W_i$? Show your work.
\item What is the reliability of $\overline{W}_n=\frac{1}{n}\sum_{i=1}^n W_i$?
Show your work.
\item What happens to the reliability of $S$ and $\overline{W}_n$ as the
number of measurements $n \rightarrow \infty$?
\end{enumerate}

Equivalent test items may be largely a fantasy, but equivalent \emph{tests}
are a goal that can be closely approximated in practice. In the two equations
above, $W_1$ and $W_2$ might be tests composed of multiple items. Each item
might have a different error variance. But if the two \emph{sums} or
\emph{averages} of the error variances are the same, the two tests are
equivalent. This is nice, because it tells you that two tests do not need to
be matched item for item in order to be equivalent.
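If you would like to check your answers to Question \ref{testlength}
numerically, here is a minimal R simulation sketch. The seed, sample size and
parameter values are invented for illustration; any similar values will do.
\begin{verbatim}
# Simulation sketch: reliability of a sum of n equivalent measurements.
# All parameter values are illustrative only.
set.seed(9999)
nsim <- 10000                 # simulated subjects
n <- 5                        # measurements per subject
sigma2x <- 4; sigma2e <- 9
X <- rnorm(nsim, mean = 10, sd = sqrt(sigma2x))       # true scores
E <- matrix(rnorm(nsim*n, sd = sqrt(sigma2e)), nsim, n)
W <- X + E                    # X is recycled down the columns of E
S <- rowSums(W)
cor(X, S)^2                   # compare to your formula with n = 5
\end{verbatim}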
% A 2015 HW question, improved in 2017
\item \label{randiv} This question explores the consequences of ignoring
measurement error in the explanatory variable when there is only one
explanatory variable. Independently for $i = 1, \ldots, n$, let
\begin{eqnarray*}
Y_i & = & \beta X_i + \epsilon_i \\
W_i & = & X_i + e_i
\end{eqnarray*}
where all random variables are normal with expected value zero,
$Var(X_i)=\phi>0$, $Var(\epsilon_i)=\psi>0$, $Var(e_i)=\omega>0$ and
$\epsilon_i$, $e_i$ and $X_i$ are all independent. The variables $W_i$ and
$Y_i$ are observable, while $X_i$ is latent. Error terms are never
observable.
\begin{enumerate}
\item What is the parameter vector $\boldsymbol{\theta}$ for this model?
\item Denote the covariance matrix of the observable variables by
$\boldsymbol{\Sigma} = [\sigma_{ij}]$. The unique $\sigma_{ij}$ values are
the moments, and there is a covariance structure equation for each one.
Calculate the variance-covariance matrix $\boldsymbol{\Sigma}$ of the
observable variables, expressed as a function of the model parameters. You
now have the covariance structure equations.
\item Does this model pass the test of the parameter count rule? Answer Yes
or No and give the numbers.
\item Are there any points in the parameter space where the parameter
$\beta$ is identifiable? Are there infinitely many, or just one point?
\item The naive estimator of $\beta$ is
\begin{displaymath}
\widehat{\beta}_n = \frac{\sum_{i=1}^n W_i Y_i}{\sum_{i=1}^n W_i^2}.
\end{displaymath}
Is $\widehat{\beta}_n$ a consistent estimator of $\beta$? Why can you answer
this question without doing any calculations?
\item Go ahead and do the calculation. To what does $\widehat{\beta}_n$
converge?
\item Are there any points in the parameter space for which
$\widehat{\beta}_n$ converges to the right answer? Compare your answer to
the set of points where $\beta$ is identifiable.
\item Suppose the reliability of $W_i$ were known, or to be more realistic,
suppose that a good estimate of the reliability were available; call it
$r^2_{wx}$. How could you use $r^2_{wx}$ to improve $\widehat{\beta}_n$?
Give the formula for an improved estimator of $\beta$.
% \item Because of correlated measurement error, one suspects that many
% published estimates of reliability are too high. Suppose $r^2_{wx}$ is an
% overestimate of the true reliability $\rho^2_{wx}$. What effect does this
% have on your improved estimate of $\beta$?
\end{enumerate}
% The core of this was on the 2015 final, but it's enhanced in 2017.
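Once you have done the calculation in Question \ref{randiv}, a quick
simulation is a good way to check the limit. Here is a minimal R sketch; the
seed and parameter values are invented for illustration.
\begin{verbatim}
# Simulation sketch: the naive estimator under measurement error.
# All parameter values are illustrative only.
set.seed(9999)
n <- 100000
beta <- 1; phi <- 4; psi <- 1; omega <- 2
X <- rnorm(n, sd = sqrt(phi))          # latent explanatory variable
W <- X + rnorm(n, sd = sqrt(omega))    # observable version of X
Y <- beta*X + rnorm(n, sd = sqrt(psi))
sum(W*Y) / sum(W^2)   # compare to the limit from your calculation
\end{verbatim}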
\item The improved version of $\widehat{\beta}_n$ in the last question is an
example of \emph{correction for attenuation} (weakening) caused by
measurement error. Here is the version that applies to correlation.
Independently for $i=1, \ldots, n$, let
% Need eqnarray inside a parbox to make it the cell of a table
\begin{tabular}{ccc}
\parbox[m]{1.5in}{
\begin{eqnarray*}
D_{i,1} &=& F_{i,1} + e_{i,1} \\
D_{i,2} &=& F_{i,2} + e_{i,2} \\ &&
\end{eqnarray*}
} % End parbox
&
$cov\left( \begin{array}{c} F_{i,1} \\ F_{i,2} \end{array} \right) =
\left( \begin{array}{c c} \phi_{11} & \phi_{12} \\
                          \phi_{12} & \phi_{22} \end{array} \right)$
&
$cov\left( \begin{array}{c} e_{i,1} \\ e_{i,2} \end{array} \right) =
\left( \begin{array}{c c} \omega_1 & 0 \\
                          0 & \omega_2 \end{array} \right)$
\end{tabular}

\noindent To make this concrete, it would be natural for psychologists to be
interested in the correlation between intelligence and self-esteem, but what
they want to know is the correlation between \emph{true} intelligence and
\emph{true} self-esteem, not just the correlation between the score on an IQ
test and the score on a self-esteem questionnaire. So for subject $i$, let
$F_{i,1}$ represent true intelligence and $F_{i,2}$ represent true
self-esteem, while $D_{i,1}$ is the subject's score on an intelligence test
and $D_{i,2}$ is the score on a self-esteem questionnaire.
\begin{enumerate}
\item Make a path diagram of this model.
\item Show that $|Corr(D_{i,1},D_{i,2})| \leq |Corr(F_{i,1},F_{i,2})|$. That
is, measurement error weakens (attenuates) the correlation.
\item Suppose the reliability of $D_{i,1}$ is $\rho^2_1$ and the reliability
of $D_{i,2}$ is $\rho^2_2$. If you knew these values, how could you apply
$\rho^2_1$ and $\rho^2_2$ to $Corr(D_{i,1},D_{i,2})$, to obtain
$Corr(F_{i,1},F_{i,2})$?
\item You obtain a sample correlation between IQ score and self-esteem score
of $r = 0.25$, which is disappointingly low. From other data, the estimated
reliability of the IQ test is $r^2_1 = 0.90$, and the estimated reliability
of the self-esteem scale is $r^2_2 = 0.75$. Give an estimate of the
correlation between true intelligence and true self-esteem. The answer is a
number.
% 0.25 / sqrt(0.9*0.75) = 0.3042903
\end{enumerate}
% 2015 Final

\item This is a simplified version of the situation where one is attempting
to ``control'' for explanatory variables that are measured with error.
People do this all the time, and it doesn't work. Independently for
$i=1, \ldots, n$, let
\begin{eqnarray*}
Y_i &=& \beta_1 X_{i,1} + \beta_2 X_{i,2} + \epsilon_i \\
W_i &=& X_{i,1} + e_i,
\end{eqnarray*}
where
$cov\left( \begin{array}{c} X_{i,1} \\ X_{i,2} \end{array} \right) =
\left( \begin{array}{c c} \phi_{11} & \phi_{12} \\
                          \phi_{12} & \phi_{22} \end{array} \right)$,
$V(\epsilon_i) = \psi$, $V(e_i) = \omega$, all the expected values are zero,
and the error terms $\epsilon_i$ and $e_i$ are independent of one another,
and also independent of $X_{i,1}$ and $X_{i,2}$. The variable $X_{i,1}$ is
latent, while the variables $W_i$, $Y_i$ and $X_{i,2}$ are observable. What
people usually do in situations like this is fit a model like
$Y_i = \beta_1 W_i + \beta_2 X_{i,2} + \epsilon_i$, and test
$H_0: \beta_2 = 0$. That is, they ignore the measurement error in variables
for which they are ``controlling.''
\begin{enumerate}
\item Suppose $H_0: \beta_2 = 0$ is true. Does the ordinary least squares
estimator
\begin{displaymath}
\widehat{\beta}_2 = \frac{\sum_{i=1}^nW_i^2 \sum_{i=1}^nX_{i,2}Y_i -
\sum_{i=1}^nW_iX_{i,2}\sum_{i=1}^nW_iY_i}
{\sum_{i=1}^n W_i^2 \sum_{i=1}^n X_{i,2}^2 - (\sum_{i=1}^nW_iX_{i,2})^2}
\end{displaymath}
converge to the true value of $\beta_2 = 0$ as $n \rightarrow \infty$
everywhere in the parameter space? Answer Yes or No and show your work.
\item Under what conditions (that is, for what values of other parameters)
does $\widehat{\beta}_2 \stackrel{p}{\rightarrow} 0$ when $\beta_2 = 0$?
\end{enumerate}
% 2015 HW
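Again, a small simulation can confirm what the algebra says. The sketch below
fits the usual (wrong) regression by ordinary least squares when
$H_0: \beta_2 = 0$ is true. All parameter values are invented for
illustration.
\begin{verbatim}
# Simulation sketch: "controlling" for X1 through W = X1 + e when the
# true beta2 is zero. All parameter values are illustrative only.
set.seed(9999)
n <- 100000
beta1 <- 1; phi11 <- 1; phi22 <- 1; phi12 <- 0.8; psi <- 1; omega <- 1
X1 <- rnorm(n, sd = sqrt(phi11))
X2 <- phi12*X1 + rnorm(n, sd = sqrt(phi22 - phi12^2)) # Cov(X1,X2) = phi12
                                                      # because phi11 = 1
W  <- X1 + rnorm(n, sd = sqrt(omega))
Y  <- beta1*X1 + rnorm(n, sd = sqrt(psi))             # beta2 = 0
coef(lm(Y ~ W + X2))["X2"]   # is this close to zero?
\end{verbatim}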
\item Finally we have a solution, though as usual there is a little twist.
Independently for $i=1, \ldots, n$, let
\begin{eqnarray*}
Y_{i~~} &=& \beta X_i + \epsilon_i \\
V_{i~~} &=& Y_i + e_i \\
W_{i,1} &=& X_i + e_{i,1} \\
W_{i,2} &=& X_i + e_{i,2}
\end{eqnarray*}
where
\begin{itemize}
\item $Y_i$ is a latent variable.
\item $V_i$, $W_{i,1}$ and $W_{i,2}$ are all observable variables.
\item $X_i$ is a normally distributed \emph{latent} variable with mean zero
and variance $\phi>0$.
\item $\epsilon_i$ is normally distributed with mean zero and variance
$\psi>0$.
\item $e_{i}$ is normally distributed with mean zero and variance
$\omega>0$.
\item $e_{i,1}$ is normally distributed with mean zero and variance
$\omega_1>0$.
\item $e_{i,2}$ is normally distributed with mean zero and variance
$\omega_2>0$.
\item $X_i$, $\epsilon_i$, $e_i$, $e_{i,1}$ and $e_{i,2}$ are all
independent of one another.
\end{itemize}
\begin{enumerate}
\item Make a path diagram of this model.
\item What is the parameter vector $\boldsymbol{\theta}$ for this model?
\item Does the model pass the test of the Parameter Count Rule? Answer Yes
or No and give the numbers.
\item Calculate the variance-covariance matrix of the observable variables
as a function of the model parameters. Show your work.
\item Is the parameter vector identifiable at every point in the parameter
space? Answer Yes or No and prove your answer.
\item Some parameters are identifiable, while others are not. Which ones are
identifiable?
\item If $\beta$ (the parameter of main interest) is identifiable, propose a
Method of Moments estimator for it and prove that your proposed estimator is
consistent.
\item Suppose the sample variance-covariance matrix
$\widehat{\boldsymbol{\Sigma}}$ is
\begin{verbatim}
      W1    W2     V
W1 38.53 21.39 19.85
W2 21.39 35.50 19.00
V  19.85 19.00 28.81
\end{verbatim}
Give a reasonable estimate of $\beta$. There is more than one right answer.
The answer is a number. (Is this the Method of Moments estimate you
proposed? It does not have to be.) \textbf{Circle your answer.}
\item Describe how you could re-parameterize this model to make the
parameters all identifiable, allowing you to do maximum likelihood.
\end{enumerate}
% \newpage

\item \label{onestage} Here is a one-stage formulation of the double
measurement regression model.
% See the text for some discussion.
Independently for $i=1, \ldots, n$, let
\begin{eqnarray*}
\mathbf{w}_{i,1} & = & \mathbf{x}_i + \mathbf{e}_{i,1} \\
\mathbf{v}_{i,1} & = & \mathbf{y}_i + \mathbf{e}_{i,2} \\
\mathbf{w}_{i,2} & = & \mathbf{x}_i + \mathbf{e}_{i,3} \\
\mathbf{v}_{i,2} & = & \mathbf{y}_i + \mathbf{e}_{i,4} \\
\mathbf{y}_i & = & \boldsymbol{\beta} \mathbf{x}_i + \boldsymbol{\epsilon}_i
\end{eqnarray*}
where
\begin{itemize}
\item[] $\mathbf{y}_i$ is a $q \times 1$ random vector of latent response
variables. Because $q$ can be greater than one, the regression is
multivariate.
\item[] $\boldsymbol{\beta}$ is a $q \times p$ matrix of unknown constants.
These are the regression coefficients, with one row for each response
variable and one column for each explanatory variable.
\item[] $\mathbf{x}_i$ is a $p \times 1$ random vector of latent explanatory
variables, with expected value zero and variance-covariance matrix
$\boldsymbol{\Phi}_x$, a $p \times p$ symmetric and positive definite matrix
of unknown constants.
\item[] $\boldsymbol{\epsilon}_i$ is the error term of the latent
regression. It is a $q \times 1$ random vector with expected value zero and
variance-covariance matrix $\boldsymbol{\Psi}$, a $q \times q$ symmetric and
positive definite matrix of unknown constants.
\item[] $\mathbf{w}_{i,1}$ and $\mathbf{w}_{i,2}$ are $p \times 1$
observable random vectors, each representing $\mathbf{x}_i$ plus random
error.
\item[] $\mathbf{v}_{i,1}$ and $\mathbf{v}_{i,2}$ are $q \times 1$
observable random vectors, each representing $\mathbf{y}_i$ plus random
error.
\item[] $\mathbf{e}_{i,1}, \ldots, \mathbf{e}_{i,4}$ are the measurement
errors in $\mathbf{w}_{i,1}, \mathbf{v}_{i,1}, \mathbf{w}_{i,2}$ and
$\mathbf{v}_{i,2}$ respectively. Joining the vectors of measurement errors
into a single long vector $\mathbf{e}_i$, its covariance matrix may be
written as a partitioned matrix
\begin{equation*}
cov(\mathbf{e}_i) = cov\left(\begin{array}{c}
\mathbf{e}_{i,1} \\ \mathbf{e}_{i,2} \\ \mathbf{e}_{i,3} \\ \mathbf{e}_{i,4}
\end{array}\right) =
\left( \begin{array}{c|c|c|c}
\boldsymbol{\Omega}_{11} & \boldsymbol{\Omega}_{12} & \mathbf{0} & \mathbf{0} \\ \hline
\boldsymbol{\Omega}_{12}^\top & \boldsymbol{\Omega}_{22} & \mathbf{0} & \mathbf{0} \\ \hline
\mathbf{0} & \mathbf{0} & \boldsymbol{\Omega}_{33} & \boldsymbol{\Omega}_{34} \\ \hline
\mathbf{0} & \mathbf{0} & \boldsymbol{\Omega}_{34}^\top & \boldsymbol{\Omega}_{44}
\end{array} \right) = \boldsymbol{\Omega}.
\end{equation*}
\item[] In addition, the matrices of covariances between $\mathbf{x}_i$,
$\boldsymbol{\epsilon}_i$ and $\mathbf{e}_i$ are all zero.
\end{itemize}
Collecting $\mathbf{w}_{i,1}$, $\mathbf{v}_{i,1}$, $\mathbf{w}_{i,2}$ and
$\mathbf{v}_{i,2}$ into a single long data vector $\mathbf{d}_i$, we write
its variance-covariance matrix as a partitioned matrix:
\begin{displaymath}
\boldsymbol{\Sigma} =
\left( \begin{array}{c|c|c|c}
\boldsymbol{\Sigma}_{11} & \boldsymbol{\Sigma}_{12} & \boldsymbol{\Sigma}_{13} & \boldsymbol{\Sigma}_{14} \\ \hline
 & \boldsymbol{\Sigma}_{22} & \boldsymbol{\Sigma}_{23} & \boldsymbol{\Sigma}_{24} \\ \hline
 & & \boldsymbol{\Sigma}_{33} & \boldsymbol{\Sigma}_{34} \\ \hline
 & & & \boldsymbol{\Sigma}_{44}
\end{array} \right),
\end{displaymath}
where the covariance matrix of $\mathbf{w}_{i,1}$ is
$\boldsymbol{\Sigma}_{11}$, the covariance matrix of $\mathbf{v}_{i,1}$ is
$\boldsymbol{\Sigma}_{22}$, the matrix of covariances between
$\mathbf{w}_{i,1}$ and $\mathbf{v}_{i,1}$ is $\boldsymbol{\Sigma}_{12}$, and
so on.
\begin{enumerate}
\item Write the elements of the partitioned matrix $\boldsymbol{\Sigma}$ in
terms of the parameter matrices of the model. Be able to show your work for
each one.
\item Prove that all the model parameters are identifiable by solving the
covariance structure equations.
\item Give a Method of Moments estimator of $\boldsymbol{\Phi}_x$. There is
more than one reasonable answer. Remember, your estimator cannot be a
function of any unknown parameters, or you get a zero. For a particular
sample, will your estimate be in the parameter space? Mine is.
\item Give a Method of Moments estimator for $\boldsymbol{\beta}$. Remember,
your estimator cannot be a function of any unknown parameters, or you get a
zero. How do you know your estimator is consistent? Use
$\widehat{\boldsymbol{\Sigma}} \stackrel{p}{\rightarrow} \boldsymbol{\Sigma}$.
\end{enumerate}
% that is \emph{not} the MLE added after the question was assigned in 2013. But in 2015 I specified MOM instead.
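As a concreteness check for Question \ref{onestage}, here is a small R sketch
of one possible Method of Moments computation in the scalar case $p = q = 1$.
It is only one of several reasonable estimators. The simulated data, the
column order $(W_1, V_1, W_2, V_2)$ and all parameter values are invented for
illustration; for simplicity, the simulated measurement errors are
uncorrelated, which the model does not require.
\begin{verbatim}
# Sketch: Method of Moments with p = q = 1 and invented data.
set.seed(9999)
n <- 50000; phi <- 4; beta <- 0.75; psi <- 1
x <- rnorm(n, sd = sqrt(phi))
y <- beta*x + rnorm(n, sd = sqrt(psi))
D <- cbind(W1 = x + rnorm(n), V1 = y + rnorm(n),
           W2 = x + rnorm(n), V2 = y + rnorm(n))
Sighat <- var(D)
Phi_hat  <- Sighat["W1","W2"]            # Cov(w1,w2) involves only Phi_x
beta_hat <- Sighat["W2","V1"] / Phi_hat  # Cov(w2,v1) = beta * Phi_x
c(Phi_hat = Phi_hat, beta_hat = beta_hat)
\end{verbatim}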
\item \label{nconstr} For the double measurement regression model of
Question \ref{onestage},
\begin{enumerate}
\item How many unknown parameters appear in the covariance matrix of the
observable variables?
\item How many unique variances and covariances are there in the covariance
matrix of the observable variables? This is also the number of covariance
structure equations.
\item How many equality constraints does the model impose on the covariance
matrix of the observable variables? What are they?
\item Does the number of covariance structure equations minus the number of
parameters equal the number of constraints?
\end{enumerate}

\end{enumerate} % End of questions

\end{document}

% It was too long. Leave this for next time.

\item Question \ref{Rpig} (the R part of this assignment) will use the
\emph{Pig Birth Data}. As part of a much larger study, farmers filled out
questionnaires about various aspects of their farms. Some questions were
asked twice, on two different questionnaires several months apart. Buried in
all the questions were
\begin{itemize}
\item Number of breeding sows (female pigs) at the farm on June 1st
\item Number of sows giving birth later that summer.
\end{itemize}
There are two readings of these variables, one from each questionnaire. We
will assume (maybe incorrectly) that because the questions were buried in a
lot of other material and were asked months apart, errors of measurement are
independent between the two questionnaires. However, errors of measurement
might be correlated within a questionnaire.
\begin{enumerate}
\item Propose a reasonable model for these data, using the usual notation.
Give all the details. You may assume normality if you wish.
\item Make a path diagram of the model you have proposed.
\item Write the model equations again, this time in centered form. The
little $c$ symbols above the variables can be invisible.
\item Of course it is hopeless to identify the expected values and
intercepts, so we will concentrate on the covariance matrix. Calculate the
covariance matrix of one observable data vector $\mathbf{d}_i$.
\item Even though you have a general result that applies to this case, prove
that all the parameters in the covariance matrix are identifiable.
\item If there are any equality constraints on the covariance matrix, say
what they are.
\item Based on your answer to the last question, how many degrees of freedom
should there be in the chi-squared test for model fit? Does this agree with
your answer to Question~\ref{nconstr}?
\item \label{mombetahat} Give a consistent estimator of $\beta$ that is
\emph{not} the MLE, and explain why it's consistent. You may use the
consistency of sample variances and covariances without proof. Your
estimator \emph{must not} be a function of any unknown parameters, or you
get a zero on this part.
\end{enumerate}
% \pagebreak

\item \label{Rpig} The Pig Birth Data are given in the file
\href{http://www.utstat.toronto.edu/brunner/openSEM/data/openpigs2.data.txt}
{\texttt{openpigs2.data.txt}}. There are $n=114$ farms; please verify that
you are reading the correct number of cases.
\begin{enumerate}
\item Start by reading the data and then using the \texttt{var} function to
produce a sample covariance matrix of all the observable variables. Don't
worry about $n$ versus $n-1$.
\item Use \texttt{lavaan} to fit your model. If you experience numerical
problems you are doing something differently from the way I did it. When I
fit a good model everything was fine. When I fit a poor model there was
trouble. Just to verify that we are fitting the same model, my estimate of
the variance of the latent exogenous variable is 357.145.
\item Does your model fit the data adequately? Answer Yes or No and give
three numbers: a chi-squared statistic, the degrees of freedom, and a
$p$-value.
% G^2 = 0.087, df = 1, p = 0.768
\item \label{betahat} For each breeding sow present on June 1st, what is the
predicted number giving birth that summer? Your answer is a single number
from the output of \texttt{summary}. It is not an integer.
% betahat = 0.757
\item Using your answer to Question~\ref{mombetahat}, the output of
\texttt{var} and a calculator, give a \emph{numerical} version of your
consistent estimate of $\beta$. How does it compare to the MLE?
% 0.5*(272.67101+260.02857)/348.52989 = 0.7642093 vs. MLE of 0.7567. Pretty good!
\item Since maximum likelihood estimates are asymptotically normal,
% (approximately normal for large samples)
a large-sample confidence interval is $\widehat{\theta} \pm 1.96\,se$, where
$se$ is the standard error (estimated standard deviation) of
$\widehat{\theta}$. Give a large-sample confidence interval for your answer
to \ref{betahat}.
\item Recall that the reliability of a measurement is the proportion of its
variance that does \emph{not} come from measurement error. What is the
estimated reliability of number of breeding sows from questionnaire two? The
answer is a number, which you get with a calculator and the output.
% 1 - 93.82358/(0.7567^2*360.30522+33.93153+93.82358) = 0.7191449 from proc calis MLEs
\item Is there evidence of correlated measurement error within
questionnaires?
Answer Yes or No and give some numbers from the results file to support your
conclusion.
\item The answer to that last question was based on two separate tests.
Though it is already pretty convincing, conduct a \emph{single} Wald (not
likelihood ratio) test of the two null hypotheses simultaneously. Give the
Wald chi-squared statistic, the degrees of freedom and the $p$-value. What
do you conclude? Is there evidence of correlated measurement error, or not?
% W = 45.41656, df = 2, p < 0.0001
\end{enumerate}
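To help you get started on Question \ref{Rpig}, here is a minimal R sketch of
the first steps. The \texttt{header} option and the \texttt{lavaan} details
are assumptions for illustration; look at the raw data file and write your
own model string.
\begin{verbatim}
# Getting started on the Pig Birth Data. The header option is an
# assumption -- check the raw file first.
pig <- read.table(
  "http://www.utstat.toronto.edu/brunner/openSEM/data/openpigs2.data.txt",
  header = TRUE)
dim(pig)    # should report 114 rows
var(pig)    # sample covariance matrix of the observable variables
# install.packages("lavaan")   # if necessary
library(lavaan)
# Write your model string using =~ (measurement), ~ (regression) and
# ~~ (variances and covariances); see help(model.syntax). Then something
# like:  fit <- lavaan(yourmodel, data = pig); summary(fit)
\end{verbatim}

\vspace{2mm}

\noindent Please bring your \emph{complete} R printout from
Question~\ref{Rpig} to the quiz, showing all input and output. It may be
handed in.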