STA313 F 2004 Handout 7

Path Model 1 with R



# path1.R               R --vanilla < path1.R
#
#                        Path Model 1 with R
#
# Simulate some data
B1 <- 1 ; B2  <- 2 ; B3 <- 3
set.seed(9999)
n <- 300 ; e0 <- rnorm(n)
x1 <- rnorm(n,0,2)+e0 ; x2 <- rnorm(n,0,3)+e0
e1 <- rnorm(n,0,sqrt(2)) ; e2 <- rnorm(n,0,sqrt(3))
# Now we know all the true parameter values:
# v(x1)=5  v(x2)=10  c(x1,x2)=1  v(e1)=2  v(e2)=3 and B1 B2 B3 above.
truetheta <- c(5,10,1,2,3,B1,B2,B3)
names(truetheta) <- c("sigsqx1","sigsqx2","sigma12","sigsqe1",
                      "sigsqe2","b1","b2","b3")
#
# Compute endogenous variables.
y1 <- B1*x1+e1
y2 <- B2*y1+B3*x2+e2
path1data <- cbind(x1,x2,y1,y2) # Data always = manafest variables
# For non-simulated data,   path1data <- read.table("path1.dat") or something
#
# Define functions
#
source("mvnLL.R")
pathone1 <- function(theta,datta) # Full (unresticted) model
      #             1        2       3      4       5    6  7  8
    { # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2,b3) 8 parameters
      # Elements of the covariance matrix SIGij are functions of the
      # parameter vector.
      SIG11 <- theta[1];  SIG12 <- theta[3];  SIG13 <- theta[6]*theta[1]
      SIG14 <- theta[6]*theta[7]*theta[1]+theta[8]*theta[3]

      SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3]
      SIG24 <- theta[6]*theta[7]*theta[3]+theta[8]*theta[2]

      SIG33 <- theta[6]^2*theta[1]+theta[4]
      SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[6]*theta[8]*theta[3] +
               theta[7]*theta[4]

      SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] +
               theta[8]^2*theta[2] + theta[5] +
               2*theta[6]*theta[7]*theta[8]*theta[3]
      covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14),
                       c(SIG12,SIG22,SIG23,SIG24),
                       c(SIG13,SIG23,SIG33,SIG34),
                       c(SIG14,SIG24,SIG34,SIG44)   )
    pathone1 <-  mvnLL(covmat,datta)
    pathone1
    } # End of pathone1

pathone0 <- function(theta,datta) # Reduced (resticted) model with b3=0
      #             1        2       3      4       5    6  7
    { # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2) 7 parameters
      # Elements of the covariance matrix SIGij are functions of the
      # parameter vector
      SIG11 <- theta[1];  SIG12 <- theta[3];  SIG13 <- theta[6]*theta[1]
      SIG14 <- theta[6]*theta[7]*theta[1]

      SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3]
      SIG24 <- theta[6]*theta[7]*theta[3]

      SIG33 <- theta[6]^2*theta[1]+theta[4]
      SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[7]*theta[4]

      SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] +
               theta[5]
      covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14),
                       c(SIG12,SIG22,SIG23,SIG24),
                       c(SIG13,SIG23,SIG33,SIG34),
                       c(SIG14,SIG24,SIG34,SIG44)   )
    pathone0 <-  mvnLL(covmat,datta)
    pathone0
    } # End of pathone0

# Get nice starting values. You don't have to extract the numbers from the
# output like this. You can just look at output with your eyes and type in
# the numbers you see. Like start1[1] <- 5.18 instead of
#                           start1[1] <- exmat[1,1].
  start1 <- numeric(8) ; start0 <- numeric(7)
      # For manafest exogenous vars, just use sample covariance matrix.
        exmat <- var(path1data[,1:2]) ; exmat
        start1[1] <- exmat[1,1] ; start1[2] <- exmat[2,2]
                                  start1[3] <- exmat[1,2]
        start0[1] <- exmat[1,1] ; start0[2] <- exmat[2,2]
                                  start0[3] <- exmat[1,2]
      # For b values, use regression. MSE will estimate variance of error term
      # Regression on y1 is the same for rest and unrest model.
        reg1 <- lm(y1~-1+x1)  # -1 means no intercept
        anova(reg1)
        anova(reg1)[2,3]
        start1[4] <- anova(reg1)[2,3] ; start0[4] <- anova(reg1)[2,3]
        summary(reg1)
        reg1$coefficients[1]
        start1[6] <- reg1$coefficients[1] ; start0[6] <- reg1$coefficients[1]
      # Regression on y2 for unrestricted model
        reg21 <- lm(y2 ~ -1 + y1 + x2)
        anova(reg21)
        start1[5] <- anova(reg21)[3,3]
        reg21$coefficients
        start1[7] <- reg21$coefficients[1]
         start1[8] <- reg21$coefficients[2]
      # Regression on y2 for restricted model
        reg20 <- lm(y2 ~ -1 + y1)
        anova(reg20)
        start0[5] <- anova(reg20)[2,3]
        summary(reg20)
        start0[7] <- reg20$coefficients[1]
# Now look at the starting values
Start0 <- c(start0,0) # To make length equal to start1
cbind(truetheta,start1,Start0)

unrest <- nlm(pathone1,start1,datta=path1data) ; unrest
rest   <- nlm(pathone0,start0,datta=path1data) ; rest
G <- rest$minimum - unrest$minimum
pval <- 1-pchisq(G,1)
G ; pval

################# End of Program path1.R #################

Here is the output.

R : Copyright 2001, The R Development Core Team
Version 1.4.0  (2001-12-19)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type `license()' or `licence()' for distribution details.

R is a collaborative project with many contributors.
Type `contributors()' for more information.

Type `demo()' for some demos, `help()' for on-line help, or
`help.start()' for a HTML browser interface to help.
Type `q()' to quit R.

> # path1.R               R --vanilla < path1.R
> #
> #                        Path Model 1 with R
> #
> # Simulate some data
> B1 <- 1 ; B2  <- 2 ; B3 <- 3
> set.seed(9999)
> n <- 300 ; e0 <- rnorm(n)
> x1 <- rnorm(n,0,2)+e0 ; x2 <- rnorm(n,0,3)+e0
> e1 <- rnorm(n,0,sqrt(2)) ; e2 <- rnorm(n,0,sqrt(3))
> # Now we know all the true parameter values:
> # v(x1)=5  v(x2)=10  c(x1,x2)=1  v(e1)=2  v(e2)=3 and B1 B2 B3 above.
> truetheta <- c(5,10,1,2,3,B1,B2,B3)
> names(truetheta) <- c("sigsqx1","sigsqx2","sigma12","sigsqe1",
+                       "sigsqe2","b1","b2","b3")
> #
> # Compute endogenous variables.
> y1 <- B1*x1+e1
> y2 <- B2*y1+B3*x2+e2
> path1data <- cbind(x1,x2,y1,y2) # Data always = manafest variables
> # For non-simulated data,   path1data <- read.table("path1.dat") or something
> #
> # Define functions
> #
> source("mvnLL.R")
> pathone1 <- function(theta,datta) # Full (unresticted) model
+       #             1        2       3      4       5    6  7  8
+     { # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2,b3) 8 parameters
+       # Elements of the covariance matrix SIGij are functions of the
+       # parameter vector.
+       SIG11 <- theta[1];  SIG12 <- theta[3];  SIG13 <- theta[6]*theta[1]
+       SIG14 <- theta[6]*theta[7]*theta[1]+theta[8]*theta[3]
+
+       SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3]
+       SIG24 <- theta[6]*theta[7]*theta[3]+theta[8]*theta[2]
+
+       SIG33 <- theta[6]^2*theta[1]+theta[4]
+       SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[6]*theta[8]*theta[3] +
+                theta[7]*theta[4]
+
+       SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] +
+                theta[8]^2*theta[2] + theta[5] +
+                2*theta[6]*theta[7]*theta[8]*theta[3]
+       covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14),
+                        c(SIG12,SIG22,SIG23,SIG24),
+                        c(SIG13,SIG23,SIG33,SIG34),
+                        c(SIG14,SIG24,SIG34,SIG44)   )
+     pathone1 <-  mvnLL(covmat,datta)
+     pathone1
+     } # End of pathone1
>
> pathone0 <- function(theta,datta) # Reduced (resticted) model with b3=0
+       #             1        2       3      4       5    6  7
+     { # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2) 7 parameters
+       # Elements of the covariance matrix SIGij are functions of the
+       # parameter vector
+       SIG11 <- theta[1];  SIG12 <- theta[3];  SIG13 <- theta[6]*theta[1]
+       SIG14 <- theta[6]*theta[7]*theta[1]
+
+       SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3]
+       SIG24 <- theta[6]*theta[7]*theta[3]
+
+       SIG33 <- theta[6]^2*theta[1]+theta[4]
+       SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[7]*theta[4]
+
+       SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] +
+                theta[5]
+       covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14),
+                        c(SIG12,SIG22,SIG23,SIG24),
+                        c(SIG13,SIG23,SIG33,SIG34),
+                        c(SIG14,SIG24,SIG34,SIG44)   )
+     pathone0 <-  mvnLL(covmat,datta)
+     pathone0
+     } # End of pathone0
>
> # Get nice starting values. You don't have to extract the numbers from the
> # output like this. You can just look at output with your eyes and type in
> # the numbers you see. Like start1[1] <- 5.18 instead of
> #                           start1[1] <- exmat[1,1].
>   start1 <- numeric(8) ; start0 <- numeric(7)
>       # For manafest exogenous vars, just use sample covariance matrix.
>         exmat <- var(path1data[,1:2]) ; exmat
          x1        x2
x1 5.1786163 0.4464832
x2 0.4464832 9.5407043
>         start1[1] <- exmat[1,1] ; start1[2] <- exmat[2,2]
>                                   start1[3] <- exmat[1,2]
>         start0[1] <- exmat[1,1] ; start0[2] <- exmat[2,2]
>                                   start0[3] <- exmat[1,2]
>       # For b values, use regression. MSE will estimate variance of error term
>       # Regression on y1 is the same for rest and unrest model.
>         reg1 <- lm(y1~-1+x1)  # -1 means no intercept
>         anova(reg1)
Analysis of Variance Table

Response: y1
           Df  Sum Sq Mean Sq F value    Pr(>F)
x1          1 1557.30 1557.30  768.01 < 2.2e-16 ***
Residuals 299  606.28    2.03
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
>         anova(reg1)[2,3]

2.027694
>         start1[4] <- anova(reg1)[2,3] ; start0[4] <- anova(reg1)[2,3]
>         summary(reg1)

Call:
lm(formula = y1 ~ -1 + x1)

Residuals:
     Min       1Q   Median       3Q      Max
-4.79074 -0.94044  0.01468  0.98386  3.41464

Coefficients:
   Estimate Std. Error t value Pr(>|t|)
x1  1.00020    0.03609   27.71   <2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 1.424 on 299 degrees of freedom
Multiple R-Squared: 0.7198,     Adjusted R-squared: 0.7188
F-statistic:   768 on 1 and 299 DF,  p-value:     0

>         reg1$coefficients[1]
      x1
1.000205
>         start1[6] <- reg1$coefficients[1] ; start0[6] <- reg1$coefficients[1]
>       # Regression on y2 for unrestricted model
>         reg21 <- lm(y2 ~ -1 + y1 + x2)
>         anova(reg21)
Analysis of Variance Table

Response: y2
           Df  Sum Sq Mean Sq F value    Pr(>F)
y1          1  9000.8  9000.8  2786.4 < 2.2e-16 ***
x2          1 25507.3 25507.3  7896.4 < 2.2e-16 ***
Residuals 298   962.6     3.2
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
>         start1[5] <- anova(reg21)[3,3]
>         reg21$coefficients
      y1       x2
2.047839 2.983915
>         start1[7] <- reg21$coefficients[1]
>          start1[8] <- reg21$coefficients[2]
>       # Regression on y2 for restricted model
>         reg20 <- lm(y2 ~ -1 + y1)
>         anova(reg20)
Analysis of Variance Table

Response: y2
           Df  Sum Sq Mean Sq F value    Pr(>F)
y1          1  9000.8  9000.8  101.67 < 2.2e-16 ***
Residuals 299 26469.9    88.5
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
>         start0[5] <- anova(reg20)[2,3]
>         summary(reg20)

Call:
lm(formula = y2 ~ -1 + y1)

Residuals:
     Min       1Q   Median       3Q      Max
-26.0718  -7.5312  -0.8035   5.6530  25.0619

Coefficients:
   Estimate Std. Error t value Pr(>|t|)
y1   2.0396     0.2023   10.08   <2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 9.409 on 299 degrees of freedom
Multiple R-Squared: 0.2538,     Adjusted R-squared: 0.2513
F-statistic: 101.7 on 1 and 299 DF,  p-value:     0

>         start0[7] <- reg20$coefficients[1]
> # Now look at the starting values
> Start0 <- c(start0,0) # To make length equal to start1
> cbind(truetheta,start1,Start0)
        truetheta    start1     Start0
sigsqx1         5 5.1786163  5.1786163
sigsqx2        10 9.5407043  9.5407043
sigma12         1 0.4464832  0.4464832
sigsqe1         2 2.0276943  2.0276943
sigsqe2         3 3.2302363 88.5279856
b1              1 1.0002050  1.0002050
b2              2 2.0478390  2.0396378
b3              3 2.9839146  0.0000000
>
> unrest <- nlm(pathone1,start1,datta=path1data) ; unrest
$minimum
[1] 5132.435

$estimate
[1] 5.1613452 9.5089096 0.4450373 2.0181513 3.2060569 1.0018946 2.0486456
[8] 2.9828288

$gradient
[1] -1.191198e-04  1.683380e-05  5.320544e-04 -1.153683e-04  9.985541e-05
[6] -4.175764e-04 -8.790195e-05  4.201662e-04

$code
[1] 1

$iterations
[1] 37

> rest   <- nlm(pathone0,start0,datta=path1data) ; rest
$minimum
[1] 6125.473

$estimate
[1]  5.1613436  9.5089194  0.4450127  2.0181505 87.8083356  1.0018943  2.0498683

$gradient
[1] -1.103092e-04  6.379620e-05  2.273737e-04 -1.762071e-04 -3.810607e-05
[6] -9.785816e-04 -1.344364e-04

$code
[1] 1

$iterations
[1] 50

> G <- rest$minimum - unrest$minimum
> pval <- 1-pchisq(G,1)
> G ; pval
[1] 993.038
[1] 0
>
> ################# End of Program path1.R #################
>