STA313 F 2004 Handout 7
Path Model 1 with R
# path1.R R --vanilla < path1.R
#
# Path Model 1 with R
#
# Simulate some data
B1 <- 1 ; B2 <- 2 ; B3 <- 3
set.seed(9999)
n <- 300 ; e0 <- rnorm(n)
x1 <- rnorm(n,0,2)+e0 ; x2 <- rnorm(n,0,3)+e0
e1 <- rnorm(n,0,sqrt(2)) ; e2 <- rnorm(n,0,sqrt(3))
# Now we know all the true parameter values:
# v(x1)=5 v(x2)=10 c(x1,x2)=1 v(e1)=2 v(e2)=3 and B1 B2 B3 above.
truetheta <- c(5,10,1,2,3,B1,B2,B3)
names(truetheta) <- c("sigsqx1","sigsqx2","sigma12","sigsqe1",
"sigsqe2","b1","b2","b3")
#
# Compute endogenous variables.
y1 <- B1*x1+e1
y2 <- B2*y1+B3*x2+e2
path1data <- cbind(x1,x2,y1,y2) # Data always = manafest variables
# For non-simulated data, path1data <- read.table("path1.dat") or something
#
# Define functions
#
source("mvnLL.R")
pathone1 <- function(theta,datta) # Full (unresticted) model
# 1 2 3 4 5 6 7 8
{ # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2,b3) 8 parameters
# Elements of the covariance matrix SIGij are functions of the
# parameter vector.
SIG11 <- theta[1]; SIG12 <- theta[3]; SIG13 <- theta[6]*theta[1]
SIG14 <- theta[6]*theta[7]*theta[1]+theta[8]*theta[3]
SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3]
SIG24 <- theta[6]*theta[7]*theta[3]+theta[8]*theta[2]
SIG33 <- theta[6]^2*theta[1]+theta[4]
SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[6]*theta[8]*theta[3] +
theta[7]*theta[4]
SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] +
theta[8]^2*theta[2] + theta[5] +
2*theta[6]*theta[7]*theta[8]*theta[3]
covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14),
c(SIG12,SIG22,SIG23,SIG24),
c(SIG13,SIG23,SIG33,SIG34),
c(SIG14,SIG24,SIG34,SIG44) )
pathone1 <- mvnLL(covmat,datta)
pathone1
} # End of pathone1
pathone0 <- function(theta,datta) # Reduced (resticted) model with b3=0
# 1 2 3 4 5 6 7
{ # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2) 7 parameters
# Elements of the covariance matrix SIGij are functions of the
# parameter vector
SIG11 <- theta[1]; SIG12 <- theta[3]; SIG13 <- theta[6]*theta[1]
SIG14 <- theta[6]*theta[7]*theta[1]
SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3]
SIG24 <- theta[6]*theta[7]*theta[3]
SIG33 <- theta[6]^2*theta[1]+theta[4]
SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[7]*theta[4]
SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] +
theta[5]
covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14),
c(SIG12,SIG22,SIG23,SIG24),
c(SIG13,SIG23,SIG33,SIG34),
c(SIG14,SIG24,SIG34,SIG44) )
pathone0 <- mvnLL(covmat,datta)
pathone0
} # End of pathone0
# Get nice starting values. You don't have to extract the numbers from the
# output like this. You can just look at output with your eyes and type in
# the numbers you see. Like start1[1] <- 5.18 instead of
# start1[1] <- exmat[1,1].
start1 <- numeric(8) ; start0 <- numeric(7)
# For manafest exogenous vars, just use sample covariance matrix.
exmat <- var(path1data[,1:2]) ; exmat
start1[1] <- exmat[1,1] ; start1[2] <- exmat[2,2]
start1[3] <- exmat[1,2]
start0[1] <- exmat[1,1] ; start0[2] <- exmat[2,2]
start0[3] <- exmat[1,2]
# For b values, use regression. MSE will estimate variance of error term
# Regression on y1 is the same for rest and unrest model.
reg1 <- lm(y1~-1+x1) # -1 means no intercept
anova(reg1)
anova(reg1)[2,3]
start1[4] <- anova(reg1)[2,3] ; start0[4] <- anova(reg1)[2,3]
summary(reg1)
reg1$coefficients[1]
start1[6] <- reg1$coefficients[1] ; start0[6] <- reg1$coefficients[1]
# Regression on y2 for unrestricted model
reg21 <- lm(y2 ~ -1 + y1 + x2)
anova(reg21)
start1[5] <- anova(reg21)[3,3]
reg21$coefficients
start1[7] <- reg21$coefficients[1]
start1[8] <- reg21$coefficients[2]
# Regression on y2 for restricted model
reg20 <- lm(y2 ~ -1 + y1)
anova(reg20)
start0[5] <- anova(reg20)[2,3]
summary(reg20)
start0[7] <- reg20$coefficients[1]
# Now look at the starting values
Start0 <- c(start0,0) # To make length equal to start1
cbind(truetheta,start1,Start0)
unrest <- nlm(pathone1,start1,datta=path1data) ; unrest
rest <- nlm(pathone0,start0,datta=path1data) ; rest
G <- rest$minimum - unrest$minimum
pval <- 1-pchisq(G,1)
G ; pval
################# End of Program path1.R #################
Here is the output.
R : Copyright 2001, The R Development Core Team
Version 1.4.0 (2001-12-19)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type `license()' or `licence()' for distribution details.
R is a collaborative project with many contributors.
Type `contributors()' for more information.
Type `demo()' for some demos, `help()' for on-line help, or
`help.start()' for a HTML browser interface to help.
Type `q()' to quit R.
> # path1.R R --vanilla < path1.R
> #
> # Path Model 1 with R
> #
> # Simulate some data
> B1 <- 1 ; B2 <- 2 ; B3 <- 3
> set.seed(9999)
> n <- 300 ; e0 <- rnorm(n)
> x1 <- rnorm(n,0,2)+e0 ; x2 <- rnorm(n,0,3)+e0
> e1 <- rnorm(n,0,sqrt(2)) ; e2 <- rnorm(n,0,sqrt(3))
> # Now we know all the true parameter values:
> # v(x1)=5 v(x2)=10 c(x1,x2)=1 v(e1)=2 v(e2)=3 and B1 B2 B3 above.
> truetheta <- c(5,10,1,2,3,B1,B2,B3)
> names(truetheta) <- c("sigsqx1","sigsqx2","sigma12","sigsqe1",
+ "sigsqe2","b1","b2","b3")
> #
> # Compute endogenous variables.
> y1 <- B1*x1+e1
> y2 <- B2*y1+B3*x2+e2
> path1data <- cbind(x1,x2,y1,y2) # Data always = manafest variables
> # For non-simulated data, path1data <- read.table("path1.dat") or something
> #
> # Define functions
> #
> source("mvnLL.R")
> pathone1 <- function(theta,datta) # Full (unresticted) model
+ # 1 2 3 4 5 6 7 8
+ { # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2,b3) 8 parameters
+ # Elements of the covariance matrix SIGij are functions of the
+ # parameter vector.
+ SIG11 <- theta[1]; SIG12 <- theta[3]; SIG13 <- theta[6]*theta[1]
+ SIG14 <- theta[6]*theta[7]*theta[1]+theta[8]*theta[3]
+
+ SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3]
+ SIG24 <- theta[6]*theta[7]*theta[3]+theta[8]*theta[2]
+
+ SIG33 <- theta[6]^2*theta[1]+theta[4]
+ SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[6]*theta[8]*theta[3] +
+ theta[7]*theta[4]
+
+ SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] +
+ theta[8]^2*theta[2] + theta[5] +
+ 2*theta[6]*theta[7]*theta[8]*theta[3]
+ covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14),
+ c(SIG12,SIG22,SIG23,SIG24),
+ c(SIG13,SIG23,SIG33,SIG34),
+ c(SIG14,SIG24,SIG34,SIG44) )
+ pathone1 <- mvnLL(covmat,datta)
+ pathone1
+ } # End of pathone1
>
> pathone0 <- function(theta,datta) # Reduced (resticted) model with b3=0
+ # 1 2 3 4 5 6 7
+ { # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2) 7 parameters
+ # Elements of the covariance matrix SIGij are functions of the
+ # parameter vector
+ SIG11 <- theta[1]; SIG12 <- theta[3]; SIG13 <- theta[6]*theta[1]
+ SIG14 <- theta[6]*theta[7]*theta[1]
+
+ SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3]
+ SIG24 <- theta[6]*theta[7]*theta[3]
+
+ SIG33 <- theta[6]^2*theta[1]+theta[4]
+ SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[7]*theta[4]
+
+ SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] +
+ theta[5]
+ covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14),
+ c(SIG12,SIG22,SIG23,SIG24),
+ c(SIG13,SIG23,SIG33,SIG34),
+ c(SIG14,SIG24,SIG34,SIG44) )
+ pathone0 <- mvnLL(covmat,datta)
+ pathone0
+ } # End of pathone0
>
> # Get nice starting values. You don't have to extract the numbers from the
> # output like this. You can just look at output with your eyes and type in
> # the numbers you see. Like start1[1] <- 5.18 instead of
> # start1[1] <- exmat[1,1].
> start1 <- numeric(8) ; start0 <- numeric(7)
> # For manafest exogenous vars, just use sample covariance matrix.
> exmat <- var(path1data[,1:2]) ; exmat
x1 x2
x1 5.1786163 0.4464832
x2 0.4464832 9.5407043
> start1[1] <- exmat[1,1] ; start1[2] <- exmat[2,2]
> start1[3] <- exmat[1,2]
> start0[1] <- exmat[1,1] ; start0[2] <- exmat[2,2]
> start0[3] <- exmat[1,2]
> # For b values, use regression. MSE will estimate variance of error term
> # Regression on y1 is the same for rest and unrest model.
> reg1 <- lm(y1~-1+x1) # -1 means no intercept
> anova(reg1)
Analysis of Variance Table
Response: y1
Df Sum Sq Mean Sq F value Pr(>F)
x1 1 1557.30 1557.30 768.01 < 2.2e-16 ***
Residuals 299 606.28 2.03
---
Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> anova(reg1)[2,3]
2.027694
> start1[4] <- anova(reg1)[2,3] ; start0[4] <- anova(reg1)[2,3]
> summary(reg1)
Call:
lm(formula = y1 ~ -1 + x1)
Residuals:
Min 1Q Median 3Q Max
-4.79074 -0.94044 0.01468 0.98386 3.41464
Coefficients:
Estimate Std. Error t value Pr(>|t|)
x1 1.00020 0.03609 27.71 <2e-16 ***
---
Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
Residual standard error: 1.424 on 299 degrees of freedom
Multiple R-Squared: 0.7198, Adjusted R-squared: 0.7188
F-statistic: 768 on 1 and 299 DF, p-value: 0
> reg1$coefficients[1]
x1
1.000205
> start1[6] <- reg1$coefficients[1] ; start0[6] <- reg1$coefficients[1]
> # Regression on y2 for unrestricted model
> reg21 <- lm(y2 ~ -1 + y1 + x2)
> anova(reg21)
Analysis of Variance Table
Response: y2
Df Sum Sq Mean Sq F value Pr(>F)
y1 1 9000.8 9000.8 2786.4 < 2.2e-16 ***
x2 1 25507.3 25507.3 7896.4 < 2.2e-16 ***
Residuals 298 962.6 3.2
---
Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> start1[5] <- anova(reg21)[3,3]
> reg21$coefficients
y1 x2
2.047839 2.983915
> start1[7] <- reg21$coefficients[1]
> start1[8] <- reg21$coefficients[2]
> # Regression on y2 for restricted model
> reg20 <- lm(y2 ~ -1 + y1)
> anova(reg20)
Analysis of Variance Table
Response: y2
Df Sum Sq Mean Sq F value Pr(>F)
y1 1 9000.8 9000.8 101.67 < 2.2e-16 ***
Residuals 299 26469.9 88.5
---
Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> start0[5] <- anova(reg20)[2,3]
> summary(reg20)
Call:
lm(formula = y2 ~ -1 + y1)
Residuals:
Min 1Q Median 3Q Max
-26.0718 -7.5312 -0.8035 5.6530 25.0619
Coefficients:
Estimate Std. Error t value Pr(>|t|)
y1 2.0396 0.2023 10.08 <2e-16 ***
---
Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
Residual standard error: 9.409 on 299 degrees of freedom
Multiple R-Squared: 0.2538, Adjusted R-squared: 0.2513
F-statistic: 101.7 on 1 and 299 DF, p-value: 0
> start0[7] <- reg20$coefficients[1]
> # Now look at the starting values
> Start0 <- c(start0,0) # To make length equal to start1
> cbind(truetheta,start1,Start0)
truetheta start1 Start0
sigsqx1 5 5.1786163 5.1786163
sigsqx2 10 9.5407043 9.5407043
sigma12 1 0.4464832 0.4464832
sigsqe1 2 2.0276943 2.0276943
sigsqe2 3 3.2302363 88.5279856
b1 1 1.0002050 1.0002050
b2 2 2.0478390 2.0396378
b3 3 2.9839146 0.0000000
>
> unrest <- nlm(pathone1,start1,datta=path1data) ; unrest
$minimum
[1] 5132.435
$estimate
[1] 5.1613452 9.5089096 0.4450373 2.0181513 3.2060569 1.0018946 2.0486456
[8] 2.9828288
$gradient
[1] -1.191198e-04 1.683380e-05 5.320544e-04 -1.153683e-04 9.985541e-05
[6] -4.175764e-04 -8.790195e-05 4.201662e-04
$code
[1] 1
$iterations
[1] 37
> rest <- nlm(pathone0,start0,datta=path1data) ; rest
$minimum
[1] 6125.473
$estimate
[1] 5.1613436 9.5089194 0.4450127 2.0181505 87.8083356 1.0018943 2.0498683
$gradient
[1] -1.103092e-04 6.379620e-05 2.273737e-04 -1.762071e-04 -3.810607e-05
[6] -9.785816e-04 -1.344364e-04
$code
[1] 1
$iterations
[1] 50
> G <- rest$minimum - unrest$minimum
> pval <- 1-pchisq(G,1)
> G ; pval
[1] 993.038
[1] 0
>
> ################# End of Program path1.R #################
>