STA313 F 2004 Handout 7
Path Model 1 with R
# path1.R R --vanilla < path1.R # # Path Model 1 with R # # Simulate some data B1 <- 1 ; B2 <- 2 ; B3 <- 3 set.seed(9999) n <- 300 ; e0 <- rnorm(n) x1 <- rnorm(n,0,2)+e0 ; x2 <- rnorm(n,0,3)+e0 e1 <- rnorm(n,0,sqrt(2)) ; e2 <- rnorm(n,0,sqrt(3)) # Now we know all the true parameter values: # v(x1)=5 v(x2)=10 c(x1,x2)=1 v(e1)=2 v(e2)=3 and B1 B2 B3 above. truetheta <- c(5,10,1,2,3,B1,B2,B3) names(truetheta) <- c("sigsqx1","sigsqx2","sigma12","sigsqe1", "sigsqe2","b1","b2","b3") # # Compute endogenous variables. y1 <- B1*x1+e1 y2 <- B2*y1+B3*x2+e2 path1data <- cbind(x1,x2,y1,y2) # Data always = manafest variables # For non-simulated data, path1data <- read.table("path1.dat") or something # # Define functions # source("mvnLL.R") pathone1 <- function(theta,datta) # Full (unresticted) model # 1 2 3 4 5 6 7 8 { # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2,b3) 8 parameters # Elements of the covariance matrix SIGij are functions of the # parameter vector. SIG11 <- theta[1]; SIG12 <- theta[3]; SIG13 <- theta[6]*theta[1] SIG14 <- theta[6]*theta[7]*theta[1]+theta[8]*theta[3] SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3] SIG24 <- theta[6]*theta[7]*theta[3]+theta[8]*theta[2] SIG33 <- theta[6]^2*theta[1]+theta[4] SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[6]*theta[8]*theta[3] + theta[7]*theta[4] SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] + theta[8]^2*theta[2] + theta[5] + 2*theta[6]*theta[7]*theta[8]*theta[3] covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14), c(SIG12,SIG22,SIG23,SIG24), c(SIG13,SIG23,SIG33,SIG34), c(SIG14,SIG24,SIG34,SIG44) ) pathone1 <- mvnLL(covmat,datta) pathone1 } # End of pathone1 pathone0 <- function(theta,datta) # Reduced (resticted) model with b3=0 # 1 2 3 4 5 6 7 { # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2) 7 parameters # Elements of the covariance matrix SIGij are functions of the # parameter vector SIG11 <- theta[1]; SIG12 <- theta[3]; SIG13 <- theta[6]*theta[1] SIG14 <- theta[6]*theta[7]*theta[1] SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3] SIG24 <- theta[6]*theta[7]*theta[3] SIG33 <- theta[6]^2*theta[1]+theta[4] SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[7]*theta[4] SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] + theta[5] covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14), c(SIG12,SIG22,SIG23,SIG24), c(SIG13,SIG23,SIG33,SIG34), c(SIG14,SIG24,SIG34,SIG44) ) pathone0 <- mvnLL(covmat,datta) pathone0 } # End of pathone0 # Get nice starting values. You don't have to extract the numbers from the # output like this. You can just look at output with your eyes and type in # the numbers you see. Like start1[1] <- 5.18 instead of # start1[1] <- exmat[1,1]. start1 <- numeric(8) ; start0 <- numeric(7) # For manafest exogenous vars, just use sample covariance matrix. exmat <- var(path1data[,1:2]) ; exmat start1[1] <- exmat[1,1] ; start1[2] <- exmat[2,2] start1[3] <- exmat[1,2] start0[1] <- exmat[1,1] ; start0[2] <- exmat[2,2] start0[3] <- exmat[1,2] # For b values, use regression. MSE will estimate variance of error term # Regression on y1 is the same for rest and unrest model. reg1 <- lm(y1~-1+x1) # -1 means no intercept anova(reg1) anova(reg1)[2,3] start1[4] <- anova(reg1)[2,3] ; start0[4] <- anova(reg1)[2,3] summary(reg1) reg1$coefficients[1] start1[6] <- reg1$coefficients[1] ; start0[6] <- reg1$coefficients[1] # Regression on y2 for unrestricted model reg21 <- lm(y2 ~ -1 + y1 + x2) anova(reg21) start1[5] <- anova(reg21)[3,3] reg21$coefficients start1[7] <- reg21$coefficients[1] start1[8] <- reg21$coefficients[2] # Regression on y2 for restricted model reg20 <- lm(y2 ~ -1 + y1) anova(reg20) start0[5] <- anova(reg20)[2,3] summary(reg20) start0[7] <- reg20$coefficients[1] # Now look at the starting values Start0 <- c(start0,0) # To make length equal to start1 cbind(truetheta,start1,Start0) unrest <- nlm(pathone1,start1,datta=path1data) ; unrest rest <- nlm(pathone0,start0,datta=path1data) ; rest G <- rest$minimum - unrest$minimum pval <- 1-pchisq(G,1) G ; pval ################# End of Program path1.R #################
Here is the output.
R : Copyright 2001, The R Development Core Team Version 1.4.0 (2001-12-19) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type `license()' or `licence()' for distribution details. R is a collaborative project with many contributors. Type `contributors()' for more information. Type `demo()' for some demos, `help()' for on-line help, or `help.start()' for a HTML browser interface to help. Type `q()' to quit R. > # path1.R R --vanilla < path1.R > # > # Path Model 1 with R > # > # Simulate some data > B1 <- 1 ; B2 <- 2 ; B3 <- 3 > set.seed(9999) > n <- 300 ; e0 <- rnorm(n) > x1 <- rnorm(n,0,2)+e0 ; x2 <- rnorm(n,0,3)+e0 > e1 <- rnorm(n,0,sqrt(2)) ; e2 <- rnorm(n,0,sqrt(3)) > # Now we know all the true parameter values: > # v(x1)=5 v(x2)=10 c(x1,x2)=1 v(e1)=2 v(e2)=3 and B1 B2 B3 above. > truetheta <- c(5,10,1,2,3,B1,B2,B3) > names(truetheta) <- c("sigsqx1","sigsqx2","sigma12","sigsqe1", + "sigsqe2","b1","b2","b3") > # > # Compute endogenous variables. > y1 <- B1*x1+e1 > y2 <- B2*y1+B3*x2+e2 > path1data <- cbind(x1,x2,y1,y2) # Data always = manafest variables > # For non-simulated data, path1data <- read.table("path1.dat") or something > # > # Define functions > # > source("mvnLL.R") > pathone1 <- function(theta,datta) # Full (unresticted) model + # 1 2 3 4 5 6 7 8 + { # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2,b3) 8 parameters + # Elements of the covariance matrix SIGij are functions of the + # parameter vector. + SIG11 <- theta[1]; SIG12 <- theta[3]; SIG13 <- theta[6]*theta[1] + SIG14 <- theta[6]*theta[7]*theta[1]+theta[8]*theta[3] + + SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3] + SIG24 <- theta[6]*theta[7]*theta[3]+theta[8]*theta[2] + + SIG33 <- theta[6]^2*theta[1]+theta[4] + SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[6]*theta[8]*theta[3] + + theta[7]*theta[4] + + SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] + + theta[8]^2*theta[2] + theta[5] + + 2*theta[6]*theta[7]*theta[8]*theta[3] + covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14), + c(SIG12,SIG22,SIG23,SIG24), + c(SIG13,SIG23,SIG33,SIG34), + c(SIG14,SIG24,SIG34,SIG44) ) + pathone1 <- mvnLL(covmat,datta) + pathone1 + } # End of pathone1 > > pathone0 <- function(theta,datta) # Reduced (resticted) model with b3=0 + # 1 2 3 4 5 6 7 + { # theta = (sigsqx1,sigsqx2,sigma12,sigsqe1,sigsqe2,b1,b2) 7 parameters + # Elements of the covariance matrix SIGij are functions of the + # parameter vector + SIG11 <- theta[1]; SIG12 <- theta[3]; SIG13 <- theta[6]*theta[1] + SIG14 <- theta[6]*theta[7]*theta[1] + + SIG22 <- theta[2]; SIG23 <- theta[6]*theta[3] + SIG24 <- theta[6]*theta[7]*theta[3] + + SIG33 <- theta[6]^2*theta[1]+theta[4] + SIG34 <- theta[6]^2*theta[7]*theta[1]+theta[7]*theta[4] + + SIG44 <- theta[6]^2*theta[7]^2*theta[1] + theta[7]^2*theta[4] + + theta[5] + covmat <- rbind( c(SIG11,SIG12,SIG13,SIG14), + c(SIG12,SIG22,SIG23,SIG24), + c(SIG13,SIG23,SIG33,SIG34), + c(SIG14,SIG24,SIG34,SIG44) ) + pathone0 <- mvnLL(covmat,datta) + pathone0 + } # End of pathone0 > > # Get nice starting values. You don't have to extract the numbers from the > # output like this. You can just look at output with your eyes and type in > # the numbers you see. Like start1[1] <- 5.18 instead of > # start1[1] <- exmat[1,1]. > start1 <- numeric(8) ; start0 <- numeric(7) > # For manafest exogenous vars, just use sample covariance matrix. > exmat <- var(path1data[,1:2]) ; exmat x1 x2 x1 5.1786163 0.4464832 x2 0.4464832 9.5407043 > start1[1] <- exmat[1,1] ; start1[2] <- exmat[2,2] > start1[3] <- exmat[1,2] > start0[1] <- exmat[1,1] ; start0[2] <- exmat[2,2] > start0[3] <- exmat[1,2] > # For b values, use regression. MSE will estimate variance of error term > # Regression on y1 is the same for rest and unrest model. > reg1 <- lm(y1~-1+x1) # -1 means no intercept > anova(reg1) Analysis of Variance Table Response: y1 Df Sum Sq Mean Sq F value Pr(>F) x1 1 1557.30 1557.30 768.01 < 2.2e-16 *** Residuals 299 606.28 2.03 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > anova(reg1)[2,3] 2.027694 > start1[4] <- anova(reg1)[2,3] ; start0[4] <- anova(reg1)[2,3] > summary(reg1) Call: lm(formula = y1 ~ -1 + x1) Residuals: Min 1Q Median 3Q Max -4.79074 -0.94044 0.01468 0.98386 3.41464 Coefficients: Estimate Std. Error t value Pr(>|t|) x1 1.00020 0.03609 27.71 <2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 1.424 on 299 degrees of freedom Multiple R-Squared: 0.7198, Adjusted R-squared: 0.7188 F-statistic: 768 on 1 and 299 DF, p-value: 0 > reg1$coefficients[1] x1 1.000205 > start1[6] <- reg1$coefficients[1] ; start0[6] <- reg1$coefficients[1] > # Regression on y2 for unrestricted model > reg21 <- lm(y2 ~ -1 + y1 + x2) > anova(reg21) Analysis of Variance Table Response: y2 Df Sum Sq Mean Sq F value Pr(>F) y1 1 9000.8 9000.8 2786.4 < 2.2e-16 *** x2 1 25507.3 25507.3 7896.4 < 2.2e-16 *** Residuals 298 962.6 3.2 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > start1[5] <- anova(reg21)[3,3] > reg21$coefficients y1 x2 2.047839 2.983915 > start1[7] <- reg21$coefficients[1] > start1[8] <- reg21$coefficients[2] > # Regression on y2 for restricted model > reg20 <- lm(y2 ~ -1 + y1) > anova(reg20) Analysis of Variance Table Response: y2 Df Sum Sq Mean Sq F value Pr(>F) y1 1 9000.8 9000.8 101.67 < 2.2e-16 *** Residuals 299 26469.9 88.5 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 > start0[5] <- anova(reg20)[2,3] > summary(reg20) Call: lm(formula = y2 ~ -1 + y1) Residuals: Min 1Q Median 3Q Max -26.0718 -7.5312 -0.8035 5.6530 25.0619 Coefficients: Estimate Std. Error t value Pr(>|t|) y1 2.0396 0.2023 10.08 <2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 9.409 on 299 degrees of freedom Multiple R-Squared: 0.2538, Adjusted R-squared: 0.2513 F-statistic: 101.7 on 1 and 299 DF, p-value: 0 > start0[7] <- reg20$coefficients[1] > # Now look at the starting values > Start0 <- c(start0,0) # To make length equal to start1 > cbind(truetheta,start1,Start0) truetheta start1 Start0 sigsqx1 5 5.1786163 5.1786163 sigsqx2 10 9.5407043 9.5407043 sigma12 1 0.4464832 0.4464832 sigsqe1 2 2.0276943 2.0276943 sigsqe2 3 3.2302363 88.5279856 b1 1 1.0002050 1.0002050 b2 2 2.0478390 2.0396378 b3 3 2.9839146 0.0000000 > > unrest <- nlm(pathone1,start1,datta=path1data) ; unrest $minimum [1] 5132.435 $estimate [1] 5.1613452 9.5089096 0.4450373 2.0181513 3.2060569 1.0018946 2.0486456 [8] 2.9828288 $gradient [1] -1.191198e-04 1.683380e-05 5.320544e-04 -1.153683e-04 9.985541e-05 [6] -4.175764e-04 -8.790195e-05 4.201662e-04 $code [1] 1 $iterations [1] 37 > rest <- nlm(pathone0,start0,datta=path1data) ; rest $minimum [1] 6125.473 $estimate [1] 5.1613436 9.5089194 0.4450127 2.0181505 87.8083356 1.0018943 2.0498683 $gradient [1] -1.103092e-04 6.379620e-05 2.273737e-04 -1.762071e-04 -3.810607e-05 [6] -9.785816e-04 -1.344364e-04 $code [1] 1 $iterations [1] 50 > G <- rest$minimum - unrest$minimum > pval <- 1-pchisq(G,1) > G ; pval [1] 993.038 [1] 0 > > ################# End of Program path1.R ################# >