# Load the wcgs data set that ships with the faraway package.
data("wcgs", package = "faraway")
# Preview the first rows. Other ways to explore the data (not run here):
#   str(wcgs); plot(wcgs); help(wcgs)
head(wcgs)
## age height weight sdp dbp chol behave cigs dibep chd typechd timechd
## 2001 49 73 150 110 76 225 A2 25 B no none 1664
## 2002 42 70 160 154 84 177 A2 20 B no none 3071
## 2003 42 69 160 110 78 181 B3 0 A no none 3071
## 2004 41 68 152 124 78 132 B4 20 A no none 3064
## 2005 59 70 150 144 86 255 B3 20 A yes infdeath 1885
## 2006 44 72 204 150 90 182 B4 0 A no none 3102
## arcus
## 2001 absent
## 2002 present
## 2003 absent
## 2004 absent
## 2005 present
## 2006 absent
# Total number of missing cells across the whole data frame.
sum(is.na(wcgs))
## [1] 14
# The grand total does not say where the gaps are. Per-column counts locate
# them directly, with no manual sleuthing (the original note reports that the
# missing values sit in arcus and chol).
colSums(is.na(wcgs))
# Fitting a binary-response (logistic) model:
# Naive first attempt: regress chd on every other column (`.`).
# NOTE(review): this fit does not converge (see the warning in the output
# below). The near-zero residual deviance and the very large typechdnone
# coefficient suggest that typechd (and probably timechd) encode the outcome
# itself and therefore separate chd perfectly -- confirm against help(wcgs).
glm(chd ~ ., family = binomial, data = wcgs)
## Warning: glm.fit: algorithm did not converge
##
## Call: glm(formula = chd ~ ., family = binomial, data = wcgs)
##
## Coefficients:
## (Intercept) age height weight
## 2.657e+01 -1.582e-14 -1.066e-13 1.016e-14
## sdp dbp chol behaveA2
## 1.967e-14 -2.099e-14 8.989e-16 -4.352e-13
## behaveB3 behaveB4 cigs dibepB
## -1.818e-13 -1.213e-13 -6.687e-15 NA
## typechdinfdeath typechdnone typechdsilent timechd
## 1.358e-08 -5.313e+01 -1.802e-07 9.534e-16
## arcuspresent
## 3.478e-13
##
## Degrees of Freedom: 3139 Total (i.e. Null); 3124 Residual
## (14 observations deleted due to missingness)
## Null Deviance: 1769
## Residual Deviance: 1.822e-08 AIC: 32
## Let's check personality type (behave), along with a few other variables.
# Logistic regression of chd on behave together with age, weight, sdp, dbp
# and cigs; the coefficient table is printed by summary().
heartmod <- glm(
  chd ~ age + weight + sdp + dbp + behave + cigs,
  family = binomial,
  data = wcgs
)
summary(heartmod)
##
## Call:
## glm(formula = chd ~ age + weight + sdp + dbp + behave + cigs,
## family = binomial, data = wcgs)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1892 -0.4445 -0.3357 -0.2514 2.7338
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.642543 0.911219 -10.582 < 2e-16 ***
## age 0.065466 0.011887 5.508 3.64e-08 ***
## weight 0.008492 0.003144 2.701 0.00692 **
## sdp 0.018970 0.006238 3.041 0.00236 **
## dbp 0.001236 0.010371 0.119 0.90515
## behaveA2 0.040297 0.219097 0.184 0.85407
## behaveB3 -0.666342 0.241078 -2.764 0.00571 **
## behaveB4 -0.575313 0.317293 -1.813 0.06980 .
## cigs 0.022630 0.004149 5.455 4.90e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1781.2 on 3153 degrees of freedom
## Residual deviance: 1638.4 on 3145 degrees of freedom
## AIC: 1656.4
##
## Number of Fisher Scoring iterations: 6
# Refit the same model without behave, leaving every other term unchanged,
# so that the two fits differ only by that factor.
heartmod2 <- update(heartmod, formula. = . ~ . - behave)
summary(heartmod2)
##
## Call:
## glm(formula = chd ~ age + weight + sdp + dbp + cigs, family = binomial,
## data = wcgs)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1267 -0.4392 -0.3455 -0.2683 2.8030
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.356451 0.873676 -11.854 < 2e-16 ***
## age 0.069985 0.011805 5.928 3.06e-09 ***
## weight 0.008904 0.003125 2.849 0.00438 **
## sdp 0.019470 0.006177 3.152 0.00162 **
## dbp 0.002298 0.010302 0.223 0.82352
## cigs 0.024870 0.004124 6.030 1.64e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1781.2 on 3153 degrees of freedom
## Residual deviance: 1662.1 on 3148 degrees of freedom
## AIC: 1674.1
##
## Number of Fisher Scoring iterations: 5
# Compare the fits with and without behave via their residual deviances.
anova(heartmod, heartmod2)
## Analysis of Deviance Table
##
## Model 1: chd ~ age + weight + sdp + dbp + behave + cigs
## Model 2: chd ~ age + weight + sdp + dbp + cigs
## Resid. Df Resid. Dev Df Deviance
## 1 3145 1638.4
## 2 3148 1662.1 -3 -23.698
# The call above requests no test, so the table carries no p-value, and
# listing the larger model first makes Df and Deviance negative. Put the
# smaller model first and ask for the likelihood-ratio (chi-squared) test
# explicitly: the drop in deviance (23.698 on 3 df in the table above) is
# then referred to a chi-squared distribution with 3 degrees of freedom.
anova(heartmod2, heartmod, test = "Chisq")