Binary data

# Load the wcgs data frame that ships with the faraway package, then
# preview its first rows to see the available columns.
data(wcgs, package = "faraway")
head(wcgs) # not run: str(wcgs); plot(wcgs); help(wcgs)
##      age height weight sdp dbp chol behave cigs dibep chd  typechd timechd
## 2001  49     73    150 110  76  225     A2   25     B  no     none    1664
## 2002  42     70    160 154  84  177     A2   20     B  no     none    3071
## 2003  42     69    160 110  78  181     B3    0     A  no     none    3071
## 2004  41     68    152 124  78  132     B4   20     A  no     none    3064
## 2005  59     70    150 144  86  255     B3   20     A yes infdeath    1885
## 2006  44     72    204 150  90  182     B4    0     A  no     none    3102
##        arcus
## 2001  absent
## 2002 present
## 2003  absent
## 2004  absent
## 2005 present
## 2006  absent
# Count the NA cells across the whole data frame (all columns combined).
# There are a few missing values; further sleuthing finds that arcus and
# chol are the columns that have them. colSums(is.na(wcgs)) would give
# the per-column breakdown directly.
sum(is.na(wcgs))
## [1] 14

Fitting a binary-response model (logistic regression via a binomial GLM):

# First attempt: regress chd on every other column (chd ~ .).
# This fit does not converge (see the warning printed below).
# NOTE(review): typechd and timechd appear to encode the outcome itself --
# in the preview, typechd is "none" exactly when chd is "no" -- so they
# would separate the response perfectly. That is consistent with the very
# large typechdnone coefficient and the near-zero residual deviance in the
# output. Confirm the column meanings with help(wcgs).
# NOTE(review): dibepB comes back NA (not estimable); presumably dibep is
# fully determined by behave, as it is in the previewed rows -- verify.
glm(chd ~ ., family = binomial, data = wcgs)
## Warning: glm.fit: algorithm did not converge
## 
## Call:  glm(formula = chd ~ ., family = binomial, data = wcgs)
## 
## Coefficients:
##     (Intercept)              age           height           weight  
##       2.657e+01       -1.582e-14       -1.066e-13        1.016e-14  
##             sdp              dbp             chol         behaveA2  
##       1.967e-14       -2.099e-14        8.989e-16       -4.352e-13  
##        behaveB3         behaveB4             cigs           dibepB  
##      -1.818e-13       -1.213e-13       -6.687e-15               NA  
## typechdinfdeath      typechdnone    typechdsilent          timechd  
##       1.358e-08       -5.313e+01       -1.802e-07        9.534e-16  
##    arcuspresent  
##       3.478e-13  
## 
## Degrees of Freedom: 3139 Total (i.e. Null);  3124 Residual
##   (14 observations deleted due to missingness)
## Null Deviance:       1769 
## Residual Deviance: 1.822e-08     AIC: 32
# Let's check personality type (behave), along with a few other variables.
# The fit is stored so it can be summarised here and compared with a
# reduced model further down.
# NOTE(review): with a factor response, glm's binomial family treats the
# first level as "failure", so this presumably models P(chd == "yes") --
# confirm with levels(wcgs$chd).
heartmod <- glm(chd ~ age + weight + sdp + dbp + behave + cigs, family = binomial, data = wcgs)
summary(heartmod)
## 
## Call:
## glm(formula = chd ~ age + weight + sdp + dbp + behave + cigs, 
##     family = binomial, data = wcgs)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1892  -0.4445  -0.3357  -0.2514   2.7338  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -9.642543   0.911219 -10.582  < 2e-16 ***
## age          0.065466   0.011887   5.508 3.64e-08 ***
## weight       0.008492   0.003144   2.701  0.00692 ** 
## sdp          0.018970   0.006238   3.041  0.00236 ** 
## dbp          0.001236   0.010371   0.119  0.90515    
## behaveA2     0.040297   0.219097   0.184  0.85407    
## behaveB3    -0.666342   0.241078  -2.764  0.00571 ** 
## behaveB4    -0.575313   0.317293  -1.813  0.06980 .  
## cigs         0.022630   0.004149   5.455 4.90e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1781.2  on 3153  degrees of freedom
## Residual deviance: 1638.4  on 3145  degrees of freedom
## AIC: 1656.4
## 
## Number of Fisher Scoring iterations: 6
# Reduced model: refit heartmod with the behave term removed. update()
# keeps the rest of the formula and reuses the original family and data
# arguments (see the echoed Call in the summary below).
heartmod2 <- update(heartmod, . ~ . - behave)
summary(heartmod2)
## 
## Call:
## glm(formula = chd ~ age + weight + sdp + dbp + cigs, family = binomial, 
##     data = wcgs)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1267  -0.4392  -0.3455  -0.2683   2.8030  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -10.356451   0.873676 -11.854  < 2e-16 ***
## age           0.069985   0.011805   5.928 3.06e-09 ***
## weight        0.008904   0.003125   2.849  0.00438 ** 
## sdp           0.019470   0.006177   3.152  0.00162 ** 
## dbp           0.002298   0.010302   0.223  0.82352    
## cigs          0.024870   0.004124   6.030 1.64e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1781.2  on 3153  degrees of freedom
## Residual deviance: 1662.1  on 3148  degrees of freedom
## AIC: 1674.1
## 
## Number of Fisher Scoring iterations: 5
# Compare the two nested fits: the table reports the change in residual
# deviance (and in residual df) from dropping behave.
# No test= argument is passed, so the table printed below has no p-value
# column; anova(heartmod, heartmod2, test = "Chisq") would request the
# chi-squared (likelihood-ratio) test for the dropped term.
anova(heartmod, heartmod2)
## Analysis of Deviance Table
## 
## Model 1: chd ~ age + weight + sdp + dbp + behave + cigs
## Model 2: chd ~ age + weight + sdp + dbp + cigs
##   Resid. Df Resid. Dev Df Deviance
## 1      3145     1638.4            
## 2      3148     1662.1 -3  -23.698