# let's use the low birthweight data again.
# Taking a look at the first 3 rows we see:
> HLdata[1:3,]
id low age lwt race smoke ptl ht ui ftv bwt
1 4 1 28 120 3 1 1 0 1 0 709
2 10 1 29 130 1 0 0 0 1 2 1021
3 11 1 34 187 2 1 0 1 0 0 1135
# fit a logit model
> logit1.out <- glm(low~age+lwt+smoke+ht+ui, family=binomial, data=HLdata)
> summary(logit1.out)
Call:
glm(formula = low ~ age + lwt + smoke + ht + ui, family = binomial,
data = HLdata)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.6549 -0.8356 -0.6462 1.1251 1.9925
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.399788 1.079770 1.296 0.19485
age -0.034073 0.033662 -1.012 0.31144
lwt -0.015447 0.006581 -2.347 0.01891 *
smoke 0.647539 0.336548 1.924 0.05435 .
ht 1.893272 0.683215 2.771 0.00559 **
ui 0.884606 0.443959 1.993 0.04631 *
---
Signif. codes: 0 `***' 0.001 `**' 0q.01 `*' 0.05 `.' 0.1 ` ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 234.67 on 188 degrees of freedom
Residual deviance: 211.78 on 183 degrees of freedom
AIC: 223.78
Number of Fisher Scoring iterations: 3
# fit another logit model including race
> HLdata$AfrAm <- HLdata$race==2
> HLdata$othrace <- HLdata$race==3
> logit2.out <- glm(low~age+lwt+AfrAm+othrace+smoke+ht+ui,
family=binomial, data=HLdata)
> summary(logit2.out )
Call:
glm(formula = low ~ age + lwt + AfrAm + othrace + smoke + ht +
ui, family = binomial, data = HLdata)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7323 -0.8329 -0.5345 0.9869 2.1673
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.437166 1.189626 0.367 0.71326
age -0.018254 0.035289 -0.517 0.60497
lwt -0.016284 0.006839 -2.381 0.01726 *
AfrAmTRUE 1.280595 0.525871 2.435 0.01488 *
othraceTRUE 0.901839 0.433378 2.081 0.03744 *
smoke 1.027530 0.393022 2.614 0.00894 **
ht 1.857574 0.687974 2.700 0.00693 **
ui 0.895376 0.448023 1.999 0.04566 *
---
Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 234.67 on 188 degrees of freedom
Residual deviance: 203.95 on 181 degrees of freedom
AIC: 219.95
Number of Fisher Scoring iterations: 3
# OK, let's conduct a likelihood ratio test of model 1 vs. model 2
# Here the constrained model is model 1 and the unconstrained model is
# model 2. Since 2 constraints are applied, the test statistic under
# the null follows a chi-square distribution with 2 degrees of freedom
> lr <- deviance(logit1.out) - deviance(logit2.out)
> lr
[1] 7.829775
> 1 - pchisq(lr, 2)
[1] 0.01994279
# The p-value of 0.0199 indicates that there is reason to believe
# (at the 0.02 level) that the constraints implied by model 1 do
# not hold.
# We could also look at BIC to pick models. The AIC() function in R
# will return BIC values if the argument k is set to log(n)
> nrow(HLdata)
[1] 189
> bic1 <- AIC(logit1.out, k=log(189))
> bic2 <- AIC(logit2.out, k=log(189))
> bic1
[1] 243.2283
> bic2
[1] 245.8820
> bic2 - bic1
[1] 2.653719
# This indicates moderate support for model 1 over model 2. Nonetheless,
# given that we have strong reason to believe that race should be in the
# model we may well want to stick with model 2.
# Let's conduct a Wald test of whether the coefficients on smoking and
# hypertension are equal to each other in the second model
> Q <- matrix(c(0,0,0,0,0,1,-1,0), 1, 8)
> beta <- coef(logit2.out)
> r <- 0
> V <- vcov(logit2.out)
> W <- t(Q %*% beta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% beta - r)
> W
[,1]
[1,] 1.107668
> 1 - pchisq(W, 1)
[,1]
[1,] 0.2925894
# the p-value of 0.293 suggests that there is no reason to believe that
# the null hypothesis (that the coefficients are equal) is not true.