##--------------------------------------------------------------## ## Script for Lecture 5: ## ## Generalized Linear Models and ## ## Mixed-Effects Models in R ## ## John Fox ## ## Introduction to the R Statistical Computing Environment ## ## ICPSR ## ## 2021 ## ##--------------------------------------------------------------## # Generalized linear models # binary logit model library("car") library("effects") ?Cowles mod.cowles <- glm(volunteer ~ sex + extraversion*neuroticism, family=binomial, data=Cowles) S(mod.cowles) Anova(mod.cowles) plot(predictorEffects(mod.cowles)) plot(Effect(c("extraversion", "neuroticism"), mod.cowles)) plot(Effect(c("extraversion", "neuroticism"), mod.cowles), lines=list(multiline=TRUE)) # functions like anova(), linearHypothesis(), and deltaMethod() also work # Poisson and quasi-Poisson regression some(Ornstein) nrow(Ornstein) ?Ornstein (tab <- xtabs(~interlocks, data=Ornstein)) x <- as.numeric(names(tab)) # the names are the distinct values of interlocks plot(x, as.vector(tab), type="h", xlab="Number of Interlocks", ylab="Frequency") points(x, tab, pch=16) mod.ornstein <- glm(interlocks ~ log2(assets) + nation + sector, family=poisson, data=Ornstein) S(mod.ornstein) Anova(mod.ornstein) plot(predictorEffects(mod.ornstein)) # quasi-Poisson model, allowing for overdispersion mod.ornstein.q <- update(mod.ornstein, family=quasipoisson) S(mod.ornstein.q) Anova(mod.ornstein.q) # negative-binomial model, also accommodates overdisperson library("MASS") # for glm.nb() ?glm.nb mod.ornstein.nb <- glm.nb(interlocks ~ log2(assets) + nation + sector, data=Ornstein) S(mod.ornstein.nb) compareCoefs(mod.ornstein.q, mod.ornstein.nb) # Many diagnostics work with GLMs # effects plots with partial residuals plot(Effect(c("extraversion", "neuroticism"), mod.cowles,residuals=TRUE), partial.residuals=list(span=0.9)) # component-plus-residuals plots brief(mod.ornstein.q2 <- glm(interlocks ~ assets + nation + sector, family=quasipoisson, data=Ornstein)) crPlot(mod.ornstein.q2, "assets") crPlot(mod.ornstein.q, "log2(assets)") # added-variable plots avPlot(mod.ornstein.q, "log2(assets)", id=list(n=2, method="mahal")) # influence plots influencePlot(mod.cowles) mod.cowles.2 <- update(mod.cowles, subset = !(rownames(Cowles) %in% c(1364, 1371))) compareCoefs(mod.cowles, mod.cowles.2) # A mixed-effects models for longitudinal data # Blackmore data library("nlme") brief(Blackmore, c(10, 10)) # first and last 10 obs. dim(Blackmore) sum(Blackmore$exercise == 0) Blackmore$log.exercise <- log(Blackmore$exercise + 5/60, 2) #logs, base 2 (add 5 minutes to avoid 0s) par(mfrow=c(1, 2), mar=c(5, 4, 1, 4)) boxplot(exercise ~ group, ylab="Exercise (hours/week)", data=Blackmore) boxplot(log.exercise ~ group, ylab=expression(log[2]*(Exercise + 5/60)), data=Blackmore) axis(4, at=seq(-2, 4, by=2), labels=round(2^seq(-2, 4, by=2) - 5/60, 1)) mtext("Exercise (hours/week)", 4, line=3) set.seed(12345) # for reproducibility pat <- with(Blackmore, sample(unique(subject[group=='patient']), 20) # 20 patients ) Pat.20 <- with(Blackmore, groupedData(log.exercise ~ age | subject, data=Blackmore[is.element(subject, pat),]) ) con <- with(Blackmore, sample(unique(subject[group=='control']), 20) # 20 controls ) Con.20 <- with(Blackmore, groupedData(log.exercise ~ age | subject, data=Blackmore[is.element(subject, con),]) ) print(plot(Con.20, main='Control Subjects', cex.main=1, xlab='Age', ylab=expression(log[2]*Exercise), ylim=1.2*range(Con.20$log.exercise, Pat.20$log.exercise), layout=c(10, 2), aspect=1.0), position=c(0, 0, 1, .5), more=TRUE) print(plot(Pat.20, main='Patients', cex.main=1, xlab='Age', ylab=expression(log[2]*Exercise), ylim=1.2*range(Con.20$log.exercise, Pat.20$log.exercise), layout=c(10, 2), aspect=1.0), position=c(0, .5, 1, 1)) pat.list <- lmList(log.exercise ~ age | subject, subset= group == "patient", data=Blackmore) con.list <- lmList(log.exercise ~ age | subject, subset= group == "control", data=Blackmore) pat.coef <- coef(pat.list) con.coef <- coef(con.list) par(mfrow=c(1, 2)) boxplot(pat.coef[,1], con.coef[,1], main='Intercepts', cex.main=1, names=c('Patients', 'Controls')) boxplot(pat.coef[,2], con.coef[,2], main='Slopes', cex.main=1, names=c('Patients', 'Controls')) bm.mod.1 <- lme(log.exercise ~ I(age - 8)*group, random = ~ I(age - 8) | subject, data=Blackmore) S(bm.mod.1) bm.mod.2 <- update(bm.mod.1, random = ~ 1 | subject) # no random slopes anova(bm.mod.1, bm.mod.2) bm.mod.3 <- update(bm.mod.1, random = ~ I(age - 8) - 1 | subject) # no random intercepts anova(bm.mod.1, bm.mod.3) bm.mod.4 <- update(bm.mod.1, correlation = corCAR1(form = ~ age |subject)) # CAR1 errors; this model doesn't converge bm.mod.5 <- update(bm.mod.1, random = ~ 1 | subject, correlation = corCAR1(form = ~ age |subject)) # CAR1 errors, no random slopes bm.mod.6 <- update(bm.mod.1, random = ~ I(age - 8) - 1 | subject, correlation = corCAR1(form = ~ age |subject)) # CAR1 errors, no random intercepts logLik(bm.mod.1) logLik(bm.mod.5) logLik(bm.mod.6) BIC(bm.mod.1, bm.mod.5, bm.mod.6) AIC(bm.mod.1, bm.mod.5, bm.mod.6) compareCoefs(bm.mod.1, bm.mod.5, bm.mod.6) S(bm.mod.5) Anova(bm.mod.5) # effect plots for the fixed effects plot(Effect(c("age", "group"), bm.mod.5)) plot(Effect(c("age", "group"), bm.mod.5, transformation=list(link=function(x) log2(x + 5/60), inverse=function(x) 2^x - 5/60)), type="response", lines=list(multiline=TRUE), confint=list(style="bars"), axes=list(x=list(age=list(lab="Age (years)"), rug=FALSE), y=list(lab="Exercise (hours/week)")), lattice=list(key.args=list(x = 0.20, y = 0.75, corner = c(0, 0), padding.text = 1.25)), main="") # some diagnostics are also available for mixed models plot(Effect(c("age", "group"), bm.mod.5, residuals=TRUE))