discrepancy between GLMMadaptive and glmmTMB
Closed this issue · 3 comments
I'm comparing models fitted with your package and glmmTMB, to see how results match or differ. Here you see two examples. In the first, the coefficients from the count-component differ, while the zero-inflation part are identical. In the second example, coefficients both of the count-component and zero-inflated part are (almost) identical.
Do you have any ideas where the differences in the first model come from?
library(glmmTMB)
library(GLMMadaptive)
data("Salamanders")
m1 <- glmmTMB(
count ~ spp + mined + (1 | site),
ziformula = ~ spp + mined,
family = truncated_poisson(),
data = Salamanders
)
m2 <- mixed_model(
count ~ spp + mined,
random = ~1 | site,
zi_fixed = ~ spp + mined,
family = hurdle.poisson(),
data = Salamanders
)
summary(m1)
#> Family: truncated_poisson ( log )
#> Formula: count ~ spp + mined + (1 | site)
#> Zero inflation: ~spp + mined
#> Data: Salamanders
#>
#> AIC BIC logLik deviance df.resid
#> 1790.2 1866.1 -878.1 1756.2 627
#>
#> Random effects:
#>
#> Conditional model:
#> Groups Name Variance Std.Dev.
#> site (Intercept) 0.05318 0.2306
#> Number of obs: 644, groups: site, 23
#>
#> Conditional model:
#> Estimate Std. Error z value Pr(>|z|)
#> (Intercept) -0.06702 0.20605 -0.325 0.7450
#> sppPR -0.52093 0.27872 -1.869 0.0616 .
#> sppDM 0.22458 0.14485 1.550 0.1210
#> sppEC-A -0.19548 0.20122 -0.972 0.3313
#> sppEC-L 0.64672 0.13229 4.889 1.01e-06 ***
#> sppDES-L 0.60514 0.13027 4.645 3.39e-06 ***
#> sppDF 0.04602 0.15397 0.299 0.7650
#> minedno 1.01447 0.18724 5.418 6.02e-08 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Zero-inflation model:
#> Estimate Std. Error z value Pr(>|z|)
#> (Intercept) 1.7556 0.2833 6.196 5.78e-10 ***
#> sppPR 1.6785 0.3964 4.234 2.30e-05 ***
#> sppDM -0.4269 0.3502 -1.219 0.22288
#> sppEC-A 1.1046 0.3684 2.998 0.00272 **
#> sppEC-L -0.4269 0.3502 -1.219 0.22288
#> sppDES-L -0.6716 0.3519 -1.909 0.05631 .
#> sppDF -0.4269 0.3502 -1.219 0.22288
#> minedno -2.4038 0.2089 -11.508 < 2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m2)
#>
#> Call:
#> mixed_model(fixed = count ~ spp + mined, random = ~1 | site,
#> data = Salamanders, family = hurdle.poisson(), zi_fixed = ~spp +
#> mined)
#>
#> Data Descriptives:
#> Number of Observations: 644
#> Number of Groups: 23
#>
#> Model:
#> family: hurdle poisson
#> link: log
#>
#> Fit statistics:
#> log.Lik AIC BIC
#> -1005.88 2045.759 2065.063
#>
#> Random effects covariance matrix:
#> StdDev
#> (Intercept) 2.603956
#>
#> Fixed effects:
#> Estimate Std.Err z-value p-value
#> (Intercept) -3.1554 0.2253 -14.0030 < 1e-04
#> sppPR 0.2035 0.2431 0.8368 0.40271196
#> sppDM 0.9135 0.1587 5.7577 < 1e-04
#> sppEC-A 0.6263 0.1941 3.2277 0.00124803
#> sppEC-L 1.4843 0.1376 10.7857 < 1e-04
#> sppDES-L 0.9575 0.1449 6.6075 < 1e-04
#> sppDF 0.3277 0.1573 2.0832 0.03722871
#> minedno 0.7276 0.1896 3.8370 0.00012453
#>
#> Zero-part coefficients:
#> Estimate Std.Err z-value p-value
#> (Intercept) 1.7556 0.2833 6.1963 < 1e-04
#> sppPR 1.6785 0.3964 4.2341 < 1e-04
#> sppDM -0.4269 0.3502 -1.2190 0.2228629
#> sppEC-A 1.1045 0.3684 2.9981 0.0027163
#> sppEC-L -0.4269 0.3502 -1.2190 0.2228629
#> sppDES-L -0.6716 0.3519 -1.9087 0.0563035
#> sppDF -0.4269 0.3502 -1.2190 0.2228629
#> minedno -2.4038 0.2089 -11.5077 < 1e-04
#>
#> Integration:
#> method: adaptive Gauss-Hermite quadrature rule
#> quadrature points: 11
#>
#> Optimization:
#> method: hybrid EM and quasi-Newton
#> converged: TRUE
library(glmmTMB)
library(GLMMadaptive)
set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time
# we constuct a data frame with the design:
# everyone has a baseline measurment, and then measurements at random follow-up times
DF <- data.frame(id = rep(seq_len(n), each = K),
time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))
# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = DF)
Z <- model.matrix(~ 1, data = DF)
# design matrices for the fixed and random effects zero part
X_zi <- model.matrix(~ sex, data = DF)
Z_zi <- model.matrix(~ 1, data = DF)
betas <- c(1.5, 0.05, 0.05, -0.03) # fixed effects coefficients non-zero part
shape <- 2 # shape/size parameter of the negative binomial distribution
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.4 # variance of random intercepts zero part
# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)))
# linear predictor non-zero part
eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF$id, 1, drop = FALSE]))
# linear predictor zero part
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[DF$id, 2, drop = FALSE]))
# we simulate negative binomial longitudinal data
DF$y <- rnbinom(n * K, size = shape, mu = exp(eta_y))
# we set the extra zeros
DF$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0
fm1 <- mixed_model(y ~ sex * time, random = ~ 1 | id, data = DF,
family = zi.poisson(), zi_fixed = ~ sex)
fm2 <- glmmTMB(y ~ sex * time + (1 | id), data = DF,
family = poisson(), ziformula = ~ sex)
summary(fm1)
#>
#> Call:
#> mixed_model(fixed = y ~ sex * time, random = ~1 | id, data = DF,
#> family = zi.poisson(), zi_fixed = ~sex)
#>
#> Data Descriptives:
#> Number of Observations: 800
#> Number of Groups: 100
#>
#> Model:
#> family: zero-inflated poisson
#> link: log
#>
#> Fit statistics:
#> log.Lik AIC BIC
#> -2223.937 4461.874 4480.11
#>
#> Random effects covariance matrix:
#> StdDev
#> (Intercept) 0.8272898
#>
#> Fixed effects:
#> Estimate Std.Err z-value p-value
#> (Intercept) 1.5276 0.1288 11.8605 < 1e-04
#> sexfemale 0.0174 0.1819 0.0954 0.92396
#> time -0.0040 0.0159 -0.2542 0.79937
#> sexfemale:time 0.0004 0.0230 0.0188 0.98496
#>
#> Zero-part coefficients:
#> Estimate Std.Err z-value p-value
#> (Intercept) -1.2106 0.1411 -8.5811 < 1e-04
#> sexfemale 0.4986 0.1823 2.7348 0.0062411
#>
#> Integration:
#> method: adaptive Gauss-Hermite quadrature rule
#> quadrature points: 11
#>
#> Optimization:
#> method: hybrid EM and quasi-Newton
#> converged: TRUE
summary(fm2)
#> Family: poisson ( log )
#> Formula: y ~ sex * time + (1 | id)
#> Zero inflation: ~sex
#> Data: DF
#>
#> AIC BIC logLik deviance df.resid
#> 4462.6 4495.4 -2224.3 4448.6 793
#>
#> Random effects:
#>
#> Conditional model:
#> Groups Name Variance Std.Dev.
#> id (Intercept) 0.6841 0.8271
#> Number of obs: 800, groups: id, 100
#>
#> Conditional model:
#> Estimate Std. Error z value Pr(>|z|)
#> (Intercept) 1.5273202 0.1287453 11.863 <2e-16 ***
#> sexfemale 0.0168426 0.1818901 0.093 0.926
#> time -0.0040292 0.0158893 -0.254 0.800
#> sexfemale:time 0.0004784 0.0229638 0.021 0.983
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Zero-inflation model:
#> Estimate Std. Error z value Pr(>|z|)
#> (Intercept) -1.2108 0.1413 -8.570 < 2e-16 ***
#> sexfemale 0.4985 0.1826 2.731 0.00632 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Thanks for noticing this. I've tried first with a simulation to see if something is consistently wrong, i.e., check the following:
library("GLMMadaptive")
library("glmmTMB")
#> Warning: package 'glmmTMB' was built under R version 3.5.2
simulate_fun <- function (seed) {
set.seed(seed)
n <- 200 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time
# we constuct a data frame with the design:
# everyone has a baseline measurment, and then measurements at random follow-up times
DF <- data.frame(id = rep(seq_len(n), each = K),
time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))
# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex + time, data = DF)
Z <- model.matrix(~ 1, data = DF)
# design matrices for the fixed effects zero part
X_zi <- model.matrix(~ sex + time, data = DF)
betas <- c(1.5, 0.05, 0.05) # fixed effects coefficients non-zero part
shape <- 2 # shape/size parameter of the negative binomial distribution
gammas <- c(-1.5, -0.5, 0.5) # fixed effects coefficients zero part
D11 <- 1 # variance of random intercepts non-zero part
# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)))
# linear predictor non-zero part
eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF$id, 1, drop = FALSE]))
# linear predictor zero part
eta_zi <- as.vector(X_zi %*% gammas)
# we simulate truncated Poisson longitudinal data
DF$y <- qpois(runif(n * K, ppois(0, exp(eta_y)), 1), exp(eta_y))
# we set the zeros
DF$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0
DF
}
########################################################################################
M <- 100
betas_GLMMadaptive <- betas_glmmTMB <- matrix(0.0, M, 3)
gammas_GLMMadaptive <- gammas_glmmTMB <- matrix(0.0, M, 3)
for (m in seq_len(M)) {
DF_m <- simulate_fun(2019 + m)
m1 <- mixed_model(y ~ sex + time, random = ~ 1 | id, data = DF_m,
family = hurdle.poisson(), zi_fixed = ~ sex + time)
betas_GLMMadaptive[m, ] <- fixef(m1)
gammas_GLMMadaptive[m, ] <- fixef(m1, sub_model = "zero_part")
##########
m2 <- glmmTMB(y ~ sex + time + (1 | id), ziformula = ~ sex + time,
family = truncated_poisson(), data = DF_m)
betas_glmmTMB[m, ] <- fixef(m2)$cond
gammas_glmmTMB[m, ] <- fixef(m2)$zi
}
# Bias
true_betas <- c(1.5, 0.05, 0.05)
rbind(Bias_GLMMadaptive = colMeans(betas_GLMMadaptive - rep(true_betas, each = M)),
Bias_glmmTMB = colMeans(betas_glmmTMB - rep(true_betas, each = M)))
#> [,1] [,2] [,3]
#> Bias_GLMMadaptive 0.02122751 -0.01035611 -0.0001626549
#> Bias_glmmTMB 0.02083213 -0.01037756 -0.0001601303
true_gammas <- c(-1.5, -0.5, 0.5)
rbind(Bias_GLMMadaptive = colMeans(gammas_GLMMadaptive - rep(true_gammas, each = M)),
Bias_glmmTMB = colMeans(gammas_glmmTMB - rep(true_gammas, each = M)))
#> [,1] [,2] [,3]
#> Bias_GLMMadaptive -0.002330292 0.01484655 0.0001198375
#> Bias_glmmTMB -0.002330305 0.01484668 0.0001197215
# RMSE
rbind(RMSE_GLMMadaptive = sqrt(colMeans((betas_GLMMadaptive - rep(true_betas, each = M))^2)),
RMSE_glmmTMB = sqrt(colMeans((betas_glmmTMB - rep(true_betas, each = M))^2)))
#> [,1] [,2] [,3]
#> RMSE_GLMMadaptive 0.1039681 0.1427301 0.007961253
#> RMSE_glmmTMB 0.1040858 0.1430581 0.007960939
rbind(RMSE_GLMMadaptive = sqrt(colMeans((gammas_GLMMadaptive - rep(true_gammas, each = M))^2)),
RMSE_glmmTMB = sqrt(colMeans((gammas_glmmTMB - rep(true_gammas, each = M))^2)))
#> [,1] [,2] [,3]
#> RMSE_GLMMadaptive 0.1182866 0.1079911 0.03894126
#> RMSE_glmmTMB 0.1182863 0.1079912 0.03894105
This suggests that the two packages are doing the same in simulated data.
Then I looked at the number of quadrature points. Package glmmTMB does Laplace approximation, which is equivalent to nAGQ = 1
. When I set nAGQ = 2
(because of a small problem in the code, currently nAGQ = 1
does not work - I'll fix it soon) to mixed_model()
I get results that are in the same direction as in glmmTMB, i.e.,
library("GLMMadaptive")
library("glmmTMB")
#> Warning: package 'glmmTMB' was built under R version 3.5.2
data("Salamanders")
m1 <- glmmTMB(count ~ spp + mined + (1 | site), ziformula = ~ spp + mined,
family = truncated_poisson(), data = Salamanders)
m2 <- mixed_model(count ~ spp + mined, random = ~ 1 | site, zi_fixed = ~ spp + mined,
family = hurdle.poisson(), data = Salamanders, nAGQ = 2)
fixef(m1)$cond
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> -0.06702286 -0.52092708 0.22457540 -0.19548416 0.64672238 0.60513701
#> sppDF minedno
#> 0.04602476 1.01446593
fixef(m2)
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> -0.70042204 -0.65075508 0.06425725 -0.01005135 1.05811127 0.48668936
#> sppDF minedno
#> 0.08016362 0.91360728
fixef(m1)$z
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> 1.7555900 1.6784724 -0.4269123 1.1045525 -0.4269123 -0.6715877
#> sppDF minedno
#> -0.4269123 -2.4037967
fixef(m2, "zero_part")
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> 1.7555979 1.6785013 -0.4269270 1.1045352 -0.4269270 -0.6715963
#> sppDF minedno
#> -0.4269270 -2.4037889
whereas when I set nAGQ
to a higher number is where the differences occur, e.g.,
library("GLMMadaptive")
library("glmmTMB")
#> Warning: package 'glmmTMB' was built under R version 3.5.2
data("Salamanders")
m1 <- glmmTMB(count ~ spp + mined + (1 | site), ziformula = ~ spp + mined,
family = truncated_poisson(), data = Salamanders)
m2 <- mixed_model(count ~ spp + mined, random = ~ 1 | site, zi_fixed = ~ spp + mined,
family = hurdle.poisson(), data = Salamanders, nAGQ = 25)
fixef(m1)$cond
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> -0.06702286 -0.52092708 0.22457540 -0.19548416 0.64672238 0.60513701
#> sppDF minedno
#> 0.04602476 1.01446593
fixef(m2)
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> -3.1900355 0.1976119 0.9137361 0.6538284 1.4999425 0.9589269
#> sppDF minedno
#> 0.3426018 0.7137289
fixef(m1)$z
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> 1.7555900 1.6784724 -0.4269123 1.1045525 -0.4269123 -0.6715877
#> sppDF minedno
#> -0.4269123 -2.4037967
fixef(m2, "zero_part")
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> 1.7555979 1.6785013 -0.4269270 1.1045352 -0.4269270 -0.6715963
#> sppDF minedno
#> -0.4269270 -2.4037889
This suggests to me that there could be an issue with the approximation of the integrals over the random effects in this dataset.
Ok, I see! I knew that GLMMadaptive uses a different approach to approximate the integrals, but I didn't know it makes such a difference. Maybe you could add a note to the docs, or describe this as short example in a vignette? I guess some users may not be expecting such "large" difference due to different approaches, either?
Here's another example with glmer()
, that seem to confirm your guess:
library(GLMMadaptive)
library(lme4)
#> Loading required package: Matrix
#>
#> Attaching package: 'lme4'
#> The following object is masked from 'package:GLMMadaptive':
#>
#> negative.binomial
library(HSAUR2)
#> Loading required package: tools
data("toenail")
m1 <- glmer(
outcome ~ treatment * visit + (1 | patientID),
data = toenail,
family = binomial,
nAGQ = 20
)
m2 <- mixed_model(
outcome ~ treatment * visit,
random = ~ 1 | patientID,
data = toenail,
family = binomial,
nAGQ = 20
)
fixef(m1)
#> (Intercept) treatmentterbinafine
#> -0.4530317 0.1583433
#> visit treatmentterbinafine:visit
#> -0.7913051 -0.2360022
fixef(m2)
#> (Intercept) treatmentterbinafine
#> -0.4665846 0.1585886
#> visit treatmentterbinafine:visit
#> -0.7915773 -0.2361823
m3 <- glmer(
outcome ~ treatment * visit + (1 | patientID),
data = toenail,
family = binomial
)
m4 <- mixed_model(
outcome ~ treatment * visit,
random = ~ 1 | patientID,
data = toenail,
family = binomial
)
fixef(m3)
#> (Intercept) treatmentterbinafine
#> -1.42518531 -0.01011846
#> visit treatmentterbinafine:visit
#> -0.80443955 -0.23512185
fixef(m4)
#> (Intercept) treatmentterbinafine
#> -0.5334504 0.1566945
#> visit treatmentterbinafine:visit
#> -0.7898613 -0.2357171
Created on 2019-01-22 by the reprex package (v0.2.1)
Maybe you can close this issue then...
Well, this has been the motivation in the first place to develop this package. Namely, that it does matter which method you use to approximate the integrals and that the adaptive Gaussian quadrature is the one that is considered the "gold-standard".