bbolker/broom.mixed

feature request: Tidy random effects from nested mixed effect models

Opened this issue · 3 comments

Thank you for making the broom.mixed package and be able to tidy model results from mixed effect modeling packages.

Recently, I fitted a nested random intercept model and realized that the random parameters are not tidied. All i really want is to have the variance estimates in a nice tibble.

Example:

    library(tibble)
    library(magrittr)
    library(lme4)
#> Loading required package: Matrix
    library(nlme)
#> 
#> Attaching package: 'nlme'
#> The following object is masked from 'package:lme4':
#> 
#>     lmList
    library(broom.mixed)
    
    data("sleepstudy", package="lme4")
    
## Random Intercept Model
lmm1 <- nlme::lme(Reaction ~ Days, random=~ Days|Subject, sleepstudy)

tidy(lmm1, "ran_pars")
#> # A tibble: 4 × 4
#>   effect   group    term                 estimate
#>  <chr>    <chr>    <chr>                   <dbl>
#> 1 ran_pars Subject  sd_(Intercept)        24.7   
#> 2 ran_pars Subject  cor_Days.(Intercept)   0.0656
#> 3 ran_pars Subject  sd_Days                5.92  
#> 4 ran_pars Residual sd_Observation        25.6  

## Crossed effect model
    sleepstudy2 <- sleepstudy %>%
        tibble::add_column(Group = rep(LETTERS[1:10], each = 18))
    
    lmm2 <- nlme::lme(Reaction ~ Days,
                      random = list(Subject = ~ Days, Group = ~ 1),
                      data = sleepstudy2
    )
    
    lmm2
#> Linear mixed-effects model fit by REML
#>   Data: sleepstudy2 
#>   Log-restricted-likelihood: -871.0085
#>   Fixed: Reaction ~ Days 
#> (Intercept)        Days 
#>   252.21968    10.42173 
#> 
#> Random effects:
#>  Formula: ~Days | Subject
#>  Structure: General positive-definite, Log-Cholesky parametrization
#>             StdDev   Corr  
#> (Intercept) 18.30302 (Intr)
#> Days         6.12880 0.048 
#> 
#>  Formula: ~1 | Group %in% Subject
#>         (Intercept) Residual
#> StdDev:    15.84422 25.09344
#> 
#> Number of Observations: 180
#> Number of Groups: 
#>            Subject Group %in% Subject 
#>                 18                 26
    
    #Fixed effects tidy great!
    tidy(lmm2, "fixed")
#> # A tibble: 2 × 7
#>   effect term        estimate std.error    df statistic  p.value
#>   <chr>  <chr>          <dbl>     <dbl> <dbl>     <dbl>    <dbl>
#> 1 fixed  (Intercept)    252.       6.64   153     38.0  7.74e-80
#> 2 fixed  Days            10.4      1.62   153      6.44 1.43e- 9
    
    #Random effects can't be evaluated?
    tidy(lmm2, "ran_pars")
#> Warning in tidy.lme(lmm2, "ran_pars"): ran_pars not yet implemented for
#> multiple levels of nesting
#> # A tibble: 0 × 1
#> # ℹ 1 variable: effect <chr>

Created on 2024-08-30 with reprex v2.1.1

Using VarCorr() it should be possible to extract the relevant components

vc <- VarCorr(lmm2)
vc
#>            Variance        StdDev   Corr  
#> Subject =   pdLogChol(Days)                
(#> Intercept) 335.00060       18.30302 (Intr)
#> Days         37.56219        6.12880 0.048 
#> Group =     pdLogChol(1)                   
#> (Intercept) 251.03921       15.84422       
#> Residual    629.68083       25.09344    

I get an error trying to create this into a tibble.

I also noticed that there was no method to tidy a Variance Components object of a fit from lme4 or nlme. While it may seem not necessary due to the extensive work of tidying an object directly, these functions would be helpful tools working toward tidying a nested random effects model.

Here is a proposed and preliminary solution of a regular random effects model with lme4 and nlme:

### Tidying VarCorr of Mixed Effect Models

#nlme VarCorr object: VarCorr.lme
#lmer VarCorr object: VarCorr.merMod

# vc <- lme4::VarCorr(x)

tidy.VarCorr.lme <- function(
    vc,    
    # effects = c("ran_pars", "fixed"),  #effects are always "ran_pars"
    scales = NULL, 
    conf.int = FALSE, 
    conf.level = 0.95, 
    ...
){
  
  if(any(class(vc) == "VarCorr.lme")){
    #Need to convert nlme object to a proper tibble
    vc <- convert_VarCorr.MerMod_to_VarCorr.lme(vc)
  } 
  
  
  if(conf.int){
    cli::cli_alert_info("No confidence intervals is possible for random effects in a crossed model.")
  }
  
  if(scales == "vcov" & !is.null(scales)){
    ests_random <- as.tibble(vc) %>%
      rename(group = grp, term = var1, estimate_var = vcov) %>%
      mutate(
        term = case_when(
          !is.na(term) & !is.na(var2) ~ paste0("cov_", term, "_", var2),
          group == "Residual" & is.na(term) ~ paste0("var_", "Observation"),
          TRUE ~ paste0("var_", term)
        )
      ) %>%
      mutate(effect = "ran_pars", .before = "group")
    
    total_var <- ests_random %>% filter(is.na(var2)) %>% summarize(s = sum(estimate_var)) %>% pull(s)
    
    ests_random <- ests_random %>%
      mutate(prop_var = case_when(
        !stringr::str_detect(term, "cov_") ~ estimate_var/total_var,
        TRUE ~ NA_real_
      )
      ) %>%
      select(-var2, -sdcor)
    
    #Check
    print(as.tibble(vc))
    
  } else if(scales == "sdcor" | is.null(scales)){
    ests_random <- as.tibble(vc) %>%
      rename(group = grp, term = var1, estimate = sdcor) %>%
      mutate(
        term = case_when(
          !is.na(term) & !is.na(var2) ~ paste0("cor_", term, "_", var2),
          group == "Residual" & is.na(term) ~ paste0("sd_", "Observation"),
          TRUE ~ paste0("sd_", term)
        )
      ) %>%
      mutate(effect = "ran_pars", .before = "group")
    
    #Can estimate covariance if requested with a bit more work...
    
    total_var <- ests_random %>% filter(is.na(var2)) %>% summarize(s = sum(estimate^2)) %>% pull(s)
    
    ests_random <- ests_random %>%
      mutate(prop_var = case_when(
        !str_detect(term, "cor_") ~ estimate^2/total_var,
        TRUE ~ NA_real_
      )
      ) %>%
      select(-var2, -vcov)
    
    #Check
    print(vc)
  }
  
  return(ests_random)
}

#  lmm.lme4 <- lme4::lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
#as.tibble(lme4::VarCorr(lmm.lme4))
# grp      var1        var2    vcov   sdcor
# <chr>    <chr>       <chr>  <dbl>   <dbl>
# 1 Subject  (Intercept) NA    612.   24.7   
# 2 Subject  Days        NA     35.1   5.92  
# 3 Subject  (Intercept) Days    9.60  0.0656
# 4 Residual NA          NA    655.   25.6  


#This function converts VarCorr on a nlme object to a tibble form of an lme4 object
#i.e. VarCorr.merMod -> tibble of VarCorr.lme
#
#'@param vc  A variance-correlation component of nlme::lme object. 
#'@return The tibble version of VarCorr.lme object from lme4 package.
convert_VarCorr.MerMod_to_VarCorr.lme <- function(vc){
  A <- as.matrix(vc)
  A
  row.residual <- str_which("Residual", rownames(A))
  
  ##Unsure how to generalize this
  corr.A <- A[-row.residual, -(1:2), drop = TRUE]
  corr.A
  corr.A[-1]
  t.corr <- tibble(var = names(corr.A), corr = as.vector(corr.A))
  corr <- tibble(
    var1 = t.corr[1, "var", drop = TRUE],
    var2 = t.corr[2, "var", drop = TRUE],
    sdcor = t.corr[2, "corr", drop = TRUE]
  )
  
  tib <-  tibble(grp = NA_character_, var1 = rownames(A), var2 = NA, vcov= A[ ,"Variance"], sdcor = A[ , "StdDev"]) %>%
    bind_rows(corr) %>%
    mutate(
      grp = case_when(
        var1 != "Residual" ~ "Subject",
        var1 == "Residual" ~ "Residual",
        TRUE ~ NA_character_
      ),
      vcov = as.numeric(vcov),
      sdcor = as.numeric(sdcor)
    )
  
  return(tib)
}

data("sleepstudy", package="lme4")
lmm.nlme <- lme(Reaction ~ Days, random=~ Days|Subject, sleepstudy)
# > VarCorr(lmm.nlme)
# Subject = pdLogChol(Days) 
# Variance StdDev    Corr  
# (Intercept) 612.0795 24.740241 (Intr)
# Days         35.0713  5.922103 0.066 
# Residual    654.9424 25.591843       

A <- convert_VarCorr.MerMod_to_VarCorr.lme(VarCorr(lmm.nlme))
tidy.VarCorr.lme(A, scales = "vcov")
tidy.VarCorr.lme(A, scales = "sdcor")
tidy(VarCorr(lmm.nlme))
#Error: No tidy method for objects of class VarCorr.lme
tidy(lmm.nlme)

There is no issue in tidying a crossed random effects model using lme4::lmer(). The tidy.merMod() function tidies the random effects fine. (I added this in case other readers are curious)

library(magrittr); 
library(broom.mixed); 
data("sleepstudy", package="lme4")
sleepstudy2 <- sleepstudy %>%
    tibble::add_column(Group = rep(LETTERS[1:10], each = 18))

#Crossed Random Model 
lmm.lme4 <- lmerTest::lmer(Reaction ~ Days + (Days|Subject) + (1|Group), data = sleepstudy2)
lmm.lme4
#> Linear mixed model fit by REML ['lmerModLmerTest']
#> Formula: Reaction ~ Days + (Days | Subject) + (1 | Group)
#>    Data: sleepstudy2
#> REML criterion at convergence: 1743.496
#> Random effects:
#>  Groups   Name        Std.Dev. Corr
#>  Subject  (Intercept) 25.287       
#>           Days         5.973   0.04
#>  Group    (Intercept)  6.071       
#>  Residual             25.397       
#> Number of obs: 180, groups:  Subject, 18; Group, 10
#> Fixed Effects:
#> (Intercept)         Days  
#>      251.47        10.45

tidy(lmm.lme4)
#> # A tibble: 7 × 8
#>   effect   group    term            estimate std.error statistic    df   p.value
#>   <chr>    <chr>    <chr>              <dbl>     <dbl>     <dbl> <dbl>     <dbl>
#> 1 fixed    <NA>     (Intercept)     251.          7.19     35.0   9.95  9.55e-12
#> 2 fixed    <NA>     Days             10.5         1.56      6.72 16.7   3.96e- 6
#> 3 ran_pars Subject  sd__(Intercept)  25.3        NA        NA    NA    NA       
#> 4 ran_pars Subject  cor__(Intercep…   0.0420     NA        NA    NA    NA       
#> 5 ran_pars Subject  sd__Days          5.97       NA        NA    NA    NA       
#> 6 ran_pars Group    sd__(Intercept)   6.07       NA        NA    NA    NA       
#> 7 ran_pars Residual sd__Observation  25.4        NA        NA    NA    NA
Created on 2024-08-30 with [reprex v2.1.1](https://reprex.tidyverse.org/)


TIDYING NLME CROSSED INTERCEPT MODELS

If we can create a tibble from VarCorr.lme (nlme::lme). that would solve the problem...

Here is some preliminary work in attempt to do just that.

rownames.v <- rownames(vc)
 
subject_row <-  stringr::str_which("Subject = ", rownames.v)
group_row <- stringr::str_which("Group = ", rownames.v)
residual_row <- stringr::str_which("Residual", rownames.v)
 
vc_subject <- vc[(subject_row+1):(group_row-1), ]
vc_group <- vc[(group_row+1):(resiudal_row-1), ]
vc_residual <- vc[residual_row, ]


 tibble(
   effect = "ran_pars",
   subject = vc[rownames.v[1], 1],
   var1 = rownames.v,
   var2 = NA,
   estimate = if(scales == "vcov") vc[, "Variance"] else vc[, "StdDev"]
 ) %>%
   mutate(
     group = case_when(
       var1 == "Residual" ~ "Residual",
       TRUE ~ group
     )

Could we apply the function tidy.VarCorr.lme() for each section of the vc's output?