Latent Variable Models (lava)

https://travis-ci.org/kkholst/lava.svg?branch=master https://codecov.io/github/kkholst/lava/coverage.svg?branch=master

https://www.r-pkg.org/badges/version/lava

https://cranlogs.r-pkg.org/badges/lava

Estimation and simulation of latent variable models

Installation

install.packages("lava",dependencies=TRUE)
library("lava")
demo("lava")

For graphical capabilities the Rgraphviz package is needed

source("http://bioconductor.org/biocLite.R");
biocLite("BiocUpgrade") ## Upgrade previous bioconductor installation
biocLite("Rgraphviz")

or the igraph or visNetwork packages

install.packages("igraph",dependencies=TRUE)
install.packages("visNetwork",dependencies=TRUE)

The development version may be installed directly from github:

devtools::install_github("kkholst/lava")

Citation

To cite that lava package please use the following reference

Klaus K. Holst and Esben Budtz-Joergensen (2013). Linear Latent Variable Models: The lava-package. Computational Statistics 28 (4), pp 1385-1453. http://dx.doi.org/10.1007/s00180-012-0344-y

@Article{lava,
  title = {Linear Latent Variable Models: The lava-package},
  author = {Klaus K. Holst and Esben Budtz-Joergensen},
  year = {2013},
  volume = {28},
  number = {4},
  pages = {1385-1452},
  journal = {Computational Statistics},
  note = {http://dx.doi.org/10.1007/s00180-012-0344-y},
}

Examples

Structural Equation Model

Specify structurual equation models with two factors

m <- lvm()
regression(m) <- c(y1,y2,y3) ~ eta1
regression(m) <- c(z1,z2,z3) ~ eta2
latent(m) <- ~eta1+eta2
regression(m) <- eta2~eta1+x
regression(m) <- eta1~x

labels(m) <- c(eta1=expression(eta[1]),eta2=expression(eta[2]))
plot(m)

inst/lava1.png

Simulation

set.seed(1)
d <- sim(m,100)

Estimation

e <- estimate(m,d)
e
                    Estimate Std. Error  Z-value   P-value
Measurements:                                             
   y2~eta1           0.95462    0.08083 11.80993    <1e-12
   y3~eta1           0.98476    0.08922 11.03722    <1e-12
    z2~eta2          0.97038    0.05368 18.07714    <1e-12
    z3~eta2          0.95608    0.05643 16.94182    <1e-12
Regressions:                                              
   eta1~x            1.24587    0.11486 10.84694    <1e-12
    eta2~eta1        0.95608    0.18008  5.30910 1.102e-07
    eta2~x           1.11495    0.25228  4.41951 9.893e-06
Intercepts:                                               
   y2               -0.13896    0.12458 -1.11537    0.2647
   y3               -0.07661    0.13869 -0.55241    0.5807
   eta1              0.15801    0.12780  1.23644    0.2163
   z2               -0.00441    0.14858 -0.02969    0.9763
   z3               -0.15900    0.15731 -1.01076    0.3121
   eta2             -0.14143    0.18380 -0.76949    0.4416
Residual Variances:                                       
   y1                0.69684    0.14858  4.69004          
   y2                0.89804    0.16630  5.40026          
   y3                1.22456    0.21182  5.78109          
   eta1              0.93620    0.19623  4.77084          
   z1                1.41422    0.26259  5.38570          
   z2                0.87569    0.19463  4.49934          
   z3                1.18155    0.22640  5.21883          
   eta2              1.24430    0.28992  4.29195

Model assessment

Assessing goodness-of-fit (linearity between eta2 and eta1)

library(gof)
g <- cumres(e,eta2~eta1)
plot(g)

inst/gof1.png

Non-linear measurement error model

Simulate non-linear model

m <- lvm(c(y1,y2,y3)~u,u~x)
transform(m,u2~u) <- function(x) x^2
regression(m) <- z~u2+u

set.seed(1)
d <- sim(m,200,p=c("z"=-1,"z~u2"=-0.5))

Stage 1:

m1 <- lvm(c(y1[0:s],y2[0:s],y3[0:s])~1*u,u~x)
latent(m1) <- ~u
(e1 <- estimate(m1,d))
                    Estimate Std. Error  Z-value  P-value
Regressions:                                             
   u~x               1.06998    0.08208 13.03542   <1e-12
Intercepts:                                              
   u                -0.08871    0.08753 -1.01344   0.3108
Residual Variances:                                      
   y1                1.00054    0.07075 14.14214         
   u                 1.19873    0.15503  7.73233

Stage 2

pp <- function(mu,var,data,...) cbind(u=mu[,"u"],u2=mu[,"u"]^2+var["u","u"])
(e <- measurement.error(e1, z~1+x, data=d, predictfun=pp))
            Estimate Std.Err   2.5%  97.5%  P-value
(Intercept)  -1.1068  0.1380 -1.377 -0.836 1.04e-15
x            -0.0899  0.1496 -0.383  0.203 5.48e-01
u             1.1108  0.1350  0.846  1.375 1.89e-16
u2           -0.4266  0.0586 -0.541 -0.312 3.41e-13
f <- function(p) p[1]+p["u"]*u+p["u2"]*u^2
u <- seq(-1,1,length.out=100)
plot(e, f, data=data.frame(u))

inst/me1.png

Simulation

Studying the small-sample properties of mediation analysis

m <- lvm(y~x,c~1)
regression(m) <- c(y,x)~z
eventTime(m) <- t~min(y=1,c=0)
transform(m,S~t+status) <- function(x) survival::Surv(x[,1],x[,2])
plot(m)

inst/mediation1.png

Simulate from model and estimate indirect effects

onerun <- function(...) {
    d <- sim(m,100)
    m0 <- lvm(S~x+z,x~z)
    e <- estimate(m0,d,estimator="glm")
    vec(coef(effects(e,S~z))[,1:2])
}
val <- sim(onerun,100)
summary(val)
        Total.Estimate Direct.Estimate Indirect.Estimate S~x~z.Estimate
Mean         2.0162354       0.9932393         1.0229961      1.0229961
SD           0.1684448       0.1936356         0.1698821      0.1698821
Min          1.7405642       0.5912522         0.6255221      0.6255221
2.5%         1.7499169       0.6293145         0.6986610      0.6986610
50%          2.0029443       0.9709732         1.0195000      1.0195000
97.5%        2.3844601       1.3921685         1.4005224      1.4005224
Max          2.4469513       1.7187305         1.4383459      1.4383459
Missing      0.0000000       0.0000000         0.0000000      0.0000000
        Total.Std.Err Direct.Std.Err Indirect.Std.Err S~x~z.Std.Err
Mean       0.19195276     0.18473452        0.1722835     0.1722835
SD         0.02263971     0.02536669        0.0199476     0.0199476
Min        0.14387867     0.13318518        0.1246594     0.1246594
2.5%       0.14977002     0.13860194        0.1345885     0.1345885
50%        0.19375670     0.17976077        0.1723076     0.1723076
97.5%      0.23488476     0.24059604        0.2147245     0.2147245
Max        0.23689616     0.25184711        0.2201178     0.2201178
Missing    0.00000000     0.00000000        0.0000000     0.0000000

Add additional simulations and visualize results

val <- sim(val,500) ## Add 500 simulations
plot(val,estimate=c("Total.Estimate","Indirect.Estimate"),
     true=c(2,1),se=c("Total.Std.Err","Indirect.Std.Err"))

inst/mediation2.png