Skip to contents

This vignette demonstrates the implementation of treed distributed lag non-linear model (TDLNM). More details can be found in Mork and Wilson (2021) <doi: 10.1093/biostatistics/kxaa051>.

Load data

Simulated data is available on GitHub. It can be loaded with the following code.

sbd_dlmtree <- get_sbd_dlmtree()

Data preparation

# Response and covariates
sbd_cov <- sbd_dlmtree %>% 
            select(bwgaz, ChildSex, MomAge, GestAge, MomPriorBMI, Race,
                    Hispanic, MomEdu, SmkAny, Marital, Income,
                    EstDateConcept, EstMonthConcept, EstYearConcept)

# Exposure data
sbd_exp <- list(PM25 = sbd_dlmtree %>% select(starts_with("pm25_")),
                TEMP = sbd_dlmtree %>% select(starts_with("temp_")),
                SO2 = sbd_dlmtree %>% select(starts_with("so2_")),
                CO = sbd_dlmtree %>% select(starts_with("co_")),
                NO2 = sbd_dlmtree %>% select(starts_with("no2_")))
sbd_exp <- sbd_exp %>% lapply(as.matrix)

Fitting the model

tdlnm.fit <- dlmtree(formula = bwgaz ~ ChildSex + MomAge + MomPriorBMI + 
                                        Race + Hispanic + SmkAny + EstMonthConcept,
                     data = sbd_cov,
                     exposure.data = sbd_exp[["TEMP"]],
                     dlm.type = "nonlinear",
                     family = "gaussian",
                     tdlnm.exposure.splits = 20,
                     n.burn = 2500, n.iter = 10000, n.thin = 5)
#> Preparing data...
#> 
#> Running TDLNM:
#> Burn-in % complete 
#> [0--------25--------50--------75--------100]
#>  ''''''''''''''''''''''''''''''''''''''''''
#> MCMC iterations (est time: 28 seconds)
#> [0--------25--------50--------75--------100]
#>  ''''''''''''''''''''''''''''''''''''''''''
#> Compiling results...

Model fit summary

tdlnm.sum <- summary(tdlnm.fit)
#> Centered DLNM at exposure value 0
tdlnm.sum
#> ---
#> TDLNM summary
#> 
#> Model run info:
#> - bwgaz ~ ChildSex + MomAge + MomPriorBMI + Race + Hispanic + SmkAny + EstMonthConcept 
#> - sample size: 10,000 
#> - family: gaussian 
#> - 20 trees
#> - 2500 burn-in iterations
#> - 10000 post-burn iterations
#> - 5 thinning factor
#> - 0.95 confidence level
#> 
#> Fixed effect coefficients:
#>                        Mean  Lower  Upper
#> (Intercept)           0.160 -0.913  1.180
#> *ChildSexM           -2.105 -2.127 -2.086
#> MomAge                0.001 -0.001  0.002
#> *MomPriorBMI         -0.021 -0.023 -0.019
#> RaceAsianPI           0.026 -0.103  0.152
#> RaceBlack             0.031 -0.094  0.157
#> Racewhite             0.012 -0.112  0.133
#> *HispanicNonHispanic  0.256  0.233  0.278
#> *SmkAnyY             -0.396 -0.442 -0.351
#> *EstMonthConcept2     0.117  0.035  0.202
#> *EstMonthConcept3     0.231  0.100  0.356
#> *EstMonthConcept4     0.369  0.201  0.533
#> *EstMonthConcept5     0.498  0.318  0.681
#> *EstMonthConcept6     0.452  0.275  0.639
#> *EstMonthConcept7     0.387  0.214  0.573
#> *EstMonthConcept8     0.238  0.072  0.420
#> *EstMonthConcept9     0.264  0.099  0.437
#> *EstMonthConcept10    0.159  0.014  0.308
#> *EstMonthConcept11    0.127  0.013  0.237
#> EstMonthConcept12     0.020 -0.054  0.094
#> ---
#> * = CI does not contain zero
#> 
#> DLNM effect:
#> range = [-0.042, 0.059]
#> signal-to-noise = 0.405
#> critical windows: 4-6,10-34 
#> 
#> residual standard errors: 0.004

Exposure-time surface

plot(tdlnm.sum, 
     main = "Plot title", 
     xlab = "Time axis label", 
     ylab = "Exposure-concentration axis label", 
     flab = "Effect color label")

Slicing on exposure-concentration

# slicing on exposure-concentration
plot(tdlnm.sum, plot.type = "slice", val = 1, main = "Slice at concentration 1") 

plot(tdlnm.sum, plot.type = "slice", val = 2, main = "Slice at concentration 2")

Slicing on time lag

# slicing on exposure-concentration
plot(tdlnm.sum, plot.type = "slice", time = 7, main = "Slice at time 7")

plot(tdlnm.sum, plot.type = "slice", time = 15, main = "Slice at time 15")

plot(tdlnm.sum, plot.type = "slice", time = 33, main = "Slice at time 33")