-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathanalysis.R
212 lines (174 loc) · 7.23 KB
/
analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
## ------------------------------------------------------------------------------------------
##
## Script name: Corticosteroids Main Analysis
##
## Purpose of script: Show an example of code to run an analysis for a
## dynamic treatment regime for a time-varying exposure and time-dependent confounders
## for a time-to-event outcome.
##
## Author: Katherine Hoffman
##
## Date Created: 2022-05-22
##
## Author: Katherine Hoffman, 2022
## Email: [email protected]
##
## ------------------------------------------------------------------------------------------
##
## Notes: The data set of n=500 patients for time=(1,2,...,14) contains realistic values for patients but
## is a toy data set and cannot be used to obtain the result in the paper due to
## data-sharing restrictions. The interventions are 1: 6 days of corticosteroids at time of
## hypoxia vs. no corticosteroids
##
## ------------------------------------------------------------------------------------------
## system set up
options(java.parameters = '-Xmx2500m') # expand Java RAM for BART to run on a cluster computer
set.seed(7)
op <- options(nwarnings = 10000) ## <- get "full statistics" of warnings
## load necessary packages:
# install.packages(c("tidyverse","earth","BART","glmnet"))
# devtools::install_github("nt-williams/lmtp@sl3") # sl3 compatible branch (faster)
# devtools::install_github("tlverse/sl3")
library(tidyverse)
library(lmtp) # note that this code uses sl3 version
library(future)
library(sl3)
plan(sequential) # change depending on parallelization preferences, see ?future for details
## -----------------------------------------------------------------------------------------
## load data
# wide-format, already pre-processed (see img/analytical_file.png and README.MD)
dat_lmtp <- read_rds(here::here("data/dat_demo.rds"))
## ----------------------------------------------------------------------------------------
## write intervention functions
# treatment intervention 1:
# when a patient becomes hypoxic, administer steroids for 6 days
# H is the day of severe hypoxia indicator variable created in data pre-processing
# this function takes in dataframe and treatment variable name (A_00, A_01, etc.)
# then if corresponding hypoxia indicator (H_00, H_01, etc.) is 1, set steroids to 1 for the next 6 days, otherwise, 0
int_steroids_after_hypoxia <- function(dat, trt) {
trt_day <- parse_number(trt) # first, get the number of the treatment day of interest
# we want to check the previous 6 days for hypoxia
earliest_day_to_check <- max(0,trt_day-5) # subtracting 5 will get us previous six days
# this generates the relevant H_* column names for the hypoxia days we need to check
hypoxia_days_to_check <- paste0("H_",
str_pad(earliest_day_to_check:trt_day,
width=2,
side="left",
pad="0")
)
# we'll set up a count variable to check when hypoxia was reached
sum <- 0
for(i in hypoxia_days_to_check){
add <- ifelse(dat[[i]] == 1, 1, 0)
sum <- sum + add
}
# if hypoxia was reached in one those (max) 6 days before treatment to check, return 1 for that treatment day
return(ifelse(sum > 0, 1, 0)) # otherwise, return 0
}
# treatment intervention 2:
# never steroids - always return 0 for treatment
int_no_steroids <- function(data, trt) {
data[[trt]] <- 0 # just change the entire treatment vector of interest to 0
data[[trt]] # and return
}
## ------------------------------------------------------------------------------------------
## set up superlearner candidate learners (to run through {sl3} within lmtp)
mars_grid_params <- list( # manually create a grid of MARS learners
degree = c(2,3),
penalty = c(1,2,3)
)
mars_grid <- expand.grid(mars_grid_params, KEEP.OUT.ATTRS = FALSE)
mars_learners <- apply(mars_grid, MARGIN = 1, function(tuning_params) {
do.call(Lrnr_earth$new, as.list(tuning_params))
})
# initiate other super learner candidates
lrn_lasso <- Lrnr_glmnet$new(alpha = 1)
lrn_ridge <- Lrnr_glmnet$new(alpha = 0)
lrn_enet <- Lrnr_glmnet$new(alpha = 0.5)
lrn_bart <- Lrnr_bartMachine$new()
lrn_mean <- Lrnr_mean$new()
learners <- unlist(list(
#mars_learners, # don't include commented out libraries in demo code due to time
lrn_lasso,
#lrn_ridge,
#lrn_enet,
#lrn_bart,
lrn_mean
),
recursive = TRUE
)
lrnrs <- make_learner(Stack, learners) # stack all learners together, see sl3 documentation
## -----------------------------------------------------------------------------------------
## set LMTP parameters of outcome, censoring treatment, and confounders
outcome_day <- 14 # half the outcome time frame (for computational time purposes)
padded_days <- str_pad(0:(outcome_day-1), 2, pad = "0")
padded_days_out <- str_pad(1:outcome_day, 2, pad = "0")
a <- paste0("A_", padded_days) # names of treatment cols
bs <- dat_lmtp |> # names of baseline covariates cols
select(red_cap_source, age, sex, bmi, cad, home_o2_yn, dm, htn, cva) |> # subset of baseline confounders from paper
names()
y <- paste0("Y_",padded_days_out) # names of outcome cols
censoring <- paste0("C_",padded_days) # names of censoring cols (1 = observed at next time point)
used_letters <- dat_lmtp |> # names of time varying covariates
select(starts_with("L_"),
starts_with("H_"),
-ends_with(paste0("_",outcome_day))) |>
names()
tv <- map(0:(outcome_day - 1), function(x) { # time varying covariates must be a list
used_letters[str_detect(used_letters, str_pad(x, 2, pad="0"))]
})
## ----------------------------------------------------
## miscellaneous LMTP function parameters
trim <- .995 # quantile range for trimming ranges, if necessary
folds <- 5 # cross-fitting folds (5 for time, 10 in paper)
SL_folds <- 5 # cross-validation folds for superlearning (5 for time, 10 in paper)
k <- 1 # we assume that data from k days previously is sufficient for the patient's covariate history
# this is 2 in the paper
## -----------------------------------------------------------------------------------------
## fit LMTP results objects -- note that estimate is for survival, not incidence rate
# intervention 1
res_steroid <-
progressr::with_progress(
lmtp_sdr(
dat_lmtp_fix_censoring,
trt = a,
outcome = y,
baseline = bs,
time_vary = tv,
cens = censoring,
shift = int_steroids_after_hypoxia,
outcome_type = "survival",
learners_outcome = lrnrs,
learners_trt = lrnrs,
folds = folds,
.SL_folds = SL_folds,
.trim = trim,
k=k,
intervention_type = "dynamic"
)
)
# intervention 2
res_no_steroid <-
progressr::with_progress(
lmtp_sdr(
dat_lmtp,
trt = a,
outcome = y,
baseline = bs,
time_vary = tv,
cens = censoring,
shift = int_no_steroids,
outcome_type = "survival",
learners_outcome = lrnrs,
learners_trt = lrnrs,
folds = folds,
.SL_folds = SL_folds,
.trim = trim,
k=k,
intervention_type = "static"
)
)
## -----------------------------------------------------------------------------------------
## save results
saveRDS(res_steroid, file = here::here("output/res_steroid.rds"))
saveRDS(res_no_steroid, file = here::here("output/res_no_steroid.rds"))