Workflow for analyses

library(stype)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(riskimator)
set.seed(122)

Workflows as part of a data.frame

ctimes <- list(
   v_continuous_nonneg(c(5, 6, 10, Inf, 1, Inf, 19), 
                internal_name = "cA"),
   v_continuous_nonneg(c(4, 1, 15, Inf, Inf, Inf, 21), 
                internal_name = "cB")
)

otimes <- list(
  v_continuous_nonneg(c(2, 6, 11, 12, Inf, Inf, 25),
               internal_name = "oA"),
  v_continuous_nonneg(c(1, Inf, 10, Inf, Inf, Inf, 23), 
               internal_name = "oB")
)

est_cph <- function(x, df){
  cph <- survival::coxph(
    formula = as_Surv(x, censor_as_event = TRUE) ~ as_canonical(x3),
    data = df)
  exp(-predict(cph, type = "expected"))
}

weight_rcensored_outcomes <- function(dt){
  over_map(dt, rcensored_l %.% outcome_l, function(x) {
    over(x, data_summary_l, function(ds) { 
      c(ds, list(crisk = cumrisk(x, w = est_cph, df = dt)))
    })
  })
}

smd_covariates_by_treatments <- function(dt){

  # update data summary for each covariate tagged with baseline
  over_map(dt, covariate_l %.% tag_l("baseline"), function(z) {
      
    # compute SMD for each exposure variable in the dataset
    smds <- purrr::map(
      .x = view(dt, exposure_l),
      .f = ~ smd::smd(x = as_canonical(z), g = as_canonical(.x)) 
    )
    
    # update the data_summary
    over(z, data_summary_l, function(d) { c(d, smds = list(smds)) })
  })
}



df <- 
tibble(
  x1 = v_rcensored(outcomes = otimes, censors = ctimes, end_time = 15,
                  context  = context(purpose = purpose(study_role = "outcome"))),
  x2 = v_rcensored(outcomes = otimes[[1]], end_time = 15,
                   context  = context(purpose = purpose(study_role = "outcome"))),
  
  x3 = v_binary(as.logical(rbinom(7, 1, prob = 0.5)),
                context  = context(purpose = purpose(study_role = "covariate",
                                                     tags = "baseline"))),
  
  x4 = v_nominal(factor(LETTERS[rbinom(7, 1, prob = 0.5) + 1L]),
                context  = context(purpose = purpose(study_role = "exposure"))),
  
  x5 = v_binary(as.logical(rbinom(7, 1, prob = 0.5)),
                context  = context(purpose = purpose(study_role = "covariate",
                                                     tags = "baseline"))),
  
  x6 = v_nominal(factor(LETTERS[rbinom(7, 1, prob = 0.5) + 4L]),
                context  = context(purpose = purpose(study_role = "exposure"))),
)

dt <- analysis(
  df,
  modifiers = list(weight_rcensored_outcomes, smd_covariates_by_treatments)
)
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Loglik converged before variable 1 ; coefficient may be infinite.


dt %>% get_data_summary()
#> An object of class  "data_summary" 
#> $n_obs
#> [1] 7
#> 
#> $n_col
#> [1] 6
#> 
#> $n_stypes
#> [1] 6
#> 
#> $n_tagged
#> [1] 2
dt[1:6, ] %>% get_data_summary()
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Loglik converged before variable 1 ; coefficient may be infinite.
#> An object of class  "data_summary" 
#> $n_obs
#> [1] 6
#> 
#> $n_col
#> [1] 6
#> 
#> $n_stypes
#> [1] 6
#> 
#> $n_tagged
#> [1] 2

view(df$x1, data_summary_l)
#> An object of class  "data_summary" 
#> $n
#> [1] 7
#> 
#> $has_missing
#> [1] FALSE
#> 
#> $n_nonmissing
#> [1] 7
#> 
#> $n_missing
#> [1] 0
#> 
#> $proportion_missing
#> [1] 0
#> 
#> $is_constant
#> [1] FALSE
#> 
#> $person_time
#> [1] 55
#> 
#> $n_events
#> [1] 3
#> 
#> $outcome_reasons
#> x
#>   oA   oB <NA> 
#>    1    2    4 
#> 
#> $n_censored
#> [1] 2
#> 
#> $censor_reasons
#> x
#>   cA   cB <NA> 
#>    1    1    5 
#> 
#> $eair
#> [1] 0.05454545
#> 
#> $eair_variance
#> [1] 0.006923789
view(dt$x1, data_summary_l)
#> $n
#> [1] 7
#> 
#> $has_missing
#> [1] FALSE
#> 
#> $n_nonmissing
#> [1] 7
#> 
#> $n_missing
#> [1] 0
#> 
#> $proportion_missing
#> [1] 0
#> 
#> $is_constant
#> [1] FALSE
#> 
#> $person_time
#> [1] 55
#> 
#> $n_events
#> [1] 3
#> 
#> $outcome_reasons
#> x
#>   oA   oB <NA> 
#>    1    2    4 
#> 
#> $n_censored
#> [1] 2
#> 
#> $censor_reasons
#> x
#>   cA   cB <NA> 
#>    1    1    5 
#> 
#> $eair
#> [1] 0.05454545
#> 
#> $eair_variance
#> [1] 0.006923789
#> 
#> $crisk
#> $crisk$time
#> [1]  1 10 12
#> 
#> $crisk$estimate
#> [1] 0.2240446 0.3669017 0.5097589
view(dt[1:6, ]$x1, data_summary_l)
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Loglik converged before variable 1 ; coefficient may be infinite.
#> $n
#> [1] 6
#> 
#> $has_missing
#> [1] FALSE
#> 
#> $n_nonmissing
#> [1] 6
#> 
#> $n_missing
#> [1] 0
#> 
#> $proportion_missing
#> [1] 0
#> 
#> $is_constant
#> [1] FALSE
#> 
#> $person_time
#> [1] 40
#> 
#> $n_events
#> [1] 3
#> 
#> $outcome_reasons
#> x
#>   oA   oB <NA> 
#>    1    2    3 
#> 
#> $n_censored
#> [1] 2
#> 
#> $censor_reasons
#> x
#>   cA   cB <NA> 
#>    1    1    4 
#> 
#> $eair
#> [1] 0.075
#> 
#> $eair_variance
#> [1] 0.0099225
#> 
#> $crisk
#> $crisk$time
#> [1]  1 10 12
#> 
#> $crisk$estimate
#> [1] 0.2986670 0.4653336 0.6320003
view(dt$x2, data_summary_l)
#> $n
#> [1] 7
#> 
#> $has_missing
#> [1] FALSE
#> 
#> $n_nonmissing
#> [1] 7
#> 
#> $n_missing
#> [1] 0
#> 
#> $proportion_missing
#> [1] 0
#> 
#> $is_constant
#> [1] FALSE
#> 
#> $person_time
#> [1] 76
#> 
#> $n_events
#> [1] 4
#> 
#> $outcome_reasons
#> x
#>   oA <NA> 
#>    4    3 
#> 
#> $n_censored
#> [1] 0
#> 
#> $censor_reasons
#> x
#>    1 <NA> 
#>    0    7 
#> 
#> $eair
#> [1] 0.05263158
#> 
#> $eair_variance
#> [1] 0.004880097
#> 
#> $crisk
#> $crisk$time
#> [1]  2  6 11 12
#> 
#> $crisk$estimate
#> [1] 0.1428571 0.2857143 0.4285714 0.5714286
view(dt$x3, data_summary_l)
#> $n
#> [1] 7
#> 
#> $has_missing
#> [1] FALSE
#> 
#> $n_nonmissing
#> [1] 7
#> 
#> $n_missing
#> [1] 0
#> 
#> $proportion_missing
#> [1] 0
#> 
#> $is_constant
#> [1] FALSE
#> 
#> $num_0
#> [1] 2
#> 
#> $num_1
#> [1] 5
#> 
#> $proportion
#> [1] 0.7142857
#> 
#> $variance
#> [1] 0.2380952
#> 
#> $smds
#> $smds$x4
#>   term estimate
#> 1    B 1.414214
#> 
#> $smds$x6
#>   term  estimate
#> 1    E 0.6625892
view(dt[1:5, ]$x5, data_summary_l)
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge
#> $n
#> [1] 5
#> 
#> $has_missing
#> [1] FALSE
#> 
#> $n_nonmissing
#> [1] 5
#> 
#> $n_missing
#> [1] 0
#> 
#> $proportion_missing
#> [1] 0
#> 
#> $is_constant
#> [1] FALSE
#> 
#> $num_0
#> [1] 3
#> 
#> $num_1
#> [1] 2
#> 
#> $proportion
#> [1] 0.4
#> 
#> $variance
#> [1] 0.3
#> 
#> $smds
#> $smds$x4
#>   term   estimate
#> 1    B -0.3429972
#> 
#> $smds$x6
#>   term estimate
#> 1    E        0

2022-08-25

Workflows as part of a data.frame