library(stype)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(lenses)
Weighting a stype to update the data_summary

The weight function updates the data_summary of a stype vector with a weight.

stype vector
x <- v_binary(c(TRUE, TRUE, TRUE, FALSE))
w <- c(10, 10, 1, 1)
get_data_summary(x)
#> An object of class "data_summary"
#> $n
#> [1] 4
#>
#> $has_missing
#> [1] FALSE
#>
#> $n_nonmissing
#> [1] 4
#>
#> $n_missing
#> [1] 0
#>
#> $proportion_missing
#> [1] 0
#>
#> $is_constant
#> [1] FALSE
#>
#> $num_0
#> [1] 1
#>
#> $num_1
#> [1] 3
#>
#> $proportion
#> [1] 0.75
#>
#> $variance
#> [1] 0.25
z <- weight(x, w)
class(w)
#> [1] "numeric"
get_data_summary(z)
#> An object of class "data_summary"
#> $n
#> [1] 4
#>
#> $has_missing
#> [1] FALSE
#>
#> $n_nonmissing
#> [1] 4
#>
#> $n_missing
#> [1] 0
#>
#> $proportion_missing
#> [1] 0
#>
#> $is_constant
#> [1] FALSE
#>
#> $num_0
#> [1] 1
#>
#> $num_1
#> [1] 3
#>
#> $proportion
#> [1] 0.75
#>
#> $variance
#> [1] 0.25
#>
#> $weighted_proportion
#> [1] 0.9545455
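For this example, the new weighted_proportion entry corresponds to the total weight of the TRUE values divided by the total weight, which can be checked by hand:
sum(w * c(TRUE, TRUE, TRUE, FALSE)) / sum(w)
# (10 + 10 + 1) / (10 + 10 + 1 + 1) = 21/22 = 0.9545455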
However, the weights are not part of the data, so when the vector is subset the data_summary no longer includes the weighted summaries:
get_data_summary(z[1:2])
#> An object of class "data_summary"
#> $n
#> [1] 2
#>
#> $has_missing
#> [1] FALSE
#>
#> $n_nonmissing
#> [1] 2
#>
#> $n_missing
#> [1] 0
#>
#> $proportion_missing
#> [1] 0
#>
#> $is_constant
#> [1] TRUE
#>
#> $num_0
#> [1] 0
#>
#> $num_1
#> [1] 2
#>
#> $proportion
#> [1] 1
#>
#> $variance
#> [1] 0
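If the weighted summaries are needed for a subset, one option is to re-apply weight to the subset along with the corresponding subset of the weights (a sketch; the weights have to be tracked separately from the vector):
z12 <- weight(z[1:2], w[1:2])
# get_data_summary(z12) will again include a weighted_proportion entry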
stype vectors

Here, we use lenses to weight only certain variables within a tibble.
# A lens targeting the underlying numeric data of the first weight-role variable
weight_var_l <- weight_l %.% lenses::index_l(1) %.% vec_data_l
otcm <- context(purpose = purpose(study_role = "outcome"))
# note the different ways of setting the context
dt <- tibble::tibble(
  x1 = v_binary(c(TRUE, FALSE, TRUE), context = otcm),
  x2 = v_binary(c(FALSE, TRUE, FALSE)) %>% set(context_l, otcm),
  x3 = v_continuous(c(1.5, 1.6, 0.1), context = otcm),
  x4 = v_continuous(c(1.5, 1.6, 0.1)) %>%
    set(context_l, context(purpose = purpose(study_role = "weight"))),
)
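Viewing the tibble through the composed lens should return the underlying numeric data of the weight-role column x4, which is what gets passed to weight below:
view(dt, weight_var_l)
# expected: 1.5 1.6 0.1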
tail(view(dt[[1]], data_summary_l), 2)
#> $proportion
#> [1] 0.6666667
#>
#> $variance
#> [1] 0.3333333
# Just weight binary variables
dt1 <- over_map(dt, outcome_l %.% binary_l, function(x) {
  weight(x, view(dt, weight_var_l))
})
dt1
#> # A tibble: 3 × 4
#> x1 x2 x3 x4
#> <bnry> <bnry> <cont> <cont>
#> 1 1 0 1.5 1.5
#> 2 0 1 1.6 1.6
#> 3 1 0 0.1 0.1
tail(view(dt1[[1]], data_summary_l), 2)
#> $variance
#> [1] 0.3333333
#>
#> $weighted_proportion
#> [1] 0.5
tail(view(dt1[[3]], data_summary_l), 2)
#> $max
#> [1] 1.6
#>
#> $qtiles
#> 1% 2.5% 5% 10% 25% 50% 75% 90% 95% 97.5% 99%
#> 0.128 0.170 0.240 0.380 0.800 1.500 1.550 1.580 1.590 1.595 1.598
# Weight all outcomes
dt2 <- over_map(dt, outcome_l, function(x) {
  weight(x, view(dt, weight_var_l))
})
tail(view(dt2[[1]], data_summary_l), 2)
#> $variance
#> [1] 0.3333333
#>
#> $weighted_proportion
#> [1] 0.5
tail(view(dt2[[3]], data_summary_l), 2)
#> $qtiles
#> 1% 2.5% 5% 10% 25% 50% 75% 90% 95% 97.5% 99%
#> 0.128 0.170 0.240 0.380 0.800 1.500 1.550 1.580 1.590 1.595 1.598
#>
#> $weighted_mean
#> [1] 1.50625
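As before, the weighted summaries can be checked by hand; the weighted_mean of x3 uses the values of x4 as weights:
sum(c(1.5, 1.6, 0.1) * c(1.5, 1.6, 0.1)) / sum(c(1.5, 1.6, 0.1))
# (2.25 + 2.56 + 0.01) / 3.2 = 1.50625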
The data_summary of a right-censored stype vector can be extended in a similar way. Below, a cumulative risk estimate from the riskimator package is added through the extra_descriptors argument:

ctimes <- list(
  v_continuous_nonneg(c(5, 6, 10, Inf, 1, Inf, 19),
                      internal_name = "cA"),
  v_continuous_nonneg(c(4, 1, 15, Inf, Inf, Inf, 21),
                      internal_name = "cB")
)
otimes <- list(
  v_continuous_nonneg(c(2, 6, 11, 12, Inf, Inf, 25),
                      internal_name = "oA"),
  v_continuous_nonneg(c(1, Inf, 10, Inf, Inf, Inf, 23),
                      internal_name = "oB")
)
x1 <- v_rcensored(
  outcomes = otimes, censors = ctimes, end_time = 15,
  extra_descriptors = list(
    crisk = function(x) {
      list(riskimator::cumrisk(x, w = riskimator::product_limit))
    }
  )
)
get_data_summary(x1, "crisk")
#> $time
#> [1] 1 10 12
#>
#> $estimate
#> [1] 0.2 0.4 0.6
get_data_summary(x1[c(3,5,6)], "crisk")
#> $time
#> [1] 10
#>
#> $estimate
#> [1] 0.5
The weight function passed to cumrisk can also be estimated from other variables in the data. Here, a Cox model-based estimator that uses the covariate x3 supplies the weights:

library(riskimator)
dt <- tibble(
  x1 = v_rcensored(outcomes = otimes, censors = ctimes, end_time = 15),
  x2 = v_rcensored(outcomes = otimes[[1]], end_time = 15),
  x3 = v_binary(as.logical(rbinom(7, 1, prob = 0.5)),
                context = context(purpose = purpose(study_role = "covariate"))),
)
est_cph <- function(x, df) {
  # Model the censoring process (censor_as_event = TRUE) with a Cox model on x3,
  # then return exp(-expected): the model-based probability of remaining uncensored
  cph <- survival::coxph(
    formula = as_Surv(x, censor_as_event = TRUE) ~ as_canonical(x3),
    data = df
  )
  exp(-predict(cph, type = "expected"))
}
weight_rcensored_outcomes <- function(dt) {
  over_map(dt, rcensored_l, function(x) {
    over(x, data_summary_l, function(ds) {
      # Append the weighted cumulative risk estimate to the data_summary
      # of each right-censored variable
      c(ds, list(crisk = cumrisk(x, w = est_cph, df = dt)))
    })
  })
}
dt2 <- weight_rcensored_outcomes(dt)
view(dt2$x1, data_summary_l)
#> $n
#> [1] 7
#>
#> $has_missing
#> [1] FALSE
#>
#> $n_nonmissing
#> [1] 7
#>
#> $n_missing
#> [1] 0
#>
#> $proportion_missing
#> [1] 0
#>
#> $is_constant
#> [1] FALSE
#>
#> $person_time
#> [1] 55
#>
#> $n_events
#> [1] 3
#>
#> $outcome_reasons
#> x
#> oA oB <NA>
#> 1 2 4
#>
#> $n_censored
#> [1] 2
#>
#> $censor_reasons
#> x
#> cA cB <NA>
#> 1 1 5
#>
#> $eair
#> [1] 0.05454545
#>
#> $eair_variance
#> [1] 0.006923789
#>
#> $crisk
#> $crisk$time
#> [1] 1 10 12
#>
#> $crisk$estimate
#> [1] 0.1867779 0.3928251 0.5988723