Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 65 additions & 2 deletions analysis/covid/0_covid_design.R
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,8 @@ approval_lookup <- c(
pfizer_BA45 = "2022-09-11", #"2022-09-12"?
pfizer_XBB15 = "2023-09-05",
pfizer_JN1 = "2024-07-24",
#pfizer_KP2 = "2024-10-10",
#pfizer_KP2_pfs = "2024-10-10",
pfizer_KP2 = "2024-10-10",
pfizer_KP2_pfs = "2024-10-10",
#pfizer_unspecified = "2020-12-02",
#pfizer_original_children = "2021-12-22",
#pfizer_JN1_children = "2024-07-24",
Expand All @@ -217,6 +217,69 @@ approval_lookup <- c(
#valneva = "2022-04-14"
)

# Approval dates come mainly from Table 3 of the ECHO protocol.
campaign_product_lookup <- list(

"Primary series" = c(
"pfizer_original", # BNT162b2
"moderna_original", # mRNA-1273
"az_original" # ChAdOx1-S
),

"Autumn 2021" = c(
"pfizer_original", # BNT162b2
"moderna_original", # mRNA-1273
"az_original" # ChAdOx1-S
),

"Spring 2022" = c(
"pfizer_original", # BNT162b2
"moderna_original" # mRNA-1273
),

"Autumn 2022" = c(
"pfizer_original", # BNT162b2
"moderna_original", # mRNA-1273
"pfizer_BA1", # BNT162b2/BA.1
"moderna_omicron" # mRNA-1273/BA.1
),

"Spring 2023" = c(
"pfizer_BA45", # BNT162b2/BA.4-5
"moderna_BA45", # mRNA-1273/BA.4-5
"sanofigsk_B1" # Vidprevtyn
),

"Autumn 2023" = c(
"pfizer_BA45", # BNT162b2/BA.4-5
"moderna_BA45", # mRNA-1273/BA.4-5
"pfizer_XBB15", # BNT162b2.XBB.1.5
"moderna_XBB15", # mRNA-1273.XBB.1.5
"sanofigsk_B1" # Vidprevtyn
),

"Spring 2024" = c(
"pfizer_XBB15", # BNT162b2.XBB.1.5
"moderna_XBB15" # mRNA-1273.XBB.1.5
),

"Autumn 2024" = c(
"pfizer_JN1", # BNT162b2.JN.1
"moderna_JN1" # mRNA-1273.JN.1
),

"Spring 2025" = c(
"pfizer_JN1", # BNT162b2.JN.1
"moderna_JN1" # mRNA-1273.JN.1
),

"Autumn 2025" = c(
"pfizer_KP2", # BNT162b2.KP.2
"pfizer_KP2_pfs" # BNT162b2.KP.2
)
)


# relabel_from_lookup <- function(x, from, to, source){
# left_join(tibble(x=x), source, by = {{from}})[[{{to}}]]
# }
161 changes: 114 additions & 47 deletions analysis/covid/2_covid_data_quality.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ data_vax_ELD <-
as_tibble()



# ---- 3.3 Multiple Vaccinations on the Same Day ----

products_cooccurrence_flat <-
Expand All @@ -137,9 +136,6 @@ products_cooccurrence_flat <-
.groups = "drop"
) |>
mutate(
flag_same_day_multiple =
total_records_day > 1,

flag_same_day_same_product =
total_records_day > 1 & n_products_day == 1,

Expand All @@ -154,7 +150,6 @@ data_vax_ELD <-
select(
patient_id, vax_date,
total_records_day, n_products_day, product_pattern,
flag_same_day_multiple,
flag_same_day_same_product,
flag_same_day_mixed_product
),
Expand All @@ -176,7 +171,16 @@ data_vax_ELD <-
data_vax_interval <-
data_vax_ELD |>
filter(campaign != "Pre-2020-04-23") |>
filter(!flag_same_day_multiple) |> # exclude same-day multiple-record combinations

# deduplicate same-day same-product records
arrange(patient_id, vax_date, vax_product) |>
group_by(patient_id, vax_date, vax_product) |>
slice(1) |>
ungroup() |>

# exclude mixed-product records flagged for now
filter(!flag_same_day_mixed_product) |> # may be revised once standard cleaning rules are agreed

arrange(patient_id, vax_date) |>
group_by(patient_id) |>
mutate(
Expand Down Expand Up @@ -255,7 +259,6 @@ flag_long_noninterval <-
flag_pre_rollout_date,
flag_unapproved_product,
flag_product_before_approval,
flag_same_day_multiple,
flag_same_day_same_product,
flag_same_day_mixed_product
) |>
Expand All @@ -279,6 +282,7 @@ flag_long_noninterval <-
table_overall_noninterval_flags_unrounded <-
make_summary_table_total(
data = flag_long_noninterval,
denom_data = data_vax_ELD,
group_vars = c("flag_type"),
round = FALSE
) |>
Expand All @@ -287,6 +291,7 @@ table_overall_noninterval_flags_unrounded <-
table_overall_noninterval_flags_rounded <-
make_summary_table_total(
data = flag_long_noninterval,
denom_data = data_vax_ELD,
group_vars = c("flag_type"),
round = TRUE,
sdc_threshold = sdc_threshold
Expand All @@ -304,93 +309,113 @@ write_csv(
)


# ---- Table 2: Campaign summary of non-interval flags with vaccination-date-specific active denominators ----
# ---- Table 2: Campaign x product summary of non-interval flags with vaccination-date-specific active denominators ----
# Exclude the two pre-rollout categories for campaign/product summaries.
# These early categories are retained in the overall summary only.
data_registration_ELD <- read_feather(here("output", "covid", "extract_covid","registrations.arrow"))
table_campaign_noninterval_flags_unrounded <-

analysis_campaigns <- setdiff(
as.character(campaign_info$campaign_label),
c("Pre-2020-04-23", "Pre-roll-out")
)

flag_long_noninterval_primary_onwards <-
flag_long_noninterval |>
dplyr::filter(campaign %in% analysis_campaigns)

data_vax_ELD_primary_onwards <-
data_vax_ELD |>
dplyr::filter(campaign %in% analysis_campaigns)

table_campaign_product_noninterval_flags_unrounded <-
make_summary_table_vaccination_date_specific_active(
flag_data = flag_long_noninterval,
event_data = data_vax_ELD,
flag_data = flag_long_noninterval_primary_onwards,
event_data = data_vax_ELD_primary_onwards,
registration_data = data_registration_ELD,
group_vars = c("campaign", "vax_product", "flag_type"),
round = FALSE
) |>
arrange(campaign, flag_type)
dplyr::arrange(campaign, vax_product, flag_type)

table_campaign_noninterval_flags_rounded <-
table_campaign_product_noninterval_flags_rounded <-
make_summary_table_vaccination_date_specific_active(
flag_data = flag_long_noninterval,
event_data = data_vax_ELD,
flag_data = flag_long_noninterval_primary_onwards,
event_data = data_vax_ELD_primary_onwards,
registration_data = data_registration_ELD,
group_vars = c("campaign", "vax_product", "flag_type"),
round = TRUE,
sdc_threshold = sdc_threshold
) |>
arrange(campaign, flag_type)
dplyr::arrange(campaign, vax_product, flag_type)

write_csv(
table_campaign_noninterval_flags_unrounded,
fs::path(output_dir, "count_campaign_noninterval_flags_unrounded.csv")
table_campaign_product_noninterval_flags_unrounded,
fs::path(output_dir, "count_campaign_product_noninterval_flags_unrounded.csv")
)

write_csv(
table_campaign_noninterval_flags_rounded,
fs::path(output_dir, "count_campaign_noninterval_flags.csv")
table_campaign_product_noninterval_flags_rounded,
fs::path(output_dir, "count_campaign_product_noninterval_flags.csv")
)


# ---- Table 3: Product summary of non-interval flags ----
table_product_noninterval_flags_unrounded <-
make_summary_table_total(
data = flag_long_noninterval,
group_vars = c("vax_product", "flag_type"),
# ---- Table 3: interval context x interval bin ----
table_interval_context_unrounded <-
make_interval_table(
data = data_vax_interval,
group_var = "interval_context",
round = FALSE
) |>
arrange(vax_product, flag_type)
arrange(interval_context, interval_bin)

table_product_noninterval_flags_rounded <-
make_summary_table_total(
data = flag_long_noninterval,
group_vars = c("vax_product", "flag_type"),
table_interval_context_rounded <-
make_interval_table(
data = data_vax_interval,
group_var = "interval_context",
round = TRUE,
sdc_threshold = sdc_threshold
) |>
arrange(vax_product, flag_type)
arrange(interval_context, interval_bin)

write_csv(
table_product_noninterval_flags_unrounded,
fs::path(output_dir, "count_product_noninterval_flags_unrounded.csv")
table_interval_context_unrounded,
fs::path(output_dir, "count_interval_context_unrounded.csv")
)

write_csv(
table_product_noninterval_flags_rounded,
fs::path(output_dir, "count_product_noninterval_flags.csv")
table_interval_context_rounded,
fs::path(output_dir, "count_interval_context.csv")
)


# ---- Table 4: interval context x interval bin ----
table_interval_context_unrounded <-
# ---- Table 4: campaign x interval bin ----
# Current campaign = campaign of the current vaccination event.
# This shows the interval distribution within each campaign.

table_interval_campaign_unrounded <-
make_interval_table(
data = data_vax_interval,
group_var = "interval_context",
group_var = "campaign",
round = FALSE
) |>
arrange(interval_context, interval_bin)
arrange(campaign, interval_bin)

table_interval_context_rounded <-
table_interval_campaign_rounded <-
make_interval_table(
data = data_vax_interval,
group_var = "interval_context",
group_var = "campaign",
round = TRUE,
sdc_threshold = sdc_threshold
) |>
arrange(interval_context, interval_bin)
arrange(campaign, interval_bin)

write_csv(
table_interval_context_unrounded,
fs::path(output_dir, "count_interval_context_unrounded.csv")
table_interval_campaign_unrounded,
fs::path(output_dir, "count_interval_campaign_unrounded.csv")
)

write_csv(
table_interval_context_rounded,
fs::path(output_dir, "count_interval_context.csv")
table_interval_campaign_rounded,
fs::path(output_dir, "count_interval_campaign.csv")
)


Expand Down Expand Up @@ -449,4 +474,46 @@ write_csv(
write_csv(
table_interval_product_transition_rounded,
fs::path(output_dir, "count_interval_product_transition.csv")
)


## count same-day mixed-product co-occurrence ----

mixed_products_cooccurrence_flat <-
data_vax_ELD |>
filter(flag_same_day_mixed_product) |>
count(patient_id, vax_date, vax_product, name = "n") |>
arrange(patient_id, vax_date, vax_product) |>
group_by(patient_id, vax_date) |>
summarise(
vax_product =
paste0(n, "x ", as.character(vax_product),
collapse = " --AND-- "),
.groups = "drop"
)

count_mixed_products_cooccurrence_unrounded <-
mixed_products_cooccurrence_flat |>
group_by(vax_product) |>
summarise(
count_total = n(),
.groups = "drop"
) |>
arrange(desc(count_total)) |>
as_tibble()

count_mixed_products_cooccurrence <-
count_mixed_products_cooccurrence_unrounded |>
mutate(
count_total = roundmid_any(count_total, sdc_threshold)
)

write_csv(
count_mixed_products_cooccurrence_unrounded,
fs::path(output_dir, "count_same_day_mixed_product_cooccurrence_unrounded.csv")
)

write_csv(
count_mixed_products_cooccurrence,
fs::path(output_dir, "count_same_day_mixed_product_cooccurrence.csv")
)
13 changes: 8 additions & 5 deletions analysis/covid/fn_covid_data_quality.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ roundmid_any <- function(x, to = 1) {
# 2. Summary table functions ----

# ---- helper A: summary table with total denominator only ----
make_summary_table_total <- function(data, group_vars, round = FALSE, sdc_threshold = NULL) {
make_summary_table_total <- function(data, group_vars, denom_data, round = FALSE, sdc_threshold = NULL) {

# function to optionally round values
round_fun <- function(x) {
Expand All @@ -31,8 +31,8 @@ make_summary_table_total <- function(data, group_vars, round = FALSE, sdc_thresh
# choose column suffix
suffix <- if (round) "_midpoint10" else ""

denom_records_total <- round_fun(nrow(data))
denom_patients_total <- round_fun(dplyr::n_distinct(data$patient_id))
denom_records_total <- round_fun(nrow(denom_data))
denom_patients_total <- round_fun(dplyr::n_distinct(denom_data$patient_id))

out <-
data |>
Expand Down Expand Up @@ -66,6 +66,7 @@ make_summary_table_vaccination_date_specific_active <- function(
flag_data,
event_data,
registration_data,
group_vars,
round = FALSE,
sdc_threshold = NULL
) {
Expand Down Expand Up @@ -154,7 +155,9 @@ make_summary_table_vaccination_date_specific_active <- function(
# numerator
numerator_df <-
flag_data |>
dplyr::group_by(campaign, flag_type) |>
dplyr::group_by(
dplyr::across(all_of(group_vars))
) |>
dplyr::summarise(
n_records = round_fun(dplyr::n()),
n_patients = round_fun(dplyr::n_distinct(patient_id)),
Expand All @@ -176,7 +179,7 @@ make_summary_table_vaccination_date_specific_active <- function(
}

out |>
dplyr::select(campaign, flag_type, dplyr::everything())
dplyr::select(all_of(group_vars), dplyr::everything())
}


Expand Down