diff --git a/analysis/covid/0_covid_design.R b/analysis/covid/0_covid_design.R index f6ec419..2256843 100644 --- a/analysis/covid/0_covid_design.R +++ b/analysis/covid/0_covid_design.R @@ -192,8 +192,8 @@ approval_lookup <- c( pfizer_BA45 = "2022-09-11", #"2022-09-12"? pfizer_XBB15 = "2023-09-05", pfizer_JN1 = "2024-07-24", - #pfizer_KP2 = "2024-10-10", - #pfizer_KP2_pfs = "2024-10-10", + pfizer_KP2 = "2024-10-10", + pfizer_KP2_pfs = "2024-10-10", #pfizer_unspecified = "2020-12-02", #pfizer_original_children = "2021-12-22", #pfizer_JN1_children = "2024-07-24", @@ -217,6 +217,69 @@ approval_lookup <- c( #valneva = "2022-04-14" ) +# Approval dates come mainly from Table 3 of the ECHO protocol. +campaign_product_lookup <- list( + + "Primary series" = c( + "pfizer_original", # BNT162b2 + "moderna_original", # mRNA-1273 + "az_original" # ChAdOx1-S + ), + + "Autumn 2021" = c( + "pfizer_original", # BNT162b2 + "moderna_original", # mRNA-1273 + "az_original" # ChAdOx1-S + ), + + "Spring 2022" = c( + "pfizer_original", # BNT162b2 + "moderna_original" # mRNA-1273 + ), + + "Autumn 2022" = c( + "pfizer_original", # BNT162b2 + "moderna_original", # mRNA-1273 + "pfizer_BA1", # BNT162b2/BA.1 + "moderna_omicron" # mRNA-1273/BA.1 + ), + + "Spring 2023" = c( + "pfizer_BA45", # BNT162b2/BA.4-5 + "moderna_BA45", # mRNA-1273/BA.4-5 + "sanofigsk_B1" # Vidprevtyn + ), + + "Autumn 2023" = c( + "pfizer_BA45", # BNT162b2/BA.4-5 + "moderna_BA45", # mRNA-1273/BA.4-5 + "pfizer_XBB15", # BNT162b2.XBB.1.5 + "moderna_XBB15", # mRNA-1273.XBB.1.5 + "sanofigsk_B1" # Vidprevtyn + ), + + "Spring 2024" = c( + "pfizer_XBB15", # BNT162b2.XBB.1.5 + "moderna_XBB15" # mRNA-1273.XBB.1.5 + ), + + "Autumn 2024" = c( + "pfizer_JN1", # BNT162b2.JN.1 + "moderna_JN1" # mRNA-1273.JN.1 + ), + + "Spring 2025" = c( + "pfizer_JN1", # BNT162b2.JN.1 + "moderna_JN1" # mRNA-1273.JN.1 + ), + + "Autumn 2025" = c( + "pfizer_KP2", # BNT162b2.KP.2 + "pfizer_KP2_pfs" # BNT162b2.KP.2 + ) +) + + # relabel_from_lookup <- function(x, from, to, source){ # left_join(tibble(x=x), source, by = {{from}})[[{{to}}]] # } diff --git a/analysis/covid/2_covid_data_quality.R b/analysis/covid/2_covid_data_quality.R index f77a58b..d9168b7 100644 --- a/analysis/covid/2_covid_data_quality.R +++ b/analysis/covid/2_covid_data_quality.R @@ -116,7 +116,6 @@ data_vax_ELD <- as_tibble() - # ---- 3.3 Multiple Vaccinations on the Same Day ---- products_cooccurrence_flat <- @@ -137,9 +136,6 @@ products_cooccurrence_flat <- .groups = "drop" ) |> mutate( - flag_same_day_multiple = - total_records_day > 1, - flag_same_day_same_product = total_records_day > 1 & n_products_day == 1, @@ -154,7 +150,6 @@ data_vax_ELD <- select( patient_id, vax_date, total_records_day, n_products_day, product_pattern, - flag_same_day_multiple, flag_same_day_same_product, flag_same_day_mixed_product ), @@ -176,7 +171,16 @@ data_vax_ELD <- data_vax_interval <- data_vax_ELD |> filter(campaign != "Pre-2020-04-23") |> - filter(!flag_same_day_multiple) |> # exclude same-day multiple-record combinations + + # deduplicate same-day same-product records + arrange(patient_id, vax_date, vax_product) |> + group_by(patient_id, vax_date, vax_product) |> + slice(1) |> + ungroup() |> + + # exclude mixed-product records flagged for now + filter(!flag_same_day_mixed_product) |> # may be revised once standard cleaning rules are agreed + arrange(patient_id, vax_date) |> group_by(patient_id) |> mutate( @@ -255,7 +259,6 @@ flag_long_noninterval <- flag_pre_rollout_date, flag_unapproved_product, flag_product_before_approval, - flag_same_day_multiple, flag_same_day_same_product, flag_same_day_mixed_product ) |> @@ -279,6 +282,7 @@ flag_long_noninterval <- table_overall_noninterval_flags_unrounded <- make_summary_table_total( data = flag_long_noninterval, + denom_data = data_vax_ELD, group_vars = c("flag_type"), round = FALSE ) |> @@ -287,6 +291,7 @@ table_overall_noninterval_flags_unrounded <- table_overall_noninterval_flags_rounded <- make_summary_table_total( data = flag_long_noninterval, + denom_data = data_vax_ELD, group_vars = c("flag_type"), round = TRUE, sdc_threshold = sdc_threshold @@ -304,93 +309,113 @@ write_csv( ) -# ---- Table 2: Campaign summary of non-interval flags with vaccination-date-specific active denominators ---- +# ---- Table 2: Campaign x product summary of non-interval flags with vaccination-date-specific active denominators ---- +# Exclude the two pre-rollout categories for campaign/product summaries. +# These early categories are retained in the overall summary only. data_registration_ELD <- read_feather(here("output", "covid", "extract_covid","registrations.arrow")) -table_campaign_noninterval_flags_unrounded <- + +analysis_campaigns <- setdiff( + as.character(campaign_info$campaign_label), + c("Pre-2020-04-23", "Pre-roll-out") +) + +flag_long_noninterval_primary_onwards <- + flag_long_noninterval |> + dplyr::filter(campaign %in% analysis_campaigns) + +data_vax_ELD_primary_onwards <- + data_vax_ELD |> + dplyr::filter(campaign %in% analysis_campaigns) + +table_campaign_product_noninterval_flags_unrounded <- make_summary_table_vaccination_date_specific_active( - flag_data = flag_long_noninterval, - event_data = data_vax_ELD, + flag_data = flag_long_noninterval_primary_onwards, + event_data = data_vax_ELD_primary_onwards, registration_data = data_registration_ELD, + group_vars = c("campaign", "vax_product", "flag_type"), round = FALSE ) |> - arrange(campaign, flag_type) + dplyr::arrange(campaign, vax_product, flag_type) -table_campaign_noninterval_flags_rounded <- +table_campaign_product_noninterval_flags_rounded <- make_summary_table_vaccination_date_specific_active( - flag_data = flag_long_noninterval, - event_data = data_vax_ELD, + flag_data = flag_long_noninterval_primary_onwards, + event_data = data_vax_ELD_primary_onwards, registration_data = data_registration_ELD, + group_vars = c("campaign", "vax_product", "flag_type"), round = TRUE, sdc_threshold = sdc_threshold ) |> - arrange(campaign, flag_type) + dplyr::arrange(campaign, vax_product, flag_type) write_csv( - table_campaign_noninterval_flags_unrounded, - fs::path(output_dir, "count_campaign_noninterval_flags_unrounded.csv") + table_campaign_product_noninterval_flags_unrounded, + fs::path(output_dir, "count_campaign_product_noninterval_flags_unrounded.csv") ) write_csv( - table_campaign_noninterval_flags_rounded, - fs::path(output_dir, "count_campaign_noninterval_flags.csv") + table_campaign_product_noninterval_flags_rounded, + fs::path(output_dir, "count_campaign_product_noninterval_flags.csv") ) - -# ---- Table 3: Product summary of non-interval flags ---- -table_product_noninterval_flags_unrounded <- - make_summary_table_total( - data = flag_long_noninterval, - group_vars = c("vax_product", "flag_type"), +# ---- Table 3: interval context x interval bin ---- +table_interval_context_unrounded <- + make_interval_table( + data = data_vax_interval, + group_var = "interval_context", round = FALSE ) |> - arrange(vax_product, flag_type) + arrange(interval_context, interval_bin) -table_product_noninterval_flags_rounded <- - make_summary_table_total( - data = flag_long_noninterval, - group_vars = c("vax_product", "flag_type"), +table_interval_context_rounded <- + make_interval_table( + data = data_vax_interval, + group_var = "interval_context", round = TRUE, sdc_threshold = sdc_threshold ) |> - arrange(vax_product, flag_type) + arrange(interval_context, interval_bin) write_csv( - table_product_noninterval_flags_unrounded, - fs::path(output_dir, "count_product_noninterval_flags_unrounded.csv") + table_interval_context_unrounded, + fs::path(output_dir, "count_interval_context_unrounded.csv") ) write_csv( - table_product_noninterval_flags_rounded, - fs::path(output_dir, "count_product_noninterval_flags.csv") + table_interval_context_rounded, + fs::path(output_dir, "count_interval_context.csv") ) -# ---- Table 4: interval context x interval bin ---- -table_interval_context_unrounded <- +# ---- Table 4: campaign x interval bin ---- +# Current campaign = campaign of the current vaccination event. +# This shows the interval distribution within each campaign. + +table_interval_campaign_unrounded <- make_interval_table( data = data_vax_interval, - group_var = "interval_context", + group_var = "campaign", round = FALSE ) |> - arrange(interval_context, interval_bin) + arrange(campaign, interval_bin) -table_interval_context_rounded <- +table_interval_campaign_rounded <- make_interval_table( data = data_vax_interval, - group_var = "interval_context", + group_var = "campaign", round = TRUE, sdc_threshold = sdc_threshold ) |> - arrange(interval_context, interval_bin) + arrange(campaign, interval_bin) write_csv( - table_interval_context_unrounded, - fs::path(output_dir, "count_interval_context_unrounded.csv") + table_interval_campaign_unrounded, + fs::path(output_dir, "count_interval_campaign_unrounded.csv") ) write_csv( - table_interval_context_rounded, - fs::path(output_dir, "count_interval_context.csv") + table_interval_campaign_rounded, + fs::path(output_dir, "count_interval_campaign.csv") ) @@ -449,4 +474,46 @@ write_csv( write_csv( table_interval_product_transition_rounded, fs::path(output_dir, "count_interval_product_transition.csv") +) + + +## count same-day mixed-product co-occurrence ---- + +mixed_products_cooccurrence_flat <- + data_vax_ELD |> + filter(flag_same_day_mixed_product) |> + count(patient_id, vax_date, vax_product, name = "n") |> + arrange(patient_id, vax_date, vax_product) |> + group_by(patient_id, vax_date) |> + summarise( + vax_product = + paste0(n, "x ", as.character(vax_product), + collapse = " --AND-- "), + .groups = "drop" + ) + +count_mixed_products_cooccurrence_unrounded <- + mixed_products_cooccurrence_flat |> + group_by(vax_product) |> + summarise( + count_total = n(), + .groups = "drop" + ) |> + arrange(desc(count_total)) |> + as_tibble() + +count_mixed_products_cooccurrence <- + count_mixed_products_cooccurrence_unrounded |> + mutate( + count_total = roundmid_any(count_total, sdc_threshold) + ) + +write_csv( + count_mixed_products_cooccurrence_unrounded, + fs::path(output_dir, "count_same_day_mixed_product_cooccurrence_unrounded.csv") +) + +write_csv( + count_mixed_products_cooccurrence, + fs::path(output_dir, "count_same_day_mixed_product_cooccurrence.csv") ) \ No newline at end of file diff --git a/analysis/covid/fn_covid_data_quality.R b/analysis/covid/fn_covid_data_quality.R index d8728f1..70516d0 100644 --- a/analysis/covid/fn_covid_data_quality.R +++ b/analysis/covid/fn_covid_data_quality.R @@ -21,7 +21,7 @@ roundmid_any <- function(x, to = 1) { # 2. Summary table functions ---- # ---- helper A: summary table with total denominator only ---- -make_summary_table_total <- function(data, group_vars, round = FALSE, sdc_threshold = NULL) { +make_summary_table_total <- function(data, group_vars, denom_data, round = FALSE, sdc_threshold = NULL) { # function to optionally round values round_fun <- function(x) { @@ -31,8 +31,8 @@ make_summary_table_total <- function(data, group_vars, round = FALSE, sdc_thresh # choose column suffix suffix <- if (round) "_midpoint10" else "" - denom_records_total <- round_fun(nrow(data)) - denom_patients_total <- round_fun(dplyr::n_distinct(data$patient_id)) + denom_records_total <- round_fun(nrow(denom_data)) + denom_patients_total <- round_fun(dplyr::n_distinct(denom_data$patient_id)) out <- data |> @@ -66,6 +66,7 @@ make_summary_table_vaccination_date_specific_active <- function( flag_data, event_data, registration_data, + group_vars, round = FALSE, sdc_threshold = NULL ) { @@ -154,7 +155,9 @@ make_summary_table_vaccination_date_specific_active <- function( # numerator numerator_df <- flag_data |> - dplyr::group_by(campaign, flag_type) |> + dplyr::group_by( + dplyr::across(all_of(group_vars)) + ) |> dplyr::summarise( n_records = round_fun(dplyr::n()), n_patients = round_fun(dplyr::n_distinct(patient_id)), @@ -176,7 +179,7 @@ make_summary_table_vaccination_date_specific_active <- function( } out |> - dplyr::select(campaign, flag_type, dplyr::everything()) + dplyr::select(all_of(group_vars), dplyr::everything()) }