From a8aed367758db5cee92351841c78608664ddd66d Mon Sep 17 00:00:00 2001 From: qiangao7 Date: Tue, 19 May 2026 10:18:11 +0000 Subject: [PATCH 1/7] Add KP2 to approval flow; introduce campaign_product_lookup; --- analysis/covid/0_covid_design.R | 67 ++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/analysis/covid/0_covid_design.R b/analysis/covid/0_covid_design.R index f6ec419..2256843 100644 --- a/analysis/covid/0_covid_design.R +++ b/analysis/covid/0_covid_design.R @@ -192,8 +192,8 @@ approval_lookup <- c( pfizer_BA45 = "2022-09-11", #"2022-09-12"? pfizer_XBB15 = "2023-09-05", pfizer_JN1 = "2024-07-24", - #pfizer_KP2 = "2024-10-10", - #pfizer_KP2_pfs = "2024-10-10", + pfizer_KP2 = "2024-10-10", + pfizer_KP2_pfs = "2024-10-10", #pfizer_unspecified = "2020-12-02", #pfizer_original_children = "2021-12-22", #pfizer_JN1_children = "2024-07-24", @@ -217,6 +217,69 @@ approval_lookup <- c( #valneva = "2022-04-14" ) +# Approval dates come mainly from Table 3 of the ECHO protocol. +campaign_product_lookup <- list( + + "Primary series" = c( + "pfizer_original", # BNT162b2 + "moderna_original", # mRNA-1273 + "az_original" # ChAdOx1-S + ), + + "Autumn 2021" = c( + "pfizer_original", # BNT162b2 + "moderna_original", # mRNA-1273 + "az_original" # ChAdOx1-S + ), + + "Spring 2022" = c( + "pfizer_original", # BNT162b2 + "moderna_original" # mRNA-1273 + ), + + "Autumn 2022" = c( + "pfizer_original", # BNT162b2 + "moderna_original", # mRNA-1273 + "pfizer_BA1", # BNT162b2/BA.1 + "moderna_omicron" # mRNA-1273/BA.1 + ), + + "Spring 2023" = c( + "pfizer_BA45", # BNT162b2/BA.4-5 + "moderna_BA45", # mRNA-1273/BA.4-5 + "sanofigsk_B1" # Vidprevtyn + ), + + "Autumn 2023" = c( + "pfizer_BA45", # BNT162b2/BA.4-5 + "moderna_BA45", # mRNA-1273/BA.4-5 + "pfizer_XBB15", # BNT162b2.XBB.1.5 + "moderna_XBB15", # mRNA-1273.XBB.1.5 + "sanofigsk_B1" # Vidprevtyn + ), + + "Spring 2024" = c( + "pfizer_XBB15", # BNT162b2.XBB.1.5 + "moderna_XBB15" # mRNA-1273.XBB.1.5 + ), + + "Autumn 2024" = c( + "pfizer_JN1", # BNT162b2.JN.1 + "moderna_JN1" # mRNA-1273.JN.1 + ), + + "Spring 2025" = c( + "pfizer_JN1", # BNT162b2.JN.1 + "moderna_JN1" # mRNA-1273.JN.1 + ), + + "Autumn 2025" = c( + "pfizer_KP2", # BNT162b2.KP.2 + "pfizer_KP2_pfs" # BNT162b2.KP.2 + ) +) + + # relabel_from_lookup <- function(x, from, to, source){ # left_join(tibble(x=x), source, by = {{from}})[[{{to}}]] # } From 4ebb3b05bc8362557e170dacec166fa13d76127e Mon Sep 17 00:00:00 2001 From: qiangao7 Date: Tue, 19 May 2026 10:19:35 +0000 Subject: [PATCH 2/7] =?UTF-8?q?Remove=20separate=20product=20and=20campaig?= =?UTF-8?q?n=20tables;=20build=20combined=20product=C3=97campaign=20table.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analysis/covid/2_covid_data_quality.R | 71 +++++++++++--------------- analysis/covid/fn_covid_data_quality.R | 7 ++- 2 files changed, 35 insertions(+), 43 deletions(-) diff --git a/analysis/covid/2_covid_data_quality.R b/analysis/covid/2_covid_data_quality.R index f77a58b..46b8f39 100644 --- a/analysis/covid/2_covid_data_quality.R +++ b/analysis/covid/2_covid_data_quality.R @@ -304,64 +304,53 @@ write_csv( ) -# ---- Table 2: Campaign summary of non-interval flags with vaccination-date-specific active denominators ---- +# ---- Table 2: Campaign x product summary of non-interval flags with vaccination-date-specific active denominators ---- +# Exclude the two pre-rollout categories for campaign/product summaries. +# These early categories are retained in the overall summary only. data_registration_ELD <- read_feather(here("output", "covid", "extract_covid","registrations.arrow")) -table_campaign_noninterval_flags_unrounded <- - make_summary_table_vaccination_date_specific_active( - flag_data = flag_long_noninterval, - event_data = data_vax_ELD, - registration_data = data_registration_ELD, - round = FALSE - ) |> - arrange(campaign, flag_type) -table_campaign_noninterval_flags_rounded <- - make_summary_table_vaccination_date_specific_active( - flag_data = flag_long_noninterval, - event_data = data_vax_ELD, - registration_data = data_registration_ELD, - round = TRUE, - sdc_threshold = sdc_threshold - ) |> - arrange(campaign, flag_type) - -write_csv( - table_campaign_noninterval_flags_unrounded, - fs::path(output_dir, "count_campaign_noninterval_flags_unrounded.csv") +analysis_campaigns <- setdiff( + as.character(campaign_info$campaign_label), + c("Pre-2020-04-23", "Pre-roll-out") ) -write_csv( - table_campaign_noninterval_flags_rounded, - fs::path(output_dir, "count_campaign_noninterval_flags.csv") -) +flag_long_noninterval_primary_onwards <- + flag_long_noninterval |> + dplyr::filter(campaign %in% analysis_campaigns) +data_vax_ELD_primary_onwards <- + data_vax_ELD |> + dplyr::filter(campaign %in% analysis_campaigns) -# ---- Table 3: Product summary of non-interval flags ---- -table_product_noninterval_flags_unrounded <- - make_summary_table_total( - data = flag_long_noninterval, - group_vars = c("vax_product", "flag_type"), +table_campaign_product_noninterval_flags_unrounded <- + make_summary_table_vaccination_date_specific_active( + flag_data = flag_long_noninterval_primary_onwards, + event_data = data_vax_ELD_primary_onwards, + registration_data = data_registration_ELD, + group_vars = c("campaign", "vax_product", "flag_type"), round = FALSE ) |> - arrange(vax_product, flag_type) + dplyr::arrange(campaign, vax_product, flag_type) -table_product_noninterval_flags_rounded <- - make_summary_table_total( - data = flag_long_noninterval, - group_vars = c("vax_product", "flag_type"), +table_campaign_product_noninterval_flags_rounded <- + make_summary_table_vaccination_date_specific_active( + flag_data = flag_long_noninterval_primary_onwards, + event_data = data_vax_ELD_primary_onwards, + registration_data = data_registration_ELD, + group_vars = c("campaign", "vax_product", "flag_type"), round = TRUE, sdc_threshold = sdc_threshold ) |> - arrange(vax_product, flag_type) + dplyr::arrange(campaign, vax_product, flag_type) write_csv( - table_product_noninterval_flags_unrounded, - fs::path(output_dir, "count_product_noninterval_flags_unrounded.csv") + table_campaign_product_noninterval_flags_unrounded, + fs::path(output_dir, "count_campaign_product_noninterval_flags_unrounded.csv") ) write_csv( - table_product_noninterval_flags_rounded, - fs::path(output_dir, "count_product_noninterval_flags.csv") + table_campaign_product_noninterval_flags_rounded, + fs::path(output_dir, "count_campaign_product_noninterval_flags.csv") ) diff --git a/analysis/covid/fn_covid_data_quality.R b/analysis/covid/fn_covid_data_quality.R index d8728f1..418c34f 100644 --- a/analysis/covid/fn_covid_data_quality.R +++ b/analysis/covid/fn_covid_data_quality.R @@ -66,6 +66,7 @@ make_summary_table_vaccination_date_specific_active <- function( flag_data, event_data, registration_data, + group_vars, round = FALSE, sdc_threshold = NULL ) { @@ -154,7 +155,9 @@ make_summary_table_vaccination_date_specific_active <- function( # numerator numerator_df <- flag_data |> - dplyr::group_by(campaign, flag_type) |> + dplyr::group_by( + dplyr::across(all_of(group_vars)) + ) |> dplyr::summarise( n_records = round_fun(dplyr::n()), n_patients = round_fun(dplyr::n_distinct(patient_id)), @@ -176,7 +179,7 @@ make_summary_table_vaccination_date_specific_active <- function( } out |> - dplyr::select(campaign, flag_type, dplyr::everything()) + dplyr::select(all_of(group_vars), dplyr::everything()) } From 11bee9da9594cd3a4f97f0873b84b2c22c73b2bc Mon Sep 17 00:00:00 2001 From: qiangao7 Date: Tue, 19 May 2026 10:21:21 +0000 Subject: [PATCH 3/7] Remove redundant flag (same-day multiple product) --- analysis/covid/2_covid_data_quality.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/analysis/covid/2_covid_data_quality.R b/analysis/covid/2_covid_data_quality.R index 46b8f39..c549197 100644 --- a/analysis/covid/2_covid_data_quality.R +++ b/analysis/covid/2_covid_data_quality.R @@ -116,7 +116,6 @@ data_vax_ELD <- as_tibble() - # ---- 3.3 Multiple Vaccinations on the Same Day ---- products_cooccurrence_flat <- @@ -137,9 +136,6 @@ products_cooccurrence_flat <- .groups = "drop" ) |> mutate( - flag_same_day_multiple = - total_records_day > 1, - flag_same_day_same_product = total_records_day > 1 & n_products_day == 1, @@ -154,7 +150,6 @@ data_vax_ELD <- select( patient_id, vax_date, total_records_day, n_products_day, product_pattern, - flag_same_day_multiple, flag_same_day_same_product, flag_same_day_mixed_product ), @@ -176,7 +171,8 @@ data_vax_ELD <- data_vax_interval <- data_vax_ELD |> filter(campaign != "Pre-2020-04-23") |> - filter(!flag_same_day_multiple) |> # exclude same-day multiple-record combinations + filter(!flag_same_day_same_product) |> # exclude same-day multiple-record combinations + filter(!flag_same_day_same_product) |> # exclude same-day multiple-record combinations arrange(patient_id, vax_date) |> group_by(patient_id) |> mutate( @@ -255,7 +251,6 @@ flag_long_noninterval <- flag_pre_rollout_date, flag_unapproved_product, flag_product_before_approval, - flag_same_day_multiple, flag_same_day_same_product, flag_same_day_mixed_product ) |> From 62082d70699dc1c624239a3fd4bcbbb69b23afb1 Mon Sep 17 00:00:00 2001 From: qiangao7 Date: Tue, 19 May 2026 10:22:02 +0000 Subject: [PATCH 4/7] Update flag total to records total (5.11 report used early-year total due to time constraints for re-output and checks). --- analysis/covid/2_covid_data_quality.R | 2 ++ analysis/covid/fn_covid_data_quality.R | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/analysis/covid/2_covid_data_quality.R b/analysis/covid/2_covid_data_quality.R index c549197..481cfef 100644 --- a/analysis/covid/2_covid_data_quality.R +++ b/analysis/covid/2_covid_data_quality.R @@ -274,6 +274,7 @@ flag_long_noninterval <- table_overall_noninterval_flags_unrounded <- make_summary_table_total( data = flag_long_noninterval, + denom_data = data_vax_ELD, group_vars = c("flag_type"), round = FALSE ) |> @@ -282,6 +283,7 @@ table_overall_noninterval_flags_unrounded <- table_overall_noninterval_flags_rounded <- make_summary_table_total( data = flag_long_noninterval, + denom_data = data_vax_ELD, group_vars = c("flag_type"), round = TRUE, sdc_threshold = sdc_threshold diff --git a/analysis/covid/fn_covid_data_quality.R b/analysis/covid/fn_covid_data_quality.R index 418c34f..70516d0 100644 --- a/analysis/covid/fn_covid_data_quality.R +++ b/analysis/covid/fn_covid_data_quality.R @@ -21,7 +21,7 @@ roundmid_any <- function(x, to = 1) { # 2. Summary table functions ---- # ---- helper A: summary table with total denominator only ---- -make_summary_table_total <- function(data, group_vars, round = FALSE, sdc_threshold = NULL) { +make_summary_table_total <- function(data, group_vars, denom_data, round = FALSE, sdc_threshold = NULL) { # function to optionally round values round_fun <- function(x) { @@ -31,8 +31,8 @@ make_summary_table_total <- function(data, group_vars, round = FALSE, sdc_thresh # choose column suffix suffix <- if (round) "_midpoint10" else "" - denom_records_total <- round_fun(nrow(data)) - denom_patients_total <- round_fun(dplyr::n_distinct(data$patient_id)) + denom_records_total <- round_fun(nrow(denom_data)) + denom_patients_total <- round_fun(dplyr::n_distinct(denom_data$patient_id)) out <- data |> From 4510106fad0a99ef5f4da079f6c7296f4e247c96 Mon Sep 17 00:00:00 2001 From: qiangao7 Date: Tue, 19 May 2026 10:22:33 +0000 Subject: [PATCH 5/7] =?UTF-8?q?Generate=20campaign=20=C3=97=20interval=20b?= =?UTF-8?q?in=20table.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analysis/covid/2_covid_data_quality.R | 35 +++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/analysis/covid/2_covid_data_quality.R b/analysis/covid/2_covid_data_quality.R index 481cfef..480f783 100644 --- a/analysis/covid/2_covid_data_quality.R +++ b/analysis/covid/2_covid_data_quality.R @@ -350,8 +350,7 @@ write_csv( fs::path(output_dir, "count_campaign_product_noninterval_flags.csv") ) - -# ---- Table 4: interval context x interval bin ---- +# ---- Table 3: interval context x interval bin ---- table_interval_context_unrounded <- make_interval_table( data = data_vax_interval, @@ -380,6 +379,38 @@ write_csv( ) +# ---- Table 4: campaign x interval bin ---- +# Current campaign = campaign of the current vaccination event. +# This shows the interval distribution within each campaign. + +table_interval_campaign_unrounded <- + make_interval_table( + data = data_vax_interval, + group_var = "campaign", + round = FALSE + ) |> + arrange(campaign, interval_bin) + +table_interval_campaign_rounded <- + make_interval_table( + data = data_vax_interval, + group_var = "campaign", + round = TRUE, + sdc_threshold = sdc_threshold + ) |> + arrange(campaign, interval_bin) + +write_csv( + table_interval_campaign_unrounded, + fs::path(output_dir, "count_interval_campaign_unrounded.csv") +) + +write_csv( + table_interval_campaign_rounded, + fs::path(output_dir, "count_interval_campaign.csv") +) + + # ---- Table 5: campaign transition type x interval bin ---- table_interval_campaign_transition_unrounded <- make_interval_table( From 2893c7a19057d79c9661d0f675f5ceb3e1ccace8 Mon Sep 17 00:00:00 2001 From: qiangao7 Date: Wed, 20 May 2026 13:06:19 +0000 Subject: [PATCH 6/7] Keep only one per same product and remove all mixed products first --- analysis/covid/2_covid_data_quality.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/analysis/covid/2_covid_data_quality.R b/analysis/covid/2_covid_data_quality.R index 480f783..719dd4b 100644 --- a/analysis/covid/2_covid_data_quality.R +++ b/analysis/covid/2_covid_data_quality.R @@ -171,8 +171,16 @@ data_vax_ELD <- data_vax_interval <- data_vax_ELD |> filter(campaign != "Pre-2020-04-23") |> - filter(!flag_same_day_same_product) |> # exclude same-day multiple-record combinations - filter(!flag_same_day_same_product) |> # exclude same-day multiple-record combinations + + # deduplicate same-day same-product records + arrange(patient_id, vax_date, vax_product) |> + group_by(patient_id, vax_date, vax_product) |> + slice(1) |> + ungroup() |> + + # exclude mixed-product records flagged for now + filter(!flag_same_day_mixed_product) |> # may be revised once standard cleaning rules are agreed + arrange(patient_id, vax_date) |> group_by(patient_id) |> mutate( From 42444f8343255136fad74ba8ccdb0652012eed0b Mon Sep 17 00:00:00 2001 From: qiangao7 Date: Wed, 20 May 2026 13:06:51 +0000 Subject: [PATCH 7/7] Create a table to analyze patterns of mixed products --- analysis/covid/2_covid_data_quality.R | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/analysis/covid/2_covid_data_quality.R b/analysis/covid/2_covid_data_quality.R index 719dd4b..d9168b7 100644 --- a/analysis/covid/2_covid_data_quality.R +++ b/analysis/covid/2_covid_data_quality.R @@ -474,4 +474,46 @@ write_csv( write_csv( table_interval_product_transition_rounded, fs::path(output_dir, "count_interval_product_transition.csv") +) + + +## count same-day mixed-product co-occurrence ---- + +mixed_products_cooccurrence_flat <- + data_vax_ELD |> + filter(flag_same_day_mixed_product) |> + count(patient_id, vax_date, vax_product, name = "n") |> + arrange(patient_id, vax_date, vax_product) |> + group_by(patient_id, vax_date) |> + summarise( + vax_product = + paste0(n, "x ", as.character(vax_product), + collapse = " --AND-- "), + .groups = "drop" + ) + +count_mixed_products_cooccurrence_unrounded <- + mixed_products_cooccurrence_flat |> + group_by(vax_product) |> + summarise( + count_total = n(), + .groups = "drop" + ) |> + arrange(desc(count_total)) |> + as_tibble() + +count_mixed_products_cooccurrence <- + count_mixed_products_cooccurrence_unrounded |> + mutate( + count_total = roundmid_any(count_total, sdc_threshold) + ) + +write_csv( + count_mixed_products_cooccurrence_unrounded, + fs::path(output_dir, "count_same_day_mixed_product_cooccurrence_unrounded.csv") +) + +write_csv( + count_mixed_products_cooccurrence, + fs::path(output_dir, "count_same_day_mixed_product_cooccurrence.csv") ) \ No newline at end of file