From 3727d5a6019ecbbad6b465eea180bd7ef753d9cf Mon Sep 17 00:00:00 2001
From: Yamina Boukari <87201452+YaminaB@users.noreply.github.com>
Date: Mon, 15 Jun 2026 13:58:00 +0000
Subject: [PATCH] adding occ migration coding comparison

---
 analysis/annual_code_counts.py  | 50 +++++++++++++++++++++++++++++++++
 annual_code_counts_measures.csv | 18 ++++++++++++
 measures.csv                    | 35 +++++++++++++++++++++++
 project.yaml                    |  6 ++++
 4 files changed, 109 insertions(+)
 create mode 100644 analysis/annual_code_counts.py
 create mode 100644 annual_code_counts_measures.csv
 create mode 100644 measures.csv

diff --git a/analysis/annual_code_counts.py b/analysis/annual_code_counts.py
new file mode 100644
index 0000000..e02e76a
--- /dev/null
+++ b/analysis/annual_code_counts.py
@@ -0,0 +1,50 @@
+# #############################################################################
+# Annual migration coding
+# - Author: Yamina Boukari
+# - Bennett Institute for Applied Data Science, University of Oxford, 2025
+#############################################################################
+
+# This script uses ehrQL measures to create an annual count of all migration-related codes,
+# in order to compare with this OCC paper: https://bjgpopen.org/content/early/2026/02/20/BJGPO.2025.0138
+
+from ehrql import create_measures, INTERVAL
+from ehrql.tables.tpp import patients, practice_registrations, clinical_events, addresses # NOTE(review): only clinical_events is used below
+import migration_status_variables # NOTE(review): not referenced in this script
+from analysis import utilities
+import codelists
+
+measures = create_measures()
+measures.configure_dummy_data(population_size=1000)
+measures.configure_disclosure_control(enabled=False) # NOTE: disclosure control is switched off here; enable before running on real data
+
+common = utilities.build_common_vars(INTERVAL)
+measures.define_defaults(intervals=common["intervals"])
+                        # (subgroup breakdown currently disabled) group_by = common["subgroups"])
+
+# No default denominator is set: for these measures the denominator is the number of people with
+# a migrant code in the given interval ONLY, whereas the default denominator is anyone registered
+# and alive in the interval.
+
+date_of_entry_code = ["860021000000109"] # the "date of UK entry" code, kept separate so it can be included or excluded
+# all migrant codes plus the date of UK entry code (assumes codelists.all_migrant_codes is a list, so `+` concatenates - TODO confirm)
+migrant_codes_and_date_of_uk_entry = codelists.all_migrant_codes + date_of_entry_code
+# excl. date of UK entry code (because the BJGPO paper did not include it): events in the interval matching codelists.all_migrant_codes
+migration_codes_in_interval_excl_date_of_uk_entry = clinical_events.where(
+    clinical_events.snomedct_code.is_in(codelists.all_migrant_codes)).where(
+    clinical_events.date.is_during(INTERVAL))
+
+# incl. date of UK entry code: events in the interval matching any migrant code or the date of UK entry code
+migration_codes_in_interval_incl_date_of_uk_entry = clinical_events.where(
+    clinical_events.snomedct_code.is_in(migrant_codes_and_date_of_uk_entry)).where(
+    clinical_events.date.is_during(INTERVAL))
+
+measures.define_measure(
+    name="all_migration_codes_excl_date_of_uk_entry",
+    numerator=migration_codes_in_interval_excl_date_of_uk_entry.count_for_patient(), # number of matching events for each patient
+    denominator=migration_codes_in_interval_excl_date_of_uk_entry.exists_for_patient()) # whether the patient has at least one matching event
+
+measures.define_measure(
+    name="all_migration_codes_incl_date_of_uk_entry",
+    numerator=migration_codes_in_interval_incl_date_of_uk_entry.count_for_patient(), # as above, with the date of UK entry code included
+    denominator=migration_codes_in_interval_incl_date_of_uk_entry.exists_for_patient())
+
diff --git a/annual_code_counts_measures.csv b/annual_code_counts_measures.csv
new file mode 100644
index 0000000..dd26ef8
--- /dev/null
+++ b/annual_code_counts_measures.csv
@@ -0,0 +1,18 @@
+measure,interval_start,interval_end,ratio,numerator,denominator
+annual_count,2009-01-01,2009-12-31,0.067,67,1000
+annual_count,2010-01-01,2010-12-31,0.07839195979899498,78,995
+annual_count,2011-01-01,2011-12-31,0.0755873340143003,74,979
+annual_count,2012-01-01,2012-12-31,0.06749740394600208,65,963
+annual_count,2013-01-01,2013-12-31,0.06533192834562697,62,949
+annual_count,2014-01-01,2014-12-31,0.04898828541001065,46,939
+annual_count,2015-01-01,2015-12-31,0.07775377969762419,72,926
+annual_count,2016-01-01,2016-12-31,0.06870229007633588,63,917
+annual_count,2017-01-01,2017-12-31,0.06215316315205328,56,901
+annual_count,2018-01-01,2018-12-31,0.07570621468926554,67,885
+annual_count,2019-01-01,2019-12-31,0.07621247113163972,66,866 +annual_count,2020-01-01,2020-12-31,0.07746478873239436,66,852 +annual_count,2021-01-01,2021-12-31,0.07728894173602854,65,841 +annual_count,2022-01-01,2022-12-31,0.07237635705669482,60,829 +annual_count,2023-01-01,2023-12-31,0.08302808302808302,68,819 +annual_count,2024-01-01,2024-12-31,0.0880893300248139,71,806 +annual_count,2025-01-01,2025-12-31,0.07682619647355164,61,794 diff --git a/measures.csv b/measures.csv new file mode 100644 index 0000000..70ed85e --- /dev/null +++ b/measures.csv @@ -0,0 +1,35 @@ +measure,interval_start,interval_end,ratio,numerator,denominator +all_migration_codes_excl_date_of_uk_entry,2009-01-01,2009-12-31,1.2338709677419355,153,124 +all_migration_codes_excl_date_of_uk_entry,2010-01-01,2010-12-31,1.1904761904761905,150,126 +all_migration_codes_excl_date_of_uk_entry,2011-01-01,2011-12-31,1.1544715447154472,142,123 +all_migration_codes_excl_date_of_uk_entry,2012-01-01,2012-12-31,1.1652173913043478,134,115 +all_migration_codes_excl_date_of_uk_entry,2013-01-01,2013-12-31,1.1967213114754098,146,122 +all_migration_codes_excl_date_of_uk_entry,2014-01-01,2014-12-31,1.1346153846153846,177,156 +all_migration_codes_excl_date_of_uk_entry,2015-01-01,2015-12-31,1.1102941176470589,151,136 +all_migration_codes_excl_date_of_uk_entry,2016-01-01,2016-12-31,1.125,171,152 +all_migration_codes_excl_date_of_uk_entry,2017-01-01,2017-12-31,1.0916030534351144,143,131 +all_migration_codes_excl_date_of_uk_entry,2018-01-01,2018-12-31,1.103448275862069,160,145 +all_migration_codes_excl_date_of_uk_entry,2019-01-01,2019-12-31,1.238709677419355,192,155 +all_migration_codes_excl_date_of_uk_entry,2020-01-01,2020-12-31,1.2121212121212122,200,165 +all_migration_codes_excl_date_of_uk_entry,2021-01-01,2021-12-31,1.3496932515337423,220,163 +all_migration_codes_excl_date_of_uk_entry,2022-01-01,2022-12-31,1.2446808510638299,234,188 
+all_migration_codes_excl_date_of_uk_entry,2023-01-01,2023-12-31,1.343915343915344,254,189 +all_migration_codes_excl_date_of_uk_entry,2024-01-01,2024-12-31,1.4751131221719458,326,221 +all_migration_codes_excl_date_of_uk_entry,2025-01-01,2025-12-31,1.6651982378854626,378,227 +all_migration_codes_incl_date_of_uk_entry,2009-01-01,2009-12-31,1.2338709677419355,153,124 +all_migration_codes_incl_date_of_uk_entry,2010-01-01,2010-12-31,1.1904761904761905,150,126 +all_migration_codes_incl_date_of_uk_entry,2011-01-01,2011-12-31,1.1544715447154472,142,123 +all_migration_codes_incl_date_of_uk_entry,2012-01-01,2012-12-31,1.1652173913043478,134,115 +all_migration_codes_incl_date_of_uk_entry,2013-01-01,2013-12-31,1.1967213114754098,146,122 +all_migration_codes_incl_date_of_uk_entry,2014-01-01,2014-12-31,1.1346153846153846,177,156 +all_migration_codes_incl_date_of_uk_entry,2015-01-01,2015-12-31,1.1102941176470589,151,136 +all_migration_codes_incl_date_of_uk_entry,2016-01-01,2016-12-31,1.131578947368421,172,152 +all_migration_codes_incl_date_of_uk_entry,2017-01-01,2017-12-31,1.0916030534351144,143,131 +all_migration_codes_incl_date_of_uk_entry,2018-01-01,2018-12-31,1.103448275862069,160,145 +all_migration_codes_incl_date_of_uk_entry,2019-01-01,2019-12-31,1.238709677419355,192,155 +all_migration_codes_incl_date_of_uk_entry,2020-01-01,2020-12-31,1.2121212121212122,200,165 +all_migration_codes_incl_date_of_uk_entry,2021-01-01,2021-12-31,1.3496932515337423,220,163 +all_migration_codes_incl_date_of_uk_entry,2022-01-01,2022-12-31,1.2446808510638299,234,188 +all_migration_codes_incl_date_of_uk_entry,2023-01-01,2023-12-31,1.343915343915344,254,189 +all_migration_codes_incl_date_of_uk_entry,2024-01-01,2024-12-31,1.4751131221719458,326,221 +all_migration_codes_incl_date_of_uk_entry,2025-01-01,2025-12-31,1.6651982378854626,378,227 diff --git a/project.yaml b/project.yaml index 860c464..13767e1 100644 --- a/project.yaml +++ b/project.yaml @@ -265,4 +265,10 @@ actions: moderately_sensitive: 
csv: output/tables/date_variable_checks.csv + generate_annual_migration_coding_counts: + run: ehrql:v1 generate-measures analysis/annual_code_counts.py --output output/tables/annual_counts/migration_coding_occ_comparison.csv + outputs: + moderately_sensitive: + csv: output/tables/annual_counts/migration_coding_occ_comparison.csv +