Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions analysis/annual_code_counts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# #############################################################################
# Annual migration coding
# - Author: Yamina Boukari
# - Bennett Institute for Applied Data Science, University of Oxford, 2025
#############################################################################

# This script uses ehrQL measures to produce annual counts of all migration-related codes,
# for comparison with this OCC paper: https://bjgpopen.org/content/early/2026/02/20/BJGPO.2025.0138

from ehrql import create_measures, INTERVAL
from ehrql.tables.tpp import patients, practice_registrations, clinical_events, addresses
import migration_status_variables
from analysis import utilities
import codelists

# Measures collection for this script, run against 1000 dummy patients locally.
measures = create_measures()
measures.configure_dummy_data(population_size=1000)
# Disclosure control is switched off for now.
# TODO: enable on real data
measures.configure_disclosure_control(enabled=False)

# Shared settings come from the project's utilities module; only the intervals
# are used as measure defaults here.
common = utilities.build_common_vars(INTERVAL)
default_intervals = common["intervals"]
measures.define_defaults(intervals=default_intervals)
# common["subgroups"] is currently not passed as group_by.

# The measures built from these event frames do not use the default denominator
# (anyone registered and alive in the interval): they need the number of people
# with a migrant code in the given interval ONLY, so every frame below is
# restricted to INTERVAL.

date_of_entry_code = ["860021000000109"]

migrant_codes_and_date_of_uk_entry = codelists.all_migrant_codes + date_of_entry_code


def _coded_events_in_interval(codes):
    """Return the clinical events dated during INTERVAL whose SNOMED CT code is in `codes`."""
    has_matching_code = clinical_events.snomedct_code.is_in(codes)
    is_within_interval = clinical_events.date.is_during(INTERVAL)
    return clinical_events.where(has_matching_code).where(is_within_interval)


# Date of UK entry code left out, because the BJGPO paper did not include it.
migration_codes_in_interval_excl_date_of_uk_entry = _coded_events_in_interval(
    codelists.all_migrant_codes
)

# Same selection, with the date of UK entry code added to the code list.
migration_codes_in_interval_incl_date_of_uk_entry = _coded_events_in_interval(
    migrant_codes_and_date_of_uk_entry
)

# One measure per event frame:
#   numerator   = number of matching coded events for the patient in the interval
#   denominator = whether the patient has any matching coded event in the interval
for measure_name, events in (
    (
        "all_migration_codes_excl_date_of_uk_entry",
        migration_codes_in_interval_excl_date_of_uk_entry,
    ),
    (
        "all_migration_codes_incl_date_of_uk_entry",
        migration_codes_in_interval_incl_date_of_uk_entry,
    ),
):
    measures.define_measure(
        name=measure_name,
        numerator=events.count_for_patient(),
        denominator=events.exists_for_patient(),
    )

18 changes: 18 additions & 0 deletions annual_code_counts_measures.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
measure,interval_start,interval_end,ratio,numerator,denominator
annual_count,2009-01-01,2009-12-31,0.067,67,1000
annual_count,2010-01-01,2010-12-31,0.07839195979899498,78,995
annual_count,2011-01-01,2011-12-31,0.0755873340143003,74,979
annual_count,2012-01-01,2012-12-31,0.06749740394600208,65,963
annual_count,2013-01-01,2013-12-31,0.06533192834562697,62,949
annual_count,2014-01-01,2014-12-31,0.04898828541001065,46,939
annual_count,2015-01-01,2015-12-31,0.07775377969762419,72,926
annual_count,2016-01-01,2016-12-31,0.06870229007633588,63,917
annual_count,2017-01-01,2017-12-31,0.06215316315205328,56,901
annual_count,2018-01-01,2018-12-31,0.07570621468926554,67,885
annual_count,2019-01-01,2019-12-31,0.07621247113163972,66,866
annual_count,2020-01-01,2020-12-31,0.07746478873239436,66,852
annual_count,2021-01-01,2021-12-31,0.07728894173602854,65,841
annual_count,2022-01-01,2022-12-31,0.07237635705669482,60,829
annual_count,2023-01-01,2023-12-31,0.08302808302808302,68,819
annual_count,2024-01-01,2024-12-31,0.0880893300248139,71,806
annual_count,2025-01-01,2025-12-31,0.07682619647355164,61,794
35 changes: 35 additions & 0 deletions measures.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
measure,interval_start,interval_end,ratio,numerator,denominator
all_migration_codes_excl_date_of_uk_entry,2009-01-01,2009-12-31,1.2338709677419355,153,124
all_migration_codes_excl_date_of_uk_entry,2010-01-01,2010-12-31,1.1904761904761905,150,126
all_migration_codes_excl_date_of_uk_entry,2011-01-01,2011-12-31,1.1544715447154472,142,123
all_migration_codes_excl_date_of_uk_entry,2012-01-01,2012-12-31,1.1652173913043478,134,115
all_migration_codes_excl_date_of_uk_entry,2013-01-01,2013-12-31,1.1967213114754098,146,122
all_migration_codes_excl_date_of_uk_entry,2014-01-01,2014-12-31,1.1346153846153846,177,156
all_migration_codes_excl_date_of_uk_entry,2015-01-01,2015-12-31,1.1102941176470589,151,136
all_migration_codes_excl_date_of_uk_entry,2016-01-01,2016-12-31,1.125,171,152
all_migration_codes_excl_date_of_uk_entry,2017-01-01,2017-12-31,1.0916030534351144,143,131
all_migration_codes_excl_date_of_uk_entry,2018-01-01,2018-12-31,1.103448275862069,160,145
all_migration_codes_excl_date_of_uk_entry,2019-01-01,2019-12-31,1.238709677419355,192,155
all_migration_codes_excl_date_of_uk_entry,2020-01-01,2020-12-31,1.2121212121212122,200,165
all_migration_codes_excl_date_of_uk_entry,2021-01-01,2021-12-31,1.3496932515337423,220,163
all_migration_codes_excl_date_of_uk_entry,2022-01-01,2022-12-31,1.2446808510638299,234,188
all_migration_codes_excl_date_of_uk_entry,2023-01-01,2023-12-31,1.343915343915344,254,189
all_migration_codes_excl_date_of_uk_entry,2024-01-01,2024-12-31,1.4751131221719458,326,221
all_migration_codes_excl_date_of_uk_entry,2025-01-01,2025-12-31,1.6651982378854626,378,227
all_migration_codes_incl_date_of_uk_entry,2009-01-01,2009-12-31,1.2338709677419355,153,124
all_migration_codes_incl_date_of_uk_entry,2010-01-01,2010-12-31,1.1904761904761905,150,126
all_migration_codes_incl_date_of_uk_entry,2011-01-01,2011-12-31,1.1544715447154472,142,123
all_migration_codes_incl_date_of_uk_entry,2012-01-01,2012-12-31,1.1652173913043478,134,115
all_migration_codes_incl_date_of_uk_entry,2013-01-01,2013-12-31,1.1967213114754098,146,122
all_migration_codes_incl_date_of_uk_entry,2014-01-01,2014-12-31,1.1346153846153846,177,156
all_migration_codes_incl_date_of_uk_entry,2015-01-01,2015-12-31,1.1102941176470589,151,136
all_migration_codes_incl_date_of_uk_entry,2016-01-01,2016-12-31,1.131578947368421,172,152
all_migration_codes_incl_date_of_uk_entry,2017-01-01,2017-12-31,1.0916030534351144,143,131
all_migration_codes_incl_date_of_uk_entry,2018-01-01,2018-12-31,1.103448275862069,160,145
all_migration_codes_incl_date_of_uk_entry,2019-01-01,2019-12-31,1.238709677419355,192,155
all_migration_codes_incl_date_of_uk_entry,2020-01-01,2020-12-31,1.2121212121212122,200,165
all_migration_codes_incl_date_of_uk_entry,2021-01-01,2021-12-31,1.3496932515337423,220,163
all_migration_codes_incl_date_of_uk_entry,2022-01-01,2022-12-31,1.2446808510638299,234,188
all_migration_codes_incl_date_of_uk_entry,2023-01-01,2023-12-31,1.343915343915344,254,189
all_migration_codes_incl_date_of_uk_entry,2024-01-01,2024-12-31,1.4751131221719458,326,221
all_migration_codes_incl_date_of_uk_entry,2025-01-01,2025-12-31,1.6651982378854626,378,227
6 changes: 6 additions & 0 deletions project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -265,4 +265,10 @@ actions:
moderately_sensitive:
csv: output/tables/date_variable_checks.csv

# Runs the ehrQL measures script analysis/annual_code_counts.py and writes the
# resulting measures table to output/tables/annual_counts/.
# NOTE(review): that script currently calls configure_disclosure_control(enabled=False);
# confirm this is switched on before the action is run on real data.
generate_annual_migration_coding_counts:
run: ehrql:v1 generate-measures analysis/annual_code_counts.py --output output/tables/annual_counts/migration_coding_occ_comparison.csv
outputs:
moderately_sensitive:
csv: output/tables/annual_counts/migration_coding_occ_comparison.csv