Skip to content

Commit e9759cc

Browse files
committed
Merge branch 'add_rvdss_indicator' into ndefries/rvdss-framework
2 parents f68e335 + 5696636 commit e9759cc

File tree

3 files changed

+20
-9
lines changed

3 files changed

+20
-9
lines changed

src/acquisition/rvdss/constants.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@
99
"adenovirus": "adv",
1010
"adeno": "adv",
1111
"human metapneumovirus": "hmpv",
12-
"enterovirus/rhinovirus": "evrv",
12+
"enterovirus_rhinovirus": "evrv",
1313
"rhinovirus": "evrv",
1414
"rhv": "evrv",
15-
"entero/rhino": "evrv",
15+
"entero_rhino": "evrv",
1616
"rhino":"evrv",
17-
"ev/rv":"evrv",
17+
"ev_rv":"evrv",
1818
"coronavirus":"hcov",
1919
"coron":"hcov",
2020
"coro":"hcov",
@@ -45,6 +45,7 @@
4545
"atl":"atlantic",
4646
"pr" :"prairies" ,
4747
"terr" :"territories",
48+
"uhn sinai hospital":"uhn mount sinai hospital"
4849
}
4950

5051
# Regions are groups of provinces that are geographically close together. Some single provinces are reported as their own region (e.g. Québec, Ontario).

src/acquisition/rvdss/pull_historic.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,9 @@ def make_signal_type_spelling_consistent(signal):
210210
combined_pat2 = '|'.join((pat3, pat4))
211211

212212
new_signal = re.sub(combined_pat, "positive_tests",signal)
213-
new_signal = re.sub(combined_pat2, "positive_tests",signal)
214-
new_signal = re.sub("total ", "",signal)
213+
new_signal = re.sub(combined_pat2, "tests",new_signal)
214+
new_signal =re.sub(" *%", "_pct_positive",new_signal)
215+
new_signal = re.sub("total ", "",new_signal)
215216
return(new_signal)
216217

217218
def preprocess_table_columns(table):
@@ -240,6 +241,7 @@ def preprocess_table_columns(table):
240241
table.columns = [re.sub("flutest","flu test", col) for col in table.columns]
241242
table.columns = [re.sub(r"other hpiv","hpivother",t) for t in table.columns]
242243

244+
table.columns=[make_signal_type_spelling_consistent(col) for col in table.columns]
243245
return(table)
244246

245247
def create_detections_table(table,modified_date,week_number,week_end_date,start_year):
@@ -251,9 +253,8 @@ def create_detections_table(table,modified_date,week_number,week_end_date,start_
251253
table["geo_value"]=[re.sub("^province of$","alberta",c) for c in table["geo_value"]]
252254

253255
# make naming consistent
254-
table.columns=[make_signal_type_spelling_consistent(col) for col in table.columns]
255256
table.columns=[add_flu_prefix(col) for col in table.columns]
256-
matches=['test','geo_value']
257+
matches=['test','geo_value','positive']
257258

258259
new_names = []
259260
for i in range(len(table.columns)):
@@ -305,7 +306,6 @@ def create_number_detections_table(table,modified_date,start_year):
305306

306307
def create_percent_positive_detection_table(table,modified_date,start_year, flu=False,overwrite_weeks=False):
307308
table = deduplicate_rows(table)
308-
table.columns=[re.sub(" *%", "_pct_positive",col) for col in table.columns]
309309
table.columns = [re.sub(' +', ' ',col) for col in table.columns]
310310
table.insert(2,"issue",modified_date)
311311
table=table.rename(columns={'week end':"time_value"})

src/acquisition/rvdss/utils.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from epiweeks import Week
66
from datetime import datetime
77
import math
8+
from unidecode import unidecode
9+
import string
810

911
from delphi.epidata.acquisition.rvdss.constants import (
1012
VIRUSES, GEOS, REGIONS, NATION, LAST_WEEK_OF_YEAR,
@@ -24,11 +26,19 @@ def abbreviate_geo(full_name):
2426
lowercase=re.sub("\.|\*","",lowercase)
2527
lowercase=re.sub("/territoires","",lowercase)
2628
lowercase=re.sub("^cana$","can",lowercase)
29+
lowercase =lowercase.translate(str.maketrans(string.punctuation, ' '*len(string.punctuation),'.'+"'"))
30+
lowercase=re.sub(' +', ' ', lowercase)
31+
32+
new_name=unidecode(lowercase)
33+
new_name=re.sub(' +', ' ', new_name)
2734

2835
keys = (re.escape(k) for k in GEOS.keys())
2936
pattern = re.compile(r'^\b(' + '|'.join(keys) + r')\b$')
3037

31-
result = pattern.sub(lambda x: GEOS[x.group()], lowercase)
38+
result = pattern.sub(lambda x: GEOS[x.group()], new_name)
39+
40+
if result == new_name:
41+
result = lowercase
3242
return(result)
3343

3444
def create_geo_types(geo,default_geo):

0 commit comments

Comments
 (0)