Skip to content

Commit 0be5f08

Browse files
committed
combine different spellings of labs
1 parent 31ec961 commit 0be5f08

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

src/acquisition/rvdss/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
"atl":"atlantic",
4444
"pr" :"prairies" ,
4545
"terr" :"territories",
46+
"uhn sinai hospital":"uhn mount sinai hospital"
4647
}
4748

4849
# Regions are groups of provinces that are geographically close together. Some single provinces are reported as their own region (e.g. Québec, Ontario).

src/acquisition/rvdss/utils.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from epiweeks import Week
66
from datetime import datetime
77
import math
8+
from unidecode import unidecode
9+
import string
810

911
from delphi.epidata.acquisition.rvdss.constants import (
1012
VIRUSES, GEOS, REGIONS, NATION, LAST_WEEK_OF_YEAR,
@@ -24,11 +26,19 @@ def abbreviate_geo(full_name):
2426
lowercase=re.sub("\.|\*","",lowercase)
2527
lowercase=re.sub("/territoires","",lowercase)
2628
lowercase=re.sub("^cana$","can",lowercase)
29+
lowercase =lowercase.translate(str.maketrans(string.punctuation, ' '*len(string.punctuation),'.'+"'"))
30+
lowercase=re.sub(' +', ' ', lowercase)
31+
32+
new_name=unidecode(lowercase)
33+
new_name=re.sub(' +', ' ', new_name)
2734

2835
keys = (re.escape(k) for k in GEOS.keys())
2936
pattern = re.compile(r'^\b(' + '|'.join(keys) + r')\b$')
3037

31-
result = pattern.sub(lambda x: GEOS[x.group()], lowercase)
38+
result = pattern.sub(lambda x: GEOS[x.group()], new_name)
39+
40+
if result == new_name:
41+
result = lowercase
3242
return(result)
3343

3444
def create_geo_types(geo,default_geo):

0 commit comments

Comments
 (0)