
Commit 2563cd4

Add extra NA string and fix combining table function
1 parent 47f5210 commit 2563cd4

3 files changed: +44 -16 lines changed


src/acquisition/rvdss/pull_historic.py

Lines changed: 1 addition & 1 deletion
@@ -405,7 +405,7 @@ def fetch_one_season_from_report(url):
 
     # Read table, coding all the abbreviations for missing data into NA
     # Also use dropna because removing footers causes the html to have an empty row
-    na_values = ['N.A.','N.A', 'N.C.','N.R.','Not Available','Not Tested',"not available",
+    na_values = ['N.A.','N.A', 'N.C.','N.R.','N.R','Not Available','Not Tested',"not available",
                  "not tested","N.D.","-",'Not tested','non testé']
     table = pd.read_html(StringIO(str(tab)),na_values=na_values)[0].dropna(how="all")
 
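The functional change here is the extra 'N.R' marker added to na_values. A minimal sketch of how pd.read_html maps such markers to NaN, using a made-up HTML snippet rather than an actual RVDSS report page:

from io import StringIO
import pandas as pd  # pd.read_html also needs an HTML parser such as lxml installed

na_values = ['N.A.','N.A', 'N.C.','N.R.','N.R','Not Available','Not Tested',"not available",
             "not tested","N.D.","-",'Not tested','non testé']

html = """<table>
  <tr><th>region</th><th>tests</th></tr>
  <tr><td>ON</td><td>N.R</td></tr>
  <tr><td>QC</td><td>42</td></tr>
</table>"""

# Cells matching any na_values entry are read as NaN; dropna(how="all") then
# removes fully empty rows such as those left behind by stripped footers.
table = pd.read_html(StringIO(html), na_values=na_values)[0].dropna(how="all")
print(table)  # the "N.R" cell comes back as NaN instead of a string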

src/acquisition/rvdss/run.py

Lines changed: 6 additions & 5 deletions
@@ -59,16 +59,17 @@ def update_historical_data(logger):
     for tt in table_types.keys():
         # Merge tables together from dashboards and reports for each table type.
         dashboard_data = [elem.get(tt, pd.DataFrame()) for elem in dashboard_dict_list] # a list of all the dashboard tables
-        report_data = report_dict_list.get(tt, None) # a list of the report table
+        report_data = [elem.get(tt, pd.DataFrame()) for elem in report_dict_list] # a list of the report table
 
         all_dashboard_tables = pd.concat(dashboard_data)
-
-        if all_dashboard_tables.empty == False and report_data.empty == False:
+        all_report_data = pd.concat(report_data)
+
+        if all_dashboard_tables.empty == False and all_report_data.empty == False:
             all_dashboard_tables=all_dashboard_tables.reset_index()
             all_dashboard_tables=all_dashboard_tables.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])
 
-            hist_dict_list[tt] = pd.concat([report_data, all_dashboard_tables])
-
+            hist_dict_list[tt] = pd.concat([all_report_data, all_dashboard_tables])
+
     # Combine all rables into a single table
     data = combine_tables(hist_dict_list)
 
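After this fix, report tables are handled the same way as dashboard tables: report_dict_list is a list of per-source dicts keyed by table type, so each type is collected with a list comprehension and concatenated before being merged. A toy sketch of that pattern (made-up frames, not the real scraped output):

import pandas as pd

# Two hypothetical report sources, each a dict of tables keyed by table type;
# .get(tt, pd.DataFrame()) falls back to an empty frame when a source lacks that type.
report_dict_list = [
    {"positive": pd.DataFrame({"epiweek": [202401], "flu_tests": [10]})},
    {"positive": pd.DataFrame({"epiweek": [202402], "flu_tests": [12]})},
]

tt = "positive"
report_data = [elem.get(tt, pd.DataFrame()) for elem in report_dict_list]
all_report_data = pd.concat(report_data)

if not all_report_data.empty:  # same check as the diff's `.empty == False`, written idiomatically
    print(all_report_data)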

src/acquisition/rvdss/utils.py

Lines changed: 37 additions & 10 deletions
@@ -247,36 +247,60 @@ def get_detections_data(base_url,headers,update_date):
 def expand_detections_columns(new_data):
     # add extra columns - percent positivities
     if "adv_positive_tests" in new_data.columns and "adv_tests" in new_data.columns:
-        new_data["adv_pct_positive"] = new_data["adv_positive_tests"]/new_data["adv_tests"]*100
+        new_data["adv_pct_positive"] = np.divide(new_data["adv_positive_tests"], new_data["adv_tests"],
+                                        out=np.zeros_like(new_data["adv_positive_tests"],dtype=float),
+                                        where=new_data["adv_tests"]!=0)*100
 
     if "evrv_positive_tests" in new_data.columns and "evrv_tests" in new_data.columns:
-        new_data["evrv_pct_positive"] = new_data["evrv_positive_tests"]/new_data["evrv_tests"]*100
+        new_data["evrv_pct_positive"] = np.divide(new_data["evrv_positive_tests"], new_data["evrv_tests"],
+                                        out=np.zeros_like(new_data["evrv_positive_tests"],dtype=float),
+                                        where=new_data["evrv_tests"]!=0)*100
 
     if "flu_positive_tests" in new_data.columns and "flu_tests" in new_data.columns:
-        new_data["flu_pct_positive"] = new_data["flu_positive_tests"]/new_data["flu_tests"]*100
+        new_data["flu_pct_positive"] = np.divide(new_data["flu_positive_tests"], new_data["flu_tests"],
+                                        out=np.zeros_like(new_data["flu_positive_tests"],dtype=float),
+                                        where=new_data["flu_tests"]!=0)*100
 
     if "sarscov2_positive_tests" in new_data.columns and "sarscov2_tests" in new_data.columns:
-        new_data["sarscov2_pct_positive"] = new_data["sarscov2_positive_tests"]/new_data["sarscov2_tests"]*100
+        new_data["sarscov2_pct_positive"] = np.divide(new_data["sarscov2_positive_tests"], new_data["sarscov2_tests"],
+                                        out=np.zeros_like(new_data["sarscov2_positive_tests"],dtype=float),
+                                        where=new_data["sarscov2_tests"]!=0)*100
 
     if "flua_positive_tests" in new_data.columns and "flub_positive_tests" in new_data.columns:
         new_data["flua_tests"] = new_data["flu_tests"]
         new_data["flub_tests"] = new_data["flu_tests"]
-        new_data["flua_pct_positive"] = new_data["flua_positive_tests"]/new_data["flu_tests"]*100
+
+        new_data["flua_pct_positive"] = np.divide(new_data["flua_positive_tests"], new_data["flu_tests"],
+                                        out=np.zeros_like(new_data["flua_positive_tests"],dtype=float),
+                                        where=new_data["flu_tests"]!=0)*100
+
         new_data["flub_pct_positive"] = new_data["flub_positive_tests"]/new_data["flu_tests"]*100
+        new_data["flub_pct_positive"] = np.divide(new_data["flub_positive_tests"], new_data["flu_tests"],
+                                        out=np.zeros_like(new_data["flub_positive_tests"],dtype=float),
+                                        where=new_data["flu_tests"]!=0)*100
 
     if "hcov_positive_tests" in new_data.columns and "hcov_tests" in new_data.columns:
-        new_data["hcov_pct_positive"] = new_data["hcov_positive_tests"]/new_data["hcov_tests"]*100
+        new_data["hcov_pct_positive"] = np.divide(new_data["hcov_positive_tests"], new_data["hcov_tests"],
+                                        out=np.zeros_like(new_data["hcov_positive_tests"],dtype=float),
+                                        where=new_data["hcov_tests"]!=0)*100
 
     if "hmpv_positive_tests" in new_data.columns and "hmpv_tests" in new_data.columns:
-        new_data["hmpv_pct_positive"] = new_data["hmpv_positive_tests"]/new_data["hmpv_tests"]*100
+        new_data["hmpv_pct_positive"] = np.divide(new_data["hmpv_positive_tests"], new_data["hmpv_tests"],
+                                        out=np.zeros_like(new_data["hmpv_positive_tests"],dtype=float),
+                                        where=new_data["hmpv_tests"]!=0)*100
 
     if "rsv_positive_tests" in new_data.columns and "rsv_tests" in new_data.columns:
-        new_data["rsv_pct_positive"] = new_data["rsv_positive_tests"]/new_data["rsv_tests"]*100
+        new_data["rsv_pct_positive"] = np.divide(new_data["rsv_positive_tests"], new_data["rsv_tests"],
+                                        out=np.zeros_like(new_data["rsv_positive_tests"],dtype=float),
+                                        where=new_data["rsv_tests"]!=0)*100
 
     if "hpiv1_positive_tests" in new_data.columns and "hpiv_tests" in new_data.columns:
         new_data["hpiv_positive_tests"] = new_data["hpiv1_positive_tests"] + new_data["hpiv2_positive_tests"]+ new_data["hpiv3_positive_tests"]+new_data["hpiv4_positive_tests"]+new_data["hpivother_positive_tests"]
-        new_data["hpiv_pct_positive"] = new_data["hpiv_positive_tests"]/new_data["hpiv_tests"]*100
 
+        new_data["hpiv_pct_positive"] = np.divide(new_data["hpiv_positive_tests"], new_data["hpiv_tests"],
+                                        out=np.zeros_like(new_data["hpiv_positive_tests"],dtype=float),
+                                        where=new_data["hpiv_tests"]!=0)*100
+
     return(new_data.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value']))
 
 def duplicate_provincial_detections(data):
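The recurring edit in this hunk replaces plain division with np.divide(..., out=..., where=...). A small self-contained sketch (toy arrays, not the pipeline's data) of why: wherever the denominator is zero, the division is skipped and the 0.0 from out is kept, so percent positivity becomes 0 instead of inf or NaN and no divide-by-zero warning is raised.

import numpy as np

positive_tests = np.array([5., 0., 3.])
total_tests = np.array([10., 0., 12.])

# Divide only where total_tests != 0; untouched entries keep the value from `out` (0.0).
pct_positive = np.divide(positive_tests, total_tests,
                         out=np.zeros_like(positive_tests, dtype=float),
                         where=total_tests != 0) * 100

print(pct_positive)  # [50.  0. 25.]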
@@ -301,9 +325,12 @@ def combine_tables(data_dict):
     num_tables = len(data_dict)
     if(num_tables==2):
         positive=data_dict["positive"]
+        positive=positive.reset_index()
+
         detections=data_dict["respiratory_detection"]
+        detections=detections.reset_index()
+
         detections = expand_detections_columns(detections)
-
         positive = positive.drop(['geo_type'], axis=1)
         positive['geo_type'] = [create_geo_types(g,'lab') for g in positive['geo_value']]
         positive=positive.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])
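combine_tables now flattens the MultiIndex before working on the two tables, since the steps that follow (recomputing geo_type, expand_detections_columns) treat the index fields as ordinary columns. A toy illustration of that round trip, with a made-up frame and a stand-in for create_geo_types:

import pandas as pd

idx_cols = ['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value']
positive = pd.DataFrame({
    'epiweek': [202440], 'time_value': ['2024-10-05'], 'issue': [202441],
    'geo_type': ['region'], 'geo_value': ['some lab'], 'flu_positive_tests': [7],
}).set_index(idx_cols)

positive = positive.reset_index()               # index levels become ordinary columns again
positive = positive.drop(['geo_type'], axis=1)  # recompute geo_type from geo_value...
positive['geo_type'] = ['lab' for g in positive['geo_value']]  # ...stand-in for create_geo_types(g,'lab')
positive = positive.set_index(idx_cols)         # restore the shared index for concatenation
print(positive)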

0 commit comments
