
Commit 411694c

add hhs geo aggregate

1 parent 8117096 · commit 411694c

File tree

3 files changed (+37 −19 lines)

nhsn/delphi_nhsn/constants.py

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 """Registry for signal names."""

-GEOS = ["state", "nation"]
+GEOS = ["state", "nation", "hhs"]

 # column name from socrata
 TOTAL_ADMISSION_COVID_API = "totalconfc19newadm"

nhsn/delphi_nhsn/pull.py

Lines changed: 4 additions & 0 deletions
@@ -65,6 +65,8 @@ def pull_nhsn_data(socrata_token: str, backup_dir: str, custom_run: bool, logger
             df[signal] = df[col_name]

         df = df[keep_columns]
+        df["geo_id"] = df["geo_id"].str.lower()
+        df.loc[df["geo_id"] == "usa", 'geo_id'] = "us"
         df = df.astype(TYPE_DICT)
     else:
         df = pd.DataFrame(columns=keep_columns)

@@ -113,6 +115,8 @@ def pull_preliminary_nhsn_data(

         df = df[keep_columns]
         df = df.astype(PRELIM_TYPE_DICT)
+        df["geo_id"] = df["geo_id"].str.lower()
+        df.loc[df["geo_id"] == "usa", 'geo_id'] = "us"
     else:
         df = pd.DataFrame(columns=keep_columns)
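Both pull functions gain the same two-line normalization: jurisdiction codes from Socrata are lowercased and the national row is relabeled from "usa" to "us", which is what the geo filtering in run.py now expects. A minimal sketch of the effect on a toy frame (the column names follow the diff; the sample rows are invented):

import pandas as pd

# Toy frame standing in for the Socrata pull; the values are made up.
df = pd.DataFrame({
    "geo_id": ["USA", "PA", "Ca"],
    "totalconfc19newadm": [100, 20, 30],
})

# The same normalization the commit adds after `df = df[keep_columns]`:
df["geo_id"] = df["geo_id"].str.lower()          # "USA" -> "usa", "PA" -> "pa", "Ca" -> "ca"
df.loc[df["geo_id"] == "usa", "geo_id"] = "us"   # the national row becomes "us"

print(df["geo_id"].tolist())  # ['us', 'pa', 'ca']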

nhsn/delphi_nhsn/run.py

Lines changed: 32 additions & 18 deletions
@@ -16,9 +16,10 @@
 """
 import time
 from datetime import date, datetime, timedelta
+from itertools import product

 import numpy as np
-from delphi_utils import get_structured_logger
+from delphi_utils import get_structured_logger, GeoMapper
 from delphi_utils.export import create_export_csv

 from .constants import GEOS, PRELIM_SIGNALS_MAP, SIGNALS_MAP

@@ -54,26 +55,39 @@ def run_module(params):
     nhsn_df = pull_nhsn_data(socrata_token, backup_dir, custom_run=custom_run, logger=logger)
     preliminary_nhsn_df = pull_preliminary_nhsn_data(socrata_token, backup_dir, custom_run=custom_run, logger=logger)

-    for signals, df_pull in [(SIGNALS_MAP.keys(), nhsn_df), (PRELIM_SIGNALS_MAP.keys(), preliminary_nhsn_df)]:
+    geo_mapper = GeoMapper()
+    signal_df_dict = {signal: nhsn_df for signal in SIGNALS_MAP.keys()}
+    signal_df_dict.update({signal: preliminary_nhsn_df for signal in PRELIM_SIGNALS_MAP.keys()})
+
+    for signal, df_pull in signal_df_dict.items():
         for geo in GEOS:
+            df = df_pull.copy()
+            df = df[["timestamp", "geo_id", signal]]
+            df.rename({signal: "val"}, axis=1, inplace=True)
             if geo == "nation":
-                df = df_pull[df_pull["geo_id"] == "USA"]
+                df = df[df["geo_id"] == "us"]
+            elif geo == "hhs":
+                df = df[df["geo_id"] != "us"]
+                df = geo_mapper.add_population_column(df, geocode_type="state_id", geocode_col="geo_id")
+                df = geo_mapper.add_geocode(df, "state_id", "state_code", from_col="state_id")
+                df = geo_mapper.add_geocode(df, "state_code", "hhs", from_col="state_code", new_col="geo_id")
+                df = geo_mapper.aggregate_by_weighted_sum(df, "geo_id_y", "val", "timestamp", "population")
+                df = df.rename(columns={"weighted_val": "val"})
             else:
-                df = df_pull[df_pull["geo_id"] != "USA"]
-            for signal in signals:
-                df["val"] = df[signal]
-                df["se"] = np.nan
-                df["sample_size"] = np.nan
-                dates = create_export_csv(
-                    df,
-                    geo_res=geo,
-                    export_dir=export_dir,
-                    start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
-                    sensor=signal,
-                    weekly_dates=True,
-                )
-                if len(dates) > 0:
-                    run_stats.append((max(dates), len(dates)))
+                df = df[df_pull["geo_id"] != "us"]
+            df["se"] = np.nan
+            df["sample_size"] = np.nan
+            print(signal, geo)
+            dates = create_export_csv(
+                df,
+                geo_res=geo,
+                export_dir=export_dir,
+                start_date=datetime.strptime(export_start_date, "%Y-%m-%d"),
+                sensor=signal,
+                weekly_dates=True,
+            )
+            if len(dates) > 0:
+                run_stats.append((max(dates), len(dates)))

     elapsed_time_in_seconds = round(time.time() - start_time, 2)
     min_max_date = run_stats and min(s[0] for s in run_stats)
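In run.py, the new hhs branch keeps the state rows, attaches state populations and HHS region codes via GeoMapper, and then collapses states into regions with aggregate_by_weighted_sum, weighting each state's value by its share of the region's population. A plain-pandas sketch of that weighting idea follows; it uses an invented state-to-region assignment and invented populations rather than GeoMapper's crosswalks, and is meant as a conceptual illustration, not the exact delphi_utils implementation:

import pandas as pd

# Invented inputs; the real pipeline gets populations and the state -> HHS
# mapping from delphi_utils.GeoMapper crosswalks.
state_vals = pd.DataFrame({
    "timestamp": ["2024-11-16"] * 3,
    "geo_id": ["1", "1", "2"],                    # HHS region each state maps to (hypothetical)
    "val": [10.0, 5.0, 20.0],                     # state-level signal values
    "population": [3_600_000, 1_400_000, 9_300_000],
})

# Population-weighted combination per (region, week):
# weight_i = pop_i / sum(pop in region), val_region = sum(weight_i * val_i)
state_vals["weight"] = state_vals["population"] / state_vals.groupby(
    ["geo_id", "timestamp"]
)["population"].transform("sum")
state_vals["weighted_val"] = state_vals["weight"] * state_vals["val"]

hhs_vals = (
    state_vals.groupby(["geo_id", "timestamp"], as_index=False)["weighted_val"]
    .sum()
    .rename(columns={"weighted_val": "val"})      # mirrors the rename in the diff
)
print(hhs_vals)
# region 1: (3.6/5.0)*10 + (1.4/5.0)*5 = 8.6; region 2: 20.0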
