
Commit e7eccaa

more logging and cleaned logic (#2076)
* more logging and cleaned logic
* changed variable names
* lint
* add param to test
* fix test
* consistent location for custom run flag
1 parent 9dbbc59 commit e7eccaa


8 files changed: +39 −27 lines changed


google_symptoms/delphi_google_symptoms/constants.py

Lines changed: 9 additions & 8 deletions
@@ -30,14 +30,15 @@
 METRICS = METRICS + SYMPTOM_SETS[combmetric]
 
 SMOOTHERS = ["raw", "smoothed"]
-GEO_RESOLUTIONS = [
-    "state",
-    "county",
-    "msa",
-    "hrr",
-    "hhs",
-    "nation"
-]
+
+GEO_RESOLUTIONS = {
+    "state": "state",
+    "county": "county",
+    "msa": "county",
+    "hrr": "county",
+    "hhs": "state",
+    "nation": "state",
+}
 
 SMOOTHERS_MAP = {
     "raw": (Smoother("identity", impute_method=None),

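GEO_RESOLUTIONS changes from a flat list of output geos into a mapping from each output geo to the source geo level ("state" or "county") its signal is derived from, which is what lets run.py pick the right pulled frame before deciding whether geo_map is needed. A minimal sketch of reading the mapping; the helper name is illustrative and not part of the package:

from delphi_google_symptoms.constants import GEO_RESOLUTIONS

def source_frame_for(geo_res, dfs):
    """Illustrative helper: look up which pulled DataFrame an output geo is built from."""
    # e.g. GEO_RESOLUTIONS["msa"] == "county", so MSA signals start from the county pull
    return dfs[GEO_RESOLUTIONS[geo_res]]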
google_symptoms/delphi_google_symptoms/date_utils.py

Lines changed: 1 addition & 1 deletion
@@ -98,7 +98,7 @@ def generate_num_export_days(params: Dict, logger) -> [int]:
     expected_date_diff += global_max_expected_lag
 
     if latest_date_diff > expected_date_diff:
-        logger.info("Missing date", date=to_datetime(min(gs_metadata.max_time)).date())
+        logger.info("Lag is more than expected", expected_lag=expected_date_diff, lag=latest_date_diff)
 
         num_export_days = expected_date_diff
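The message no longer reports a single "Missing date"; it reports the observed lag against the expected lag as structured fields. With delphi_utils' structured logger (the same get_structured_logger imported in the test diff below), keyword arguments are emitted as fields alongside the message. A minimal sketch with made-up lag values:

from delphi_utils import get_structured_logger

logger = get_structured_logger(__name__)

expected_date_diff = 10  # hypothetical expected lag, in days
latest_date_diff = 14    # hypothetical observed lag, in days

if latest_date_diff > expected_date_diff:
    # Keyword args become structured fields on the log event, e.g. expected_lag=10, lag=14.
    logger.info("Lag is more than expected", expected_lag=expected_date_diff, lag=latest_date_diff)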

google_symptoms/delphi_google_symptoms/patch.py

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ def patch(params):
         makedirs(f"{current_issue_dir}", exist_ok=True)
 
         params["common"]["export_dir"] = f"""{current_issue_dir}"""
-        params["indicator"]["custom_run"] = True
+        params["common"]["custom_run"] = True
 
         date_settings = patch_dates[issue_date]
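Moving the flag under "common" puts patch runs and regular runs on the same key: patch() sets it to True on the params it builds, and run_module reads it from the same block (see run.py below). A hedged sketch of the params a patch iteration might hand to run_module; the export path is a placeholder:

# Hypothetical params after patch() has prepared one issue directory (paths are placeholders).
params = {
    "common": {
        "export_dir": "./patch_output/some_issue_dir",  # placeholder for current_issue_dir
        "custom_run": True,  # set by patch(); run_module now reads the flag from "common"
    },
    "indicator": {
        # indicator settings unchanged; "custom_run" no longer lives here
    },
}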

google_symptoms/delphi_google_symptoms/pull.py

Lines changed: 7 additions & 5 deletions
@@ -158,7 +158,7 @@ def produce_query(level, date_range):
     return query
 
 
-def pull_gs_data_one_geolevel(level, date_range):
+def pull_gs_data_one_geolevel(level, date_range, logger):
     """Pull latest data for a single geo level.
 
     Fetch data and transform it into the appropriate format, as described in
@@ -209,6 +209,9 @@ def pull_gs_data_one_geolevel(level, date_range):
 
     if len(df) == 0:
         df = pd.DataFrame(columns=["open_covid_region_code", "date"] + list(colname_map.keys()))
+        logger.info(
+            "No data available for date range", geo_level=level, start_date=date_range[0], end_date=date_range[1]
+        )
 
     df = preprocess(df, level)
     return df
@@ -232,7 +235,7 @@ def initialize_credentials(credentials):
     pandas_gbq.context.project = credentials.project_id
 
 
-def pull_gs_data(credentials, export_start_date, export_end_date, num_export_days, custom_run_flag):
+def pull_gs_data(credentials, export_start_date, export_end_date, num_export_days, custom_run_flag, logger):
     """Pull latest dataset for each geo level and combine.
 
     PS: No information for PR
@@ -264,10 +267,9 @@ def pull_gs_data(credentials, export_start_date, export_end_date, num_export_day
     dfs = {}
 
     # For state level data
-    dfs["state"] = pull_gs_data_one_geolevel("state", retrieve_dates)
+    dfs["state"] = pull_gs_data_one_geolevel("state", retrieve_dates, logger)
     # For county level data
-    dfs["county"] = pull_gs_data_one_geolevel("county", retrieve_dates)
-
+    dfs["county"] = pull_gs_data_one_geolevel("county", retrieve_dates, logger)
 
     # Add District of Columbia as county
     try:
google_symptoms/delphi_google_symptoms/run.py

Lines changed: 10 additions & 6 deletions
@@ -58,7 +58,7 @@ def run_module(params, logger=None):
     num_export_days = generate_num_export_days(params, logger)
     # safety check for patch parameters exists in file, but not running custom runs/patches
     custom_run_flag = (
-        False if not params["indicator"].get("custom_run", False) else params["indicator"].get("custom_run", False)
+        False if not params["common"].get("custom_run", False) else params["indicator"].get("custom_run", False)
     )
 
     # Pull GS data
@@ -68,17 +68,21 @@ def run_module(params, logger=None):
         export_end_date,
         num_export_days,
         custom_run_flag,
+        logger,
     )
-    for geo_res in GEO_RESOLUTIONS:
+
+    for geo_res, mapped_res in GEO_RESOLUTIONS.items():
+        df_pull = dfs[mapped_res]
+        if len(df_pull) == 0:
+            logger.info("Skipping processing; No data available for geo", geo_type=geo_res)
+            continue
         if geo_res == "state":
             df_pull = dfs["state"]
         elif geo_res in ["hhs", "nation"]:
-            df_pull = geo_map(dfs["state"], geo_res)
+            df_pull = geo_map(dfs[mapped_res], geo_res)
         else:
-            df_pull = geo_map(dfs["county"], geo_res)
+            df_pull = geo_map(dfs[mapped_res], geo_res)
 
-        if len(df_pull) == 0:
-            continue
         for metric, smoother in product(COMBINED_METRIC, SMOOTHERS):
             sensor_name = "_".join([smoother, "search"])
             logger.info("Generating signal and exporting to CSV", geo_type=geo_res, signal=f"{metric}_{sensor_name}")
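The loop now resolves each output geo to its source frame via the GEO_RESOLUTIONS mapping, logs and skips empty pulls up front, and routes everything except the state passthrough through geo_map. A condensed, illustrative restatement of the control flow above (not the package code verbatim):

for geo_res, mapped_res in GEO_RESOLUTIONS.items():
    df_pull = dfs[mapped_res]  # source frame per the mapping: "state" or "county" data
    if len(df_pull) == 0:
        # Empty source data is now logged and skipped before any mapping or export work.
        logger.info("Skipping processing; No data available for geo", geo_type=geo_res)
        continue
    if geo_res == "state":
        df_pull = dfs["state"]  # state output uses the state pull directly
    else:
        # msa/hrr aggregate from the county pull; hhs/nation from the state pull;
        # county is routed through geo_map as well.
        df_pull = geo_map(dfs[mapped_res], geo_res)
    # ... signal generation and CSV export continue as before ...

Since the hhs/nation branch and the fallback branch now both read dfs[mapped_res], they collapse to a single else in this sketch; only the state passthrough remains a special case.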

google_symptoms/params.json.template

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@
   "common": {
     "export_dir": "./receiving",
     "log_exceptions": false,
+    "custom_run": false,
     "log_filename": "./google-symptoms.log"
   },
   "indicator": {

google_symptoms/tests/test_patch.py

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ def mocked_patch(self, params_):
             mock_patch("delphi_google_symptoms.pull.pandas_gbq.read_gbq") as mock_read_gbq, \
             mock_patch("delphi_google_symptoms.pull.initialize_credentials", return_value=None), \
             mock_patch("delphi_google_symptoms.date_utils.covidcast.metadata", return_value=covidcast_metadata), \
-            mock_patch("delphi_google_symptoms.run.GEO_RESOLUTIONS", new=["state"]):
+            mock_patch("delphi_google_symptoms.run.GEO_RESOLUTIONS", new={"state": "state"}):
             def side_effect(*args, **kwargs):
                 if "symptom_search_sub_region_1_daily" in args[0]:
                     df = state_data_gap

google_symptoms/tests/test_pull.py

Lines changed: 9 additions & 5 deletions
@@ -12,6 +12,7 @@
     pull_gs_data, preprocess, format_dates_for_query, pull_gs_data_one_geolevel)
 from delphi_google_symptoms.constants import METRICS, COMBINED_METRIC
 from conftest import TEST_DIR
+from delphi_utils import get_structured_logger
 
 good_input = {
     "state": f"{TEST_DIR}/test_data/small_states_daily.csv",
@@ -30,6 +31,7 @@
 
 
 class TestPullGoogleSymptoms:
+    logger = get_structured_logger()
     @freeze_time("2021-01-05")
     @mock.patch("pandas_gbq.read_gbq")
     @mock.patch("delphi_google_symptoms.pull.initialize_credentials")
@@ -49,7 +51,9 @@ def test_good_file(self, mock_credentials, mock_read_gbq):
         end_date = datetime.combine(date.today(), datetime.min.time())
 
         dfs = pull_gs_data("", datetime.strptime(
-            "20201230", "%Y%m%d"), datetime.combine(date.today(), datetime.min.time()), 0, False)
+            "20201230", "%Y%m%d"),
+            datetime.combine(date.today(), datetime.min.time()),
+            0, False, self.logger)
 
         for level in ["county", "state"]:
             df = dfs[level]
@@ -119,7 +123,7 @@ def test_format_dates_for_query(self):
     def test_pull_one_gs_no_dates(self, mock_read_gbq):
         mock_read_gbq.return_value = pd.DataFrame()
 
-        output = pull_gs_data_one_geolevel("state", ["", ""])
+        output = pull_gs_data_one_geolevel("state", ["", ""], self.logger)
         expected = pd.DataFrame(columns=new_keep_cols)
         assert_frame_equal(output, expected, check_dtype = False)
 
@@ -133,7 +137,7 @@ def test_pull_one_gs_retry_success(self):
         with mock.patch("pandas_gbq.read_gbq") as mock_read_gbq:
             mock_read_gbq.side_effect = [badRequestException, pd.DataFrame()]
 
-            output = pull_gs_data_one_geolevel("state", ["", ""])
+            output = pull_gs_data_one_geolevel("state", ["", ""], self.logger)
             expected = pd.DataFrame(columns=new_keep_cols)
             assert_frame_equal(output, expected, check_dtype = False)
             assert mock_read_gbq.call_count == 2
@@ -147,7 +151,7 @@ def test_pull_one_gs_retry_too_many(self):
         with mock.patch("pandas_gbq.read_gbq") as mock_read_gbq:
             with pytest.raises(BadRequest):
                 mock_read_gbq.side_effect = [badRequestException, badRequestException, pd.DataFrame()]
-                pull_gs_data_one_geolevel("state", ["", ""])
+                pull_gs_data_one_geolevel("state", ["", ""], self.logger)
 
 
     def test_pull_one_gs_retry_bad(self):
@@ -156,7 +160,7 @@ def test_pull_one_gs_retry_bad(self):
         with mock.patch("pandas_gbq.read_gbq") as mock_read_gbq:
             with pytest.raises(BadRequest):
                 mock_read_gbq.side_effect = [badRequestException,pd.DataFrame()]
-                pull_gs_data_one_geolevel("state", ["", ""])
+                pull_gs_data_one_geolevel("state", ["", ""], self.logger)
 
     def test_preprocess_no_data(self):
         output = preprocess(pd.DataFrame(columns=keep_cols), "state")

0 commit comments
