Skip to content

Commit 043f374

Browse files
committed
changed so only the latest day in epiweek gets patched in
1 parent 34bda4a commit 043f374

File tree

2 files changed

+50
-70
lines changed

2 files changed

+50
-70
lines changed

nhsn/delphi_nhsn/patch.py

Lines changed: 30 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -15,56 +15,44 @@
1515
}
1616
}
1717
18-
It will generate data for the range of issue dates corresponding to source data files available in "backup_dir" specified under "common", and store them in batch issue format under "patch_dir":
18+
It will generate data for the range of issue dates corresponding to source data files available in "backup_dir"
19+
specified under "common", and store them in batch issue format under "patch_dir":
1920
[name-of-patch]/issue_[issue-date]/nhsn/actual_data_file.csv
2021
"""
2122

2223
from datetime import datetime
2324
from os import makedirs
2425
from pathlib import Path
26+
from typing import List
2527

2628
from delphi_utils import get_structured_logger, read_params
2729
from epiweeks import Week
2830

2931
from .run import run_module
3032

3133

32-
def group_source_files(source_files):
34+
def filter_source_files(source_files: List[Path]):
3335
"""
34-
Group patch files such that each lists contains unique epiweek issue date.
35-
36-
This allows for acquisitions break down patches files per unique epiweek
37-
NHSN has not been updating their data in a consistent fashion
38-
and in order to properly capture all the changes that happened, the patch files needs
39-
36+
Filter patch files so that each epiweek is represented by a single file, the one with the latest issue date.
4037
4138
Parameters
4239
----------
4340
source_files
4441
4542
Returns
4643
-------
47-
list of list of dates where the inner list represents issue dates with a corresponding weekday
48-
the content of list contains issue date with the corresponding weekday
44+
list of source files (paths), one per unique epiweek of issue date
4945
50-
ie:
51-
[
52-
[datetime.datetime(2024, 9, 9, 0, 0), datetime.datetime(2024, 11, 18, 0, 0)], # (weekday = 0)
53-
[datetime.datetime(2024, 11, 20, 0, 0)] # (weekday = 2)
54-
]
55-
56-
the index may not represent the weekday integer if the sources files does not have issues dates for all 7 days
5746
"""
58-
days_in_week = 7
59-
patch_list = [[] for _ in range(days_in_week)]
47+
epiweek_dict = dict()
6048

6149
for file in source_files:
6250
if "prelim" not in file.stem:
6351
current_issue_date = datetime.strptime(file.name.split(".")[0], "%Y%m%d")
64-
weekday = current_issue_date.weekday()
65-
patch_list[weekday].append(current_issue_date)
52+
epiweek = Week.fromdate(current_issue_date)
53+
epiweek_dict[epiweek] = file
6654

67-
filtered_patch_list = [lst for lst in patch_list if lst]
55+
filtered_patch_list = list(epiweek_dict.values())
6856
return filtered_patch_list
6957

7058

@@ -74,41 +62,31 @@ def patch(params):
7462
7563
The range of issue dates is specified in params.json using the following keys:
7664
- "patch": Only used for patching data
77-
- "start_date": str, YYYY-MM-DD format, first issue date
78-
- "end_date": str, YYYY-MM-DD format, last issue date
7965
- "patch_dir": str, directory to write all issues output
8066
"""
8167
logger = get_structured_logger("delphi_nhsn.patch", filename=params["common"]["log_filename"])
8268

8369
source_files = sorted(Path(params["common"]["backup_dir"]).glob("*.csv.gz"))
84-
85-
patch_directory_prefix = params["patch"]["patch_dir"]
86-
patch_list = group_source_files(source_files)
87-
for idx, patch_dates in enumerate(patch_list):
88-
start_issue = patch_dates[0]
89-
end_issue = patch_dates[-1]
90-
91-
patch_directory = f"{patch_directory_prefix}_{idx}"
92-
params["patch"]["patch_dir"] = patch_directory
93-
94-
logger.info(
95-
"Starting patching",
96-
patch_directory=patch_directory,
97-
start_issue=start_issue.strftime("%Y-%m-%d"),
98-
end_issue=end_issue.strftime("%Y-%m-%d"),
99-
)
100-
101-
makedirs(patch_directory, exist_ok=True)
102-
103-
for issue_date in patch_dates:
104-
current_issue_ew = Week.fromdate(issue_date)
105-
logger.info("Running issue", issue_date=issue_date.strftime("%Y-%m-%d"))
106-
params["patch"]["issue_date"] = issue_date.strftime("%Y%m%d")
107-
current_issue_dir = f"{params['patch']['patch_dir']}/issue_{current_issue_ew}/nhsn"
108-
makedirs(current_issue_dir, exist_ok=True)
109-
params["common"]["export_dir"] = current_issue_dir
110-
params["common"]["custom_run"] = True
111-
run_module(params, logger)
70+
makedirs(params["patch"]["patch_dir"], exist_ok=True)
71+
72+
logger.info(
73+
"Starting patching",
74+
patch_directory=params["patch"]["patch_dir"],
75+
start_issue=source_files[0].name.split(".")[0],
76+
end_issue=source_files[-1].name.split(".")[0],
77+
)
78+
79+
patch_list = filter_source_files(source_files)
80+
for file in patch_list:
81+
issue_date = datetime.strptime(file.name.split(".")[0], "%Y%m%d")
82+
current_issue_ew = Week.fromdate(issue_date)
83+
logger.info("Running issue", issue_date=issue_date.strftime("%Y-%m-%d"))
84+
params["patch"]["issue_date"] = issue_date.strftime("%Y%m%d")
85+
current_issue_dir = f"{params['patch']['patch_dir']}/issue_{current_issue_ew}/nhsn"
86+
makedirs(current_issue_dir, exist_ok=True)
87+
params["common"]["export_dir"] = current_issue_dir
88+
params["common"]["custom_run"] = True
89+
run_module(params, logger)
11290

11391

11492
if __name__ == "__main__":

nhsn/tests/test_patch.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import glob
22
import os
3-
import tempfile
3+
from collections import defaultdict
44
from pathlib import Path
55
import shutil
66
from unittest.mock import patch as mock_patch
@@ -10,7 +10,7 @@
1010

1111
from epiweeks import Week
1212

13-
from delphi_nhsn.patch import group_source_files, patch
13+
from delphi_nhsn.patch import filter_source_files, patch
1414
from delphi_nhsn.constants import TOTAL_ADMISSION_COVID_API, TOTAL_ADMISSION_FLU_API
1515
from conftest import TEST_DATA, PRELIM_TEST_DATA, TEST_DIR
1616

@@ -51,12 +51,18 @@ def generate_dummy_file_names(self):
5151
file_list.append(custom_filename)
5252
return file_list
5353

54-
def test_group_source_files(self):
54+
def test_filter_source_files(self):
5555
filelist = self.generate_dummy_file_names()
56-
processed_file_list = group_source_files(filelist)
57-
for file_list in processed_file_list:
58-
converted_file_list = [Week.fromdate(date) for date in file_list]
59-
assert len(converted_file_list) == len(set(converted_file_list))
56+
epiweek_dict = defaultdict(list)
57+
for file in filelist:
58+
issue_dt = datetime.strptime(file.name.split(".")[0], "%Y%m%d")
59+
issue_epiweek = Week.fromdate(issue_dt)
60+
epiweek_dict[issue_epiweek].append(issue_dt)
61+
patch_issue_list = filter_source_files(filelist)
62+
for file in patch_issue_list:
63+
issue_dt = datetime.strptime(file.name.split(".")[0], "%Y%m%d")
64+
issue_epiweek = Week.fromdate(issue_dt)
65+
assert max(epiweek_dict[issue_epiweek]) == issue_dt
6066

6167
def generate_test_source_files(self):
6268
start_date = datetime(2024, 8, 1)
@@ -102,19 +108,15 @@ def test_patch(self, params_w_patch):
102108
file_list, prelim_file_list = self.generate_test_source_files()
103109
patch(params_w_patch)
104110

105-
for idx in range(7):
106-
patch_paths = [Path(dir) for dir in glob.glob(f"{TEST_DIR}/patch_dir_{idx}/*")]
107-
for patch_path in patch_paths:
108-
# epiweek + the index of the patch files should equal the issue date (which is set as the value of the csv)
109-
issue_dt = Week.fromstring(patch_path.name.replace("issue_", "")).daydate(idx).strftime("%Y%m%d")
110-
for patch_file in Path(patch_path / "nhsn").iterdir():
111-
df = pd.read_csv(str(patch_file))
112-
val = str(int(df["val"][0]))
113-
assert issue_dt == val
111+
for issue_path in Path(f"{TEST_DIR}/patch_dir").glob("*"):
112+
issue_dt_str = issue_path.name.replace("issue_", "")
113+
for file in Path(issue_path / "nhsn").iterdir():
114+
df = pd.read_csv(file)
115+
val = Week.fromdate(datetime.strptime(str(int(df["val"][0])), "%Y%m%d"))
116+
assert issue_dt_str == str(val)
114117

115118
# clean up
116-
for idx in range(7):
117-
shutil.rmtree(f"{TEST_DIR}/patch_dir_{idx}")
119+
shutil.rmtree(f"{TEST_DIR}/patch_dir")
118120

119121
for file in file_list:
120122
os.remove(file)

0 commit comments

Comments
 (0)