|
| 1 | +""" |
| 2 | +This module is used for patching data in the delphi_nhsn package. |
| 3 | +
|
| 4 | +To use this module, you need to specify the patch output directory in params.json, like so: |
| 5 | +
|
| 6 | +{ |
| 7 | + "common": { |
| 8 | + ... |
| 9 | + }, |
| 10 | + "validation": { |
| 11 | + ... |
| 12 | + }, |
| 13 | + "patch": { |
| 14 | + "patch_dir": "/Users/minhkhuele/Desktop/delphi/covidcast-indicators/nhsn/patch" |
| 15 | + } |
| 16 | +} |
| 17 | +
|
| 18 | +It will generate data for the range of issue dates corresponding to source data files available in "backup_dir" |
| 19 | +specified under "common", and store them in batch issue format under "patch_dir": |
| 20 | +[name-of-patch]/issue_[issue-date]/nhsn/actual_data_file.csv |
| 21 | +""" |
| 22 | + |
| 23 | +from datetime import datetime |
| 24 | +from os import makedirs |
| 25 | +from pathlib import Path |
| 26 | +from typing import List |
| 27 | + |
| 28 | +from delphi_utils import get_structured_logger, read_params |
| 29 | +from epiweeks import Week |
| 30 | + |
| 31 | +from .run import run_module |
| 32 | + |
| 33 | + |
def filter_source_files(source_files: List[Path]) -> List[Path]:
    """
    Keep one source file per epiweek: the one with the latest issue date.

    Files whose stem contains "prelim" are skipped. The issue date is parsed from the
    part of the file name before the first "." using the format YYYYMMDD.

    Parameters
    ----------
    source_files
        Paths of source files whose names start with a YYYYMMDD issue date followed
        by "."; they may be given in any order.

    Returns
    -------
    list of source file paths, one per epiweek, ordered by issue date

    Raises
    ------
    ValueError
        If the name of a non-prelim file does not start with a YYYYMMDD date.
    """
    latest_by_epiweek = {}

    for file in source_files:
        if "prelim" in file.stem:
            continue
        issue_date = datetime.strptime(file.name.split(".")[0], "%Y%m%d")
        epiweek = Week.fromdate(issue_date)
        previous = latest_by_epiweek.get(epiweek)
        # Compare dates explicitly so the result does not depend on the input order;
        # ">=" keeps the "last one wins" behavior when two files share a date.
        if previous is None or issue_date >= previous[0]:
            latest_by_epiweek[epiweek] = (issue_date, file)

    # Return the files chronologically so issues are processed oldest first.
    ordered = sorted(latest_by_epiweek.values(), key=lambda pair: pair[0])
    return [file for _, file in ordered]
| 57 | + |
| 58 | + |
def patch(params):
    """
    Run the NHSN indicator once for each issue date found in the backup directory.

    Issue dates are not listed in params.json; they are taken from the names of the
    "*.csv.gz" source files under params["common"]["backup_dir"], narrowed down by
    filter_source_files. Output for each issue is exported to
    <patch_dir>/issue_<YYYYMMDD>/nhsn.

    Parameters
    ----------
    params
        Dictionary of run parameters. Keys read here:
        - "common": "log_filename" and "backup_dir"
        - "patch": "patch_dir", str, directory to write all issues output
        Keys set here before each run: params["patch"]["issue_date"],
        params["common"]["export_dir"] and params["common"]["custom_run"].
    """
    logger = get_structured_logger("delphi_nhsn.patch", filename=params["common"]["log_filename"])

    source_files = sorted(Path(params["common"]["backup_dir"]).glob("*.csv.gz"))
    if not source_files:
        # Without this guard the start/end lookup below fails with a bare IndexError.
        logger.warning(
            "No source files found, nothing to patch",
            backup_dir=params["common"]["backup_dir"],
            patch_directory=params["patch"]["patch_dir"],
        )
        return

    makedirs(params["patch"]["patch_dir"], exist_ok=True)

    logger.info(
        "Starting patching",
        patch_directory=params["patch"]["patch_dir"],
        start_issue=source_files[0].name.split(".")[0],
        end_issue=source_files[-1].name.split(".")[0],
    )

    patch_list = filter_source_files(source_files)
    for file in patch_list:
        issue_date_str = file.name.split(".")[0]
        logger.info("Running issue", issue_date=datetime.strptime(issue_date_str, "%Y%m%d").strftime("%Y-%m-%d"))
        params["patch"]["issue_date"] = issue_date_str
        # regardless of week date type or not the directory name must be issue_date_YYYYMMDD
        # conversion is done in acquisition
        current_issue_dir = f"{params['patch']['patch_dir']}/issue_{issue_date_str}/nhsn"
        makedirs(current_issue_dir, exist_ok=True)
        params["common"]["export_dir"] = current_issue_dir
        params["common"]["custom_run"] = True
        run_module(params, logger)
| 91 | + |
| 92 | + |
if __name__ == "__main__":
    # Script entry point: load the run parameters, then patch every available issue.
    runtime_params = read_params()
    patch(runtime_params)
0 commit comments