-
Notifications
You must be signed in to change notification settings - Fork 16
2085 add proportions nhsn #2111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
ff91c4c
5ef99b2
6b19402
ad92262
f4b3c40
1df478c
6e5a99b
7cabd8a
6a73c35
6e0d4c2
2da6c08
1e408ba
77662dc
783ab24
76d5436
18de943
e3e96bf
e9bb0a7
33f3db5
88fbc6e
7e6b23a
a220e0d
d8f237b
11ceae9
ebe52aa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,12 @@ | ||
| # -*- coding: utf-8 -*- | ||
| """Functions for pulling NHSN data.""" | ||
| import logging | ||
| import random | ||
| import time | ||
| from datetime import datetime, timedelta | ||
| from pathlib import Path | ||
| from typing import Optional | ||
| from urllib.error import HTTPError | ||
|
|
||
| import pandas as pd | ||
| from delphi_utils import create_backup_csv | ||
|
|
@@ -11,20 +15,45 @@ | |
| from .constants import MAIN_DATASET_ID, PRELIM_DATASET_ID, PRELIM_SIGNALS_MAP, PRELIM_TYPE_DICT, SIGNALS_MAP, TYPE_DICT | ||
|
|
||
|
|
||
| def pull_data(socrata_token: str, dataset_id: str): | ||
| def check_last_updated(client, dataset_id, logger): | ||
| """Check the last-updated timestamp to determine whether data should be pulled.""" | ||
| try: | ||
| response = client.get_metadata(dataset_id) | ||
| except HTTPError as err: | ||
| if err.code == 503: | ||
| time.sleep(2 + random.randint(0, 1000) / 1000.0) | ||
aysim319 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| response = client.get_metadata(dataset_id) | ||
| else: | ||
| raise err | ||
aysim319 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| updated_timestamp = datetime.utcfromtimestamp(int(response["rowsUpdatedAt"])) | ||
| now = datetime.utcnow() | ||
| recently_updated = (now - updated_timestamp) < timedelta(days=1) | ||
|
||
| prelim_prefix = "Preliminary " if dataset_id == PRELIM_DATASET_ID else "" | ||
| if recently_updated: | ||
| logger.info(f"{prelim_prefix}NHSN data was recently updated; Pulling data", updated_timestamp=updated_timestamp) | ||
| else: | ||
| logger.info(f"{prelim_prefix}NHSN data is stale; Skipping", updated_timestamp=updated_timestamp) | ||
| return recently_updated | ||
|
|
||
|
|
||
| def pull_data(socrata_token: str, dataset_id: str, logger): | ||
| """Pull data from Socrata API.""" | ||
| client = Socrata("data.cdc.gov", socrata_token) | ||
| results = [] | ||
| offset = 0 | ||
| limit = 50000 # maximum limit allowed by SODA 2.0 | ||
| while True: | ||
| page = client.get(dataset_id, limit=limit, offset=offset) | ||
| if not page: | ||
| break # exit the loop if no more results | ||
| results.extend(page) | ||
| offset += limit | ||
|
|
||
| df = pd.DataFrame.from_records(results) | ||
| recently_updated = check_last_updated(client, "ua7e-t2fy", logger) | ||
aysim319 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| df = pd.DataFrame() | ||
| if recently_updated: | ||
| results = [] | ||
| offset = 0 | ||
| limit = 50000 # maximum limit allowed by SODA 2.0 | ||
| while True: | ||
aysim319 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| page = client.get(dataset_id, limit=limit, offset=offset) | ||
| if not page: | ||
| break # exit the loop if no more results | ||
| results.extend(page) | ||
| offset += limit | ||
|
|
||
| df = pd.DataFrame.from_records(results) | ||
| return df | ||
|
|
||
|
|
||
|
|
@@ -89,7 +118,7 @@ def pull_nhsn_data( | |
| """ | ||
| # Pull data from Socrata API | ||
| df = ( | ||
| pull_data(socrata_token, dataset_id=MAIN_DATASET_ID) | ||
| pull_data(socrata_token, MAIN_DATASET_ID, logger) | ||
| if not custom_run | ||
| else pull_data_from_file(backup_dir, issue_date, logger, prelim_flag=False) | ||
| ) | ||
|
|
@@ -144,8 +173,9 @@ def pull_preliminary_nhsn_data( | |
| pd.DataFrame | ||
| Dataframe as described above. | ||
| """ | ||
| # Pull data from Socrata API | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I know they're similar. I thought about it and went back and forth, but I figured there might be something different going on in the future, so I kept them separate. I'm not too concerned about this, since we'll be slowly deprecating this codebase. |
||
| df = ( | ||
| pull_data(socrata_token, dataset_id=PRELIM_DATASET_ID) | ||
| pull_data(socrata_token, PRELIM_DATASET_ID, logger) | ||
| if not custom_run | ||
| else pull_data_from_file(backup_dir, issue_date, logger, prelim_flag=True) | ||
| ) | ||
|
|
||

Uh oh!
There was an error while loading. Please reload this page.