# TODO: this is pseudocode and may not run or may not run correctly
2-
import argparse

import pandas as pd
54def fetch_report_urls (season ):
65 """Get all report URLs from a season's report index page"""
76 pass
87
9- # TODO: consider how to encode a "season" object, maybe as a tuple of start/end years `(2023, 2024)`, or a string `2023-2024`.
8+ ## TODO: consider how to encode a "season" object, maybe as a tuple of start/end years `(2023, 2024)`, or a string `2023-2024`.
9+ ## TODO: I think there's already a fn for this that includes the loop and seasons
1010def fetch_one_season_from_report (season ):
1111 report_urls = fetch_report_urls (season )
1212 df_list = [fetch_one_report (url ) for url in report_urls ]
1313 df = pd .concat (df_list )
1414
1515 return df
1616
17- def fetch_one_dashboard (url = None ):
17+ def fetch_dashboard_data (url = None ):
1818 """Get data from current or archived dashboard"""
19- # If no url is provided, fetch data from the current dashboard (whose URL is static).
20- if not url :
21- url = DEFAULT_DASHBOARD_URL
22-
23- # TODO: put rest of scraping code in here
2419 pass
2520
26- def fetch_report_data (start_date , end_date ):
27- included_seasons = compute_seasons_in_range ( start_date , end_date )
21+ def fetch_report_data ():
22+ seasons = [...]
2823
29- # Fetch all reports made for each season.
30- # We do this because fetching reports is pretty fast, and it saves us from
31- # having to parse either URLs or text on the webpage. We will drop data
32- # outside the requested range later.
33- df_list = [fetch_one_season_from_report (season ) for season in included_seasons ]
24+ # Fetch all reports made for all seasons.
25+ ## TODO: I think there's already a fn for this that includes the loop and seasons
26+ df_list = [fetch_one_season_from_report (season ) for season in seasons ]
3427 df = pd .concat (df_list )
3528
36- # Only keep data that was issued within the requested date range.
37- df = df [start_date <= df .issue <= end_date ]
38-
3929 return df
4030
41- def fetch_historical_dashboard_data (start_date , end_date ):
42- included_weeks = compute_weeks_in_range (start_date , end_date )
43- included_report_urls = construct_archived_dashboard_urls (included_weeks )
44-
45- df_list = [fetch_one_dashboard (url ) for url in included_report_urls ]
31+ def fetch_historical_dashboard_data ():
32+ included_report_urls = fetch_archived_dashboard_urls ()
33+ df_list = [fetch_dashboard_data (url ) for url in included_report_urls ]
4634 df = pd .concat (df_list )
4735
4836 return df
4937
50- def fetch_historical_dashboard_data (start_date , end_date ):
51- create all historical_dashboard_urls included in date range
38+ def fetch_historical_dashboard_data ():
39+ create / scrape all historical_dashboard_urls
5240 loop over urls :
5341 fetch_dashboard_data (historical_dashboard_url )
5442
55- included_seasons = compute_seasons_in_range (start_date , end_date )
56- df_list = [fetch_one_season_from_report (season ) for season in included_seasons ]
5743 df = pd .concat (df_list )
58- df = df [start_date <= df .issue <= end_date ]
5944
6045 return df
6146
6247def fetch_current_dashboard_data ():
63- fetch_dashboard_data (current_dashboard_url )
64-
65- def fetch_data (start_date , end_date ):
66- if (start_date , end_date ) not exist :
67- data = fetch_current_dashboard_data ()
68- else :
69- early_range , late_range = split_date_range_by_dashboard_release_date (start_date , end_date )
70- report_data = fetch_report_data (early_range )
71- dashboard_data = fetch_historical_dashboard_data (late_range )
72-
73- data = [report_data , dashboard_data ].concat ()
74-
75- return data
48+ return fetch_dashboard_data (DEFAULT_DASHBOARD_URL )
49+
50+ def update_current_data (start_date , end_date ):
51+ data = fetch_current_dashboard_data ()
52+ update_database (data )
53+
54+ def update_historical_data ():
55+ report_data = fetch_report_data ()
56+ dashboard_data = fetch_historical_dashboard_data ()
57+
58+ data = [report_data , dashboard_data ].concat ()
59+
60+ update_database (data )
61+
62+
63+ def main ():
64+ # args and usage
65+ parser = argparse .ArgumentParser ()
66+ # fmt: off
67+ parser .add_argument (
68+ "--current" ,
69+ "-c" ,
70+ action = "store_true" ,
71+ help = "fetch current data, that is, data for the latest epiweek"
72+ )
73+ parser .add_argument (
74+ "--historical" ,
75+ "-h" ,
76+ action = "store_true" ,
77+ help = "fetch historical data, that is, data for all available time periods other than the latest epiweek"
78+ )
79+ # fmt: on
80+ args = parser .parse_args ()
81+
82+ current_flag , historical_flag = (
83+ args .current ,
84+ args .historical ,
85+ )
86+ if not current_flag and not historical_flag :
87+ raise Exception ("no data was requested" )
88+
89+ # Decide what to update
90+ if current_flag :
91+ update_current_data ()
92+ if historical_flag :
93+ update_historical_data ()
94+
95+
96+ if __name__ == "__main__" :
97+ main ()