Skip to content

Commit a44ad10

Browse files
committed
rename files
1 parent bae693b commit a44ad10

File tree

3 files changed

+75
-0
lines changed

3 files changed

+75
-0
lines changed
File renamed without changes.
File renamed without changes.

src/acquisition/rvdss/run.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# TODO: this is pseudocode and may not run or may not run correctly
2+
3+
import pandas as pd
4+
5+
def fetch_report_urls(season):
    """Get all report URLs from a season's report index page.

    Parameters
    ----------
    season : season identifier (encoding TBD elsewhere in this module —
        possibly a `(start_year, end_year)` tuple or a `"2023-2024"` string).

    Returns
    -------
    TODO(stub): not yet implemented; currently returns None.
    """
    pass
8+
9+
# TODO: consider how to encode a "season" object, maybe as a tuple of
# start/end years `(2023, 2024)`, or a string `2023-2024`.
def fetch_one_season_from_report(season):
    """Fetch every report published during one season and concatenate them.

    Looks up all report URLs for the season, fetches each report as a
    DataFrame, and returns a single combined DataFrame.
    """
    frames = []
    for report_url in fetch_report_urls(season):
        frames.append(fetch_one_report(report_url))
    return pd.concat(frames)
16+
17+
def fetch_one_dashboard(url=None):
    """Get data from the current or an archived dashboard.

    When no URL is provided, fall back to the current dashboard, whose URL
    is static.
    """
    # A falsy `url` (None/empty) means "use the live dashboard".
    url = url or DEFAULT_DASHBOARD_URL

    # TODO: put rest of scraping code in here
    pass
25+
26+
def fetch_report_data(start_date, end_date):
    """Fetch report data for every season overlapping the given date range.

    Parameters
    ----------
    start_date, end_date : inclusive bounds on the `issue` date of the rows
        to keep.

    Returns
    -------
    pandas.DataFrame restricted to rows whose `issue` lies in the range.
    """
    included_seasons = compute_seasons_in_range(start_date, end_date)

    # Fetch all reports made for each season.
    # We do this because fetching reports is pretty fast, and it saves us from
    # having to parse either URLs or text on the webpage. We will drop data
    # outside the requested range later.
    df_list = [fetch_one_season_from_report(season) for season in included_seasons]
    df = pd.concat(df_list)

    # Only keep data that was issued within the requested date range.
    # A chained comparison (`start_date <= df.issue <= end_date`) raises
    # ValueError on a pandas Series, so combine two boolean masks with `&`.
    df = df[(start_date <= df.issue) & (df.issue <= end_date)]

    return df
40+
41+
def fetch_historical_dashboard_data(start_date, end_date):
    """Fetch archived-dashboard data for all weeks in the given date range.

    Maps each week in the range to its archived dashboard URL, fetches every
    dashboard, and concatenates the results into one DataFrame.
    """
    weeks = compute_weeks_in_range(start_date, end_date)
    archive_urls = construct_archived_dashboard_urls(weeks)

    frames = [fetch_one_dashboard(archive_url) for archive_url in archive_urls]
    return pd.concat(frames)
49+
50+
# FIXME: duplicate definition — this shadows the fetch_historical_dashboard_data
# defined earlier in this file; one of the two should be removed.
def fetch_historical_dashboard_data(start_date, end_date):
    """Fetch archived-dashboard data issued within the given date range.

    Builds the archived-dashboard URL for every week in the range, fetches
    each dashboard, concatenates the results, and drops rows issued outside
    the requested range.

    Returns
    -------
    pandas.DataFrame restricted to rows whose `issue` lies in the range.
    """
    # The original body here was non-Python pseudocode ("create all ...",
    # "loop over urls:") mixed with report-fetching code copied from
    # fetch_report_data; this is the dashboard-based implementation it
    # described.
    included_weeks = compute_weeks_in_range(start_date, end_date)
    included_urls = construct_archived_dashboard_urls(included_weeks)

    df_list = [fetch_one_dashboard(url) for url in included_urls]
    df = pd.concat(df_list)

    # Only keep data issued within the requested range. Two boolean masks
    # combined with `&` — a chained comparison is invalid on a pandas Series.
    df = df[(start_date <= df.issue) & (df.issue <= end_date)]

    return df
61+
62+
def fetch_current_dashboard_data():
    """Fetch data from the current (live) dashboard.

    Delegates to fetch_one_dashboard(), which falls back to the static
    current-dashboard URL when called with no argument.
    """
    # The original pseudocode called an undefined `fetch_dashboard_data` with
    # an undefined `current_dashboard_url`, and discarded the result; the
    # helper this module actually defines is `fetch_one_dashboard`.
    return fetch_one_dashboard()
64+
65+
def fetch_data(start_date=None, end_date=None):
    """Fetch data for the requested date range.

    When no dates are given, fetch only the current dashboard. Otherwise the
    range is split at the dashboard release date: the earlier part is served
    from weekly reports, the later part from archived dashboards.

    Parameters
    ----------
    start_date, end_date : inclusive bounds of the requested range, or both
        None to fetch the current dashboard only. (Defaults added so the
        "no dates" case — `(start_date, end_date) not exist` in the original
        pseudocode — is expressible; two-argument callers are unaffected.)

    Returns
    -------
    pandas.DataFrame with the combined data.
    """
    if start_date is None and end_date is None:
        data = fetch_current_dashboard_data()
    else:
        early_range, late_range = split_date_range_by_dashboard_release_date(
            start_date, end_date
        )
        # NOTE(review): assumes each range is a (start, end) pair that can be
        # unpacked into the two-argument fetchers — confirm against
        # split_date_range_by_dashboard_release_date.
        report_data = fetch_report_data(*early_range)
        dashboard_data = fetch_historical_dashboard_data(*late_range)

        # The original pseudocode called `.concat()` on a plain list; pandas
        # concatenation is the module-level `pd.concat`.
        data = pd.concat([report_data, dashboard_data])

    return data

0 commit comments

Comments
 (0)