From 29249f5e12f8075915a475cfffa84d9e404a256d Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Fri, 14 Nov 2025 17:38:35 +0000 Subject: [PATCH 01/60] first bit of work --- pyproject.toml | 4 + src/dataplatform/forecast.py | 269 +++++++++++++++++++++++++++++++++++ src/main.py | 2 + 3 files changed, 275 insertions(+) create mode 100644 src/dataplatform/forecast.py diff --git a/pyproject.toml b/pyproject.toml index 25ef385..a81d0e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ dependencies = [ "torch @ https://download.pytorch.org/whl/cpu/torch-2.3.1%2Bcpu-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64'", "torch @ https://download.pytorch.org/whl/cpu/torch-2.3.1-cp312-none-macosx_11_0_arm64.whl ; platform_system == 'Darwin' and platform_machine == 'arm64'", "matplotlib>=3.8,<4.0", + "dp-sdk", ] [project.optional-dependencies] @@ -66,6 +67,9 @@ dev-dependencies = [ index-url = "https://download.pytorch.org/whl/cpu" extra-index-url = ["https://pypi.org/simple"] +[tool.uv.sources] +dp-sdk = { url = "https://github.com/openclimatefix/data-platform/releases/download/v0.12.0/dp_sdk-0.12.0-py3-none-any.whl" } + [tool.pytest.ini_options] testpaths = ["tests"] python_files = ["test_*.py"] diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py new file mode 100644 index 0000000..5b9f4f4 --- /dev/null +++ b/src/dataplatform/forecast.py @@ -0,0 +1,269 @@ +import streamlit as st +from datetime import datetime, timedelta, timezone +import os +import asyncio +from dp_sdk.ocf import dp +import pandas as pd +from grpclib.client import Channel +import plotly.graph_objects as go + +data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost") +data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051")) + +# TODO make this dynamic +observer_names = ['pvlive_in_day','pvlive_day_after'] + + +async def get_forecast_data(_client, location,start_date,end_date,selected_forecasters) -> pd.DataFrame: 
+ + all_data_df = [] + + # loop over 7 days of data + temp_start_date = start_date + while temp_start_date <= end_date: + temp_end_date = temp_start_date + timedelta(days=7) + if temp_end_date > end_date: + temp_end_date = end_date + + # fetch data + stream_forecast_data_request = dp.StreamForecastDataRequest(location_uuid=location.location_uuid, + energy_source=dp.EnergySource.SOLAR, + time_window=dp.TimeWindow( + start_timestamp_utc=datetime.combine(temp_start_date, datetime.min.time()).replace(tzinfo=timezone.utc), + end_timestamp_utc=datetime.combine(temp_end_date, datetime.min.time()).replace(tzinfo=timezone.utc) + ), + forecasters=selected_forecasters) + stream_forecast_data_response = _client.stream_forecast_data(stream_forecast_data_request) + + + i=0 + async for forecast_data in stream_forecast_data_response: + + forecast_data_dict = forecast_data.to_dict() + forecast_data_dict.pop('otherStatisticsFractions') + data_df = pd.DataFrame(forecast_data_dict, index=[i]) + all_data_df.append(data_df) + + i += 1 + + temp_start_date = temp_start_date + timedelta(days=7) + + all_data_df = pd.concat(all_data_df, ignore_index=True) + + return all_data_df + + +async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame: + + all_observations_df = [] + + for observer_name in observer_names: + + # loop over 7 days of data + observation_one_df = [] + temp_start_date = start_date + while temp_start_date <= end_date: + temp_end_date = temp_start_date + timedelta(days=7) + if temp_end_date > end_date: + temp_end_date = end_date + + + get_observations_request = dp.GetObservationsAsTimeseriesRequest(observer_name=observer_name, + location_uuid=location.location_uuid, + energy_source=dp.EnergySource.SOLAR, + time_window=dp.TimeWindow( + start_timestamp_utc=datetime.combine(temp_start_date, datetime.min.time()).replace(tzinfo=timezone.utc), + end_timestamp_utc=datetime.combine(temp_end_date, datetime.min.time()).replace(tzinfo=timezone.utc) + ),) + 
get_observations_response = await client.get_observations_as_timeseries(get_observations_request) + + i=0 + for value in get_observations_response.values: + observations_df = pd.DataFrame(value.to_dict(), index=[i]) + observation_one_df.append(observations_df) + i += 1 + + temp_start_date = temp_start_date + timedelta(days=7) + + observation_one_df = pd.concat(observation_one_df, ignore_index=True) + observation_one_df = observation_one_df.sort_values(by='timestampUtc') + observation_one_df['observer_name'] = observer_name + + all_observations_df.append(observation_one_df) + + all_observations_df = pd.concat(all_observations_df, ignore_index=True) + + return all_observations_df + + +def dp_forecast_page(): + asyncio.run(async_dp_forecast_page()) + + +async def async_dp_forecast_page(): + st.title("Data Platform Forecast Page") + st.write("This is the forecast page from the Data Platform module. This is very much a WIP") + + async with Channel(host=data_platform_host, port=data_platform_port) as channel: + client = dp.DataPlatformDataServiceStub(channel) + + # Select Country + country = st.sidebar.selectbox("TODO Select a Country", ['UK', 'NL'], index=0) + + # Select Location Type + location_types = [dp.LocationType.NATION, dp.LocationType.GSP, dp.LocationType.SITE] + location_type = st.sidebar.selectbox("Select a Location Type", location_types, index=0) + + # List Location + list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type) + list_locations_response = await client.list_locations(list_locations_request) + locations = list_locations_response.locations + location_names = [loc.location_name for loc in locations] + + # slect locations + selected_location_name = st.sidebar.selectbox("Select a Location", location_names, index=0) + selected_location = next(loc for loc in locations if loc.location_name == selected_location_name) + + # get models + get_forecasters_request = dp.ListForecastersRequest(latest_versions_only=True) + 
get_forecasters_response = await client.list_forecasters(get_forecasters_request) + forecasters = get_forecasters_response.forecasters + forecaster_names = [forecaster.forecaster_name for forecaster in forecasters] + selected_forecaster_name = st.sidebar.multiselect("Select a Forecaster", forecaster_names, default=forecaster_names[0]) + selected_forecasters = [forecaster for forecaster in forecasters if forecaster.forecaster_name in selected_forecaster_name] + + # select start and end date + start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=30)) + end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3)) + + # select forecast type + st.sidebar.write("TODO Select Forecast Type:") + + # setup page + st.header("Time Series Plot") + + # get generation data + all_observations_df = await get_all_observations(client, selected_location, start_date, end_date) + + # get forcast all data + all_forecast_data_df = await get_forecast_data(client, selected_location, start_date, end_date, selected_forecasters) + st.write(f"Selected Location uuid: {selected_location.location_uuid}. 
\ + Fetched {len(all_forecast_data_df)} rows of forecast data") + + # add download button + csv = all_forecast_data_df.to_csv().encode("utf-8") + st.download_button( + label="⬇️", + data=csv, + file_name=f"site_forecast_{selected_location.location_uuid}_{start_date}_{end_date}.csv", + mime="text/csv", + ) + + + all_forecast_data_df['target_timestamp_utc'] = pd.to_datetime(all_forecast_data_df['initTimestamp']) + pd.to_timedelta(all_forecast_data_df['horizonMins'], unit='m') + + # Choose current forecast + # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins + # it should also be unique for each forecasterFullName + current_forecast_df = all_forecast_data_df.loc[all_forecast_data_df.groupby(['target_timestamp_utc', 'forecasterFullname'])['horizonMins'].idxmin()] + + # plot the results + fig = go.Figure() + for forecaster in selected_forecasters: + name_and_version = f'{forecaster.forecaster_name}:{forecaster.forecaster_version}' + forecaster_df = current_forecast_df[current_forecast_df['forecasterFullname'] == name_and_version] + fig.add_trace(go.Scatter( + x=forecaster_df['target_timestamp_utc'], + y=forecaster_df['p50Fraction'], + mode='lines', + name=forecaster.forecaster_name + )) + + for observer_name in observer_names: + obs_df = all_observations_df[all_observations_df['observer_name'] == observer_name] + fig.add_trace(go.Scatter( + x=obs_df['timestampUtc'], + y=obs_df['valueFraction'], + mode='lines', + name=observer_name + )) + + fig.update_layout( + title='Current Forecast', + xaxis_title='Time', + yaxis_title='Generation [%]', + legend_title='Forecaster' + ) + + st.plotly_chart(fig) + + + + st.header("Summary Accuracy Graph") + + # take the foecast data, and group by horizonMins, forecasterFullName + # calculate mean absolute error between p50Fraction and observations valueFraction + all_observations_df['timestampUtc'] = pd.to_datetime(all_observations_df['timestampUtc']) + merged_df = pd.merge(all_forecast_data_df, 
all_observations_df, left_on=['target_timestamp_utc'], right_on=['timestampUtc'], how='inner', suffixes=('_forecast', '_observation')) + merged_df['absolute_error'] = (merged_df['p50Fraction'] - merged_df['valueFraction']).abs() + + summary_df = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'mean'}).reset_index() + summary_df['std'] = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'std'}).reset_index()['absolute_error'] + summary_df['count'] = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'count'}).reset_index()['absolute_error'] + summary_df['sem'] = summary_df['std'] / (summary_df['count']**0.5) + + fig2 = go.Figure() + + for forecaster in selected_forecasters: + name_and_version = f'{forecaster.forecaster_name}:{forecaster.forecaster_version}' + forecaster_df = summary_df[summary_df['forecasterFullname'] == name_and_version] + fig2.add_trace(go.Scatter( + x=forecaster_df['horizonMins'], + y=forecaster_df['absolute_error'], + mode='lines+markers', + name=forecaster.forecaster_name + )) + + fig2.add_trace( + go.Scatter( + x=forecaster_df['horizonMins'], + y=forecaster_df['absolute_error'] - 1.96 * forecaster_df['sem'], + mode="lines", + # name="p10: " + model, + # line=dict(color=get_colour_from_model_name(model), width=0), + legendgroup=forecaster.forecaster_name, + showlegend=False, + ) + ) + + fig2.add_trace( + go.Scatter( + x=forecaster_df['horizonMins'], + y=forecaster_df['absolute_error'] + 1.96 * forecaster_df['sem'], + mode="lines", + # name="p10: " + model, + # line=dict(color=get_colour_from_model_name(model), width=0), + legendgroup=forecaster.forecaster_name, + showlegend=False, + fill="tonexty", + ) + ) + + + fig2.update_layout( + title='Mean Absolute Error by Horizon', + xaxis_title='Horizon (Minutes)', + yaxis_title='Mean Absolute Error [%]', + legend_title='Forecaster' + ) + + st.plotly_chart(fig2) + + + csv = summary_df.to_csv().encode("utf-8") + 
st.download_button( + label="⬇️", + data=csv, + file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv", + mime="text/csv", + ) \ No newline at end of file diff --git a/src/main.py b/src/main.py index c4cec02..577303f 100644 --- a/src/main.py +++ b/src/main.py @@ -36,6 +36,7 @@ from cloudcasting_page import cloudcasting_page from adjuster import adjuster_page from batch_page import batch_page +from dataplatform.forecast import dp_forecast_page st.get_option("theme.primaryColor") st.set_page_config(layout="wide", page_title="OCF Dashboard") @@ -262,6 +263,7 @@ def main_page(): st.Page(status_page, title="🚦 Status"), st.Page(forecast_page, title="📈 Forecast"), st.Page(pvsite_forecast_page, title="📉 Site Forecast"), + st.Page(dp_forecast_page, title="📉 DP Forecast"), st.Page(sites_toolbox_page, title="🛠️ Sites Toolbox"), st.Page(user_page, title="👥 API Users"), st.Page(nwp_page, title="🌤️ NWP"), From cc505532d8b093d4f24d32ab7492582fd8c4f80f Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Sun, 16 Nov 2025 19:25:17 +0000 Subject: [PATCH 02/60] add to readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index adca9b2..e79ace1 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,8 @@ To run the app locally, you'll need to connect it to the `forecast development d OCF team members can connect to the `forecast development database` using [these Notion instructions](https://www.notion.so/openclimatefix/Connecting-to-AWS-RDS-bf35b3fbd61f40df9c974c240e042354). Add `DB_URL= (db_url from notion documents)` to a `secrets.toml` file. Follow the instructions in the Notion document to connect to the database v. +To connect to the database platform, use `DATA_PLATFORM_HOST` and `DATA_PLATFORM_PORT`. 
+ Run app: ```shell From 9803a66d7546dac73d9a7e8d1d9bf7dc1d4058c5 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Sun, 16 Nov 2025 19:49:53 +0000 Subject: [PATCH 03/60] add to todo list --- src/dataplatform/forecast.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 5b9f4f4..075dc77 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -266,4 +266,15 @@ async def async_dp_forecast_page(): data=csv, file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv", mime="text/csv", - ) \ No newline at end of file + ) + + + st.header("TODO") + + st.write("Metrics summary table") + st.write("Add more metrics") + st.write("Add forecast horizon options") + st.write("Add creation time forecast filter") + st.write("Daily Metrics graphs") + st.write("colours") + st.write("speed up read, use async and more caching") From 4779ec06d9882e7eccb1262f868d0e049d5bcd75 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Sun, 16 Nov 2025 21:21:32 +0000 Subject: [PATCH 04/60] add todo --- src/dataplatform/forecast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 075dc77..2603c33 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -271,6 +271,7 @@ async def async_dp_forecast_page(): st.header("TODO") + st.write("Change from % to MW") st.write("Metrics summary table") st.write("Add more metrics") st.write("Add forecast horizon options") From b159482274bf920bc7d827e34f7139834373fe69 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 17 Nov 2025 11:23:05 +0000 Subject: [PATCH 05/60] stream forecast more effeciently --- src/dataplatform/forecast.py | 310 ++++++++++++++++++++++------------- 1 file changed, 199 insertions(+), 111 deletions(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 2603c33..c4a8a06 
100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -6,16 +6,18 @@ import pandas as pd from grpclib.client import Channel import plotly.graph_objects as go +import betterproto data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost") data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051")) # TODO make this dynamic -observer_names = ['pvlive_in_day','pvlive_day_after'] +observer_names = ["pvlive_in_day", "pvlive_day_after"] -async def get_forecast_data(_client, location,start_date,end_date,selected_forecasters) -> pd.DataFrame: - +async def get_forecast_data( + _client, location, start_date, end_date, selected_forecasters +) -> pd.DataFrame: all_data_df = [] # loop over 7 days of data @@ -26,25 +28,33 @@ async def get_forecast_data(_client, location,start_date,end_date,selected_forec temp_end_date = end_date # fetch data - stream_forecast_data_request = dp.StreamForecastDataRequest(location_uuid=location.location_uuid, - energy_source=dp.EnergySource.SOLAR, - time_window=dp.TimeWindow( - start_timestamp_utc=datetime.combine(temp_start_date, datetime.min.time()).replace(tzinfo=timezone.utc), - end_timestamp_utc=datetime.combine(temp_end_date, datetime.min.time()).replace(tzinfo=timezone.utc) - ), - forecasters=selected_forecasters) - stream_forecast_data_response = _client.stream_forecast_data(stream_forecast_data_request) - - - i=0 - async for forecast_data in stream_forecast_data_response: - - forecast_data_dict = forecast_data.to_dict() - forecast_data_dict.pop('otherStatisticsFractions') - data_df = pd.DataFrame(forecast_data_dict, index=[i]) - all_data_df.append(data_df) - - i += 1 + stream_forecast_data_request = dp.StreamForecastDataRequest( + location_uuid=location.location_uuid, + energy_source=dp.EnergySource.SOLAR, + time_window=dp.TimeWindow( + start_timestamp_utc=datetime.combine( + temp_start_date, datetime.min.time() + ).replace(tzinfo=timezone.utc), + end_timestamp_utc=datetime.combine( + temp_end_date, 
datetime.min.time() + ).replace(tzinfo=timezone.utc), + ), + forecasters=selected_forecasters, + ) + forecasts = [] + async for chunk in _client.stream_forecast_data(stream_forecast_data_request): + forecasts.append(chunk.to_dict(casing=betterproto.Casing.SNAKE)) + + if len(forecasts) > 0: + all_data_df.append( + pd.DataFrame.from_dict(forecasts) + .pipe( + lambda df: df.join( + pd.json_normalize(df["other_statistics_fractions"]) + ) + ) + .drop("other_statistics_fractions", axis=1) + ) temp_start_date = temp_start_date + timedelta(days=7) @@ -54,11 +64,9 @@ async def get_forecast_data(_client, location,start_date,end_date,selected_forec async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame: - all_observations_df = [] for observer_name in observer_names: - # loop over 7 days of data observation_one_df = [] temp_start_date = start_date @@ -67,30 +75,37 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat if temp_end_date > end_date: temp_end_date = end_date + get_observations_request = dp.GetObservationsAsTimeseriesRequest( + observer_name=observer_name, + location_uuid=location.location_uuid, + energy_source=dp.EnergySource.SOLAR, + time_window=dp.TimeWindow( + start_timestamp_utc=datetime.combine( + temp_start_date, datetime.min.time() + ).replace(tzinfo=timezone.utc), + end_timestamp_utc=datetime.combine( + temp_end_date, datetime.min.time() + ).replace(tzinfo=timezone.utc), + ), + ) + get_observations_response = await client.get_observations_as_timeseries( + get_observations_request + ) - get_observations_request = dp.GetObservationsAsTimeseriesRequest(observer_name=observer_name, - location_uuid=location.location_uuid, - energy_source=dp.EnergySource.SOLAR, - time_window=dp.TimeWindow( - start_timestamp_utc=datetime.combine(temp_start_date, datetime.min.time()).replace(tzinfo=timezone.utc), - end_timestamp_utc=datetime.combine(temp_end_date, datetime.min.time()).replace(tzinfo=timezone.utc) - ),) - 
get_observations_response = await client.get_observations_as_timeseries(get_observations_request) - - i=0 + i = 0 for value in get_observations_response.values: observations_df = pd.DataFrame(value.to_dict(), index=[i]) observation_one_df.append(observations_df) i += 1 temp_start_date = temp_start_date + timedelta(days=7) - + observation_one_df = pd.concat(observation_one_df, ignore_index=True) - observation_one_df = observation_one_df.sort_values(by='timestampUtc') - observation_one_df['observer_name'] = observer_name + observation_one_df = observation_one_df.sort_values(by="timestampUtc") + observation_one_df["observer_name"] = observer_name all_observations_df.append(observation_one_df) - + all_observations_df = pd.concat(all_observations_df, ignore_index=True) return all_observations_df @@ -100,55 +115,87 @@ def dp_forecast_page(): asyncio.run(async_dp_forecast_page()) -async def async_dp_forecast_page(): +async def async_dp_forecast_page(): st.title("Data Platform Forecast Page") - st.write("This is the forecast page from the Data Platform module. This is very much a WIP") + st.write( + "This is the forecast page from the Data Platform module. 
This is very much a WIP" + ) async with Channel(host=data_platform_host, port=data_platform_port) as channel: client = dp.DataPlatformDataServiceStub(channel) # Select Country - country = st.sidebar.selectbox("TODO Select a Country", ['UK', 'NL'], index=0) + country = st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0) # Select Location Type - location_types = [dp.LocationType.NATION, dp.LocationType.GSP, dp.LocationType.SITE] - location_type = st.sidebar.selectbox("Select a Location Type", location_types, index=0) - + location_types = [ + dp.LocationType.NATION, + dp.LocationType.GSP, + dp.LocationType.SITE, + ] + location_type = st.sidebar.selectbox( + "Select a Location Type", location_types, index=0 + ) + # List Location - list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type) + list_locations_request = dp.ListLocationsRequest( + location_type_filter=location_type + ) list_locations_response = await client.list_locations(list_locations_request) locations = list_locations_response.locations location_names = [loc.location_name for loc in locations] - + # slect locations - selected_location_name = st.sidebar.selectbox("Select a Location", location_names, index=0) - selected_location = next(loc for loc in locations if loc.location_name == selected_location_name) + selected_location_name = st.sidebar.selectbox( + "Select a Location", location_names, index=0 + ) + selected_location = next( + loc for loc in locations if loc.location_name == selected_location_name + ) # get models get_forecasters_request = dp.ListForecastersRequest(latest_versions_only=True) - get_forecasters_response = await client.list_forecasters(get_forecasters_request) + get_forecasters_response = await client.list_forecasters( + get_forecasters_request + ) forecasters = get_forecasters_response.forecasters forecaster_names = [forecaster.forecaster_name for forecaster in forecasters] - selected_forecaster_name = st.sidebar.multiselect("Select a 
Forecaster", forecaster_names, default=forecaster_names[0]) - selected_forecasters = [forecaster for forecaster in forecasters if forecaster.forecaster_name in selected_forecaster_name] + selected_forecaster_name = st.sidebar.multiselect( + "Select a Forecaster", forecaster_names, default=forecaster_names[0] + ) + selected_forecasters = [ + forecaster + for forecaster in forecasters + if forecaster.forecaster_name in selected_forecaster_name + ] # select start and end date - start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=30)) - end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3)) + start_date = st.sidebar.date_input( + "Start date:", datetime.now().date() - timedelta(days=30) + ) + end_date = st.sidebar.date_input( + "End date:", datetime.now().date() + timedelta(days=3) + ) # select forecast type st.sidebar.write("TODO Select Forecast Type:") # setup page st.header("Time Series Plot") - + # get generation data - all_observations_df = await get_all_observations(client, selected_location, start_date, end_date) + all_observations_df = await get_all_observations( + client, selected_location, start_date, end_date + ) # get forcast all data - all_forecast_data_df = await get_forecast_data(client, selected_location, start_date, end_date, selected_forecasters) - st.write(f"Selected Location uuid: {selected_location.location_uuid}. \ - Fetched {len(all_forecast_data_df)} rows of forecast data") + all_forecast_data_df = await get_forecast_data( + client, selected_location, start_date, end_date, selected_forecasters + ) + st.write( + f"Selected Location uuid: {selected_location.location_uuid}. 
\ + Fetched {len(all_forecast_data_df)} rows of forecast data" + ) # add download button csv = all_forecast_data_df.to_csv().encode("utf-8") @@ -159,75 +206,117 @@ async def async_dp_forecast_page(): mime="text/csv", ) + all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime( + all_forecast_data_df["init_timestamp"] + ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m") - all_forecast_data_df['target_timestamp_utc'] = pd.to_datetime(all_forecast_data_df['initTimestamp']) + pd.to_timedelta(all_forecast_data_df['horizonMins'], unit='m') - # Choose current forecast # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins # it should also be unique for each forecasterFullName - current_forecast_df = all_forecast_data_df.loc[all_forecast_data_df.groupby(['target_timestamp_utc', 'forecasterFullname'])['horizonMins'].idxmin()] + current_forecast_df = all_forecast_data_df.loc[ + all_forecast_data_df.groupby( + ["target_timestamp_utc", "forecaster_fullname"] + )["horizon_mins"].idxmin() + ] - # plot the results + # plot the results fig = go.Figure() for forecaster in selected_forecasters: - name_and_version = f'{forecaster.forecaster_name}:{forecaster.forecaster_version}' - forecaster_df = current_forecast_df[current_forecast_df['forecasterFullname'] == name_and_version] - fig.add_trace(go.Scatter( - x=forecaster_df['target_timestamp_utc'], - y=forecaster_df['p50Fraction'], - mode='lines', - name=forecaster.forecaster_name - )) + name_and_version = ( + f"{forecaster.forecaster_name}:{forecaster.forecaster_version}" + ) + forecaster_df = current_forecast_df[ + current_forecast_df["forecaster_fullname"] == name_and_version + ] + fig.add_trace( + go.Scatter( + x=forecaster_df["target_timestamp_utc"], + y=forecaster_df["p50_fraction"], + mode="lines", + name=forecaster.forecaster_name, + ) + ) for observer_name in observer_names: - obs_df = all_observations_df[all_observations_df['observer_name'] == observer_name] - 
fig.add_trace(go.Scatter( - x=obs_df['timestampUtc'], - y=obs_df['valueFraction'], - mode='lines', - name=observer_name - )) + obs_df = all_observations_df[ + all_observations_df["observer_name"] == observer_name + ] + fig.add_trace( + go.Scatter( + x=obs_df["timestampUtc"], + y=obs_df["valueFraction"], + mode="lines", + name=observer_name, + ) + ) fig.update_layout( - title='Current Forecast', - xaxis_title='Time', - yaxis_title='Generation [%]', - legend_title='Forecaster' + title="Current Forecast", + xaxis_title="Time", + yaxis_title="Generation [%]", + legend_title="Forecaster", ) st.plotly_chart(fig) - - st.header("Summary Accuracy Graph") # take the foecast data, and group by horizonMins, forecasterFullName # calculate mean absolute error between p50Fraction and observations valueFraction - all_observations_df['timestampUtc'] = pd.to_datetime(all_observations_df['timestampUtc']) - merged_df = pd.merge(all_forecast_data_df, all_observations_df, left_on=['target_timestamp_utc'], right_on=['timestampUtc'], how='inner', suffixes=('_forecast', '_observation')) - merged_df['absolute_error'] = (merged_df['p50Fraction'] - merged_df['valueFraction']).abs() - - summary_df = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'mean'}).reset_index() - summary_df['std'] = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'std'}).reset_index()['absolute_error'] - summary_df['count'] = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'count'}).reset_index()['absolute_error'] - summary_df['sem'] = summary_df['std'] / (summary_df['count']**0.5) + all_observations_df["timestampUtc"] = pd.to_datetime( + all_observations_df["timestampUtc"] + ) + merged_df = pd.merge( + all_forecast_data_df, + all_observations_df, + left_on=["target_timestamp_utc"], + right_on=["timestampUtc"], + how="inner", + suffixes=("_forecast", "_observation"), + ) + merged_df["absolute_error"] = ( + 
merged_df["p50_fraction"] - merged_df["valueFraction"] + ).abs() + + summary_df = ( + merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + .agg({"absolute_error": "mean"}) + .reset_index() + ) + summary_df["std"] = ( + merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + .agg({"absolute_error": "std"}) + .reset_index()["absolute_error"] + ) + summary_df["count"] = ( + merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + .agg({"absolute_error": "count"}) + .reset_index()["absolute_error"] + ) + summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5) fig2 = go.Figure() - + for forecaster in selected_forecasters: - name_and_version = f'{forecaster.forecaster_name}:{forecaster.forecaster_version}' - forecaster_df = summary_df[summary_df['forecasterFullname'] == name_and_version] - fig2.add_trace(go.Scatter( - x=forecaster_df['horizonMins'], - y=forecaster_df['absolute_error'], - mode='lines+markers', - name=forecaster.forecaster_name - )) + name_and_version = ( + f"{forecaster.forecaster_name}:{forecaster.forecaster_version}" + ) + forecaster_df = summary_df[ + summary_df["forecaster_fullname"] == name_and_version + ] + fig2.add_trace( + go.Scatter( + x=forecaster_df["horizon_mins"], + y=forecaster_df["absolute_error"], + mode="lines+markers", + name=forecaster.forecaster_name, + ) + ) fig2.add_trace( go.Scatter( - x=forecaster_df['horizonMins'], - y=forecaster_df['absolute_error'] - 1.96 * forecaster_df['sem'], + x=forecaster_df["horizon_mins"], + y=forecaster_df["absolute_error"] - 1.96 * forecaster_df["sem"], mode="lines", # name="p10: " + model, # line=dict(color=get_colour_from_model_name(model), width=0), @@ -238,8 +327,8 @@ async def async_dp_forecast_page(): fig2.add_trace( go.Scatter( - x=forecaster_df['horizonMins'], - y=forecaster_df['absolute_error'] + 1.96 * forecaster_df['sem'], + x=forecaster_df["horizon_mins"], + y=forecaster_df["absolute_error"] + 1.96 * forecaster_df["sem"], mode="lines", # name="p10: " + model, # 
line=dict(color=get_colour_from_model_name(model), width=0), @@ -249,17 +338,15 @@ async def async_dp_forecast_page(): ) ) - fig2.update_layout( - title='Mean Absolute Error by Horizon', - xaxis_title='Horizon (Minutes)', - yaxis_title='Mean Absolute Error [%]', - legend_title='Forecaster' + title="Mean Absolute Error by Horizon", + xaxis_title="Horizon (Minutes)", + yaxis_title="Mean Absolute Error [%]", + legend_title="Forecaster", ) st.plotly_chart(fig2) - csv = summary_df.to_csv().encode("utf-8") st.download_button( label="⬇️", @@ -268,10 +355,11 @@ async def async_dp_forecast_page(): mime="text/csv", ) - st.header("TODO") st.write("Change from % to MW") + st.write("Add probabilistic") + st.write("Align forecasts on t0") st.write("Metrics summary table") st.write("Add more metrics") st.write("Add forecast horizon options") From cbb10ea459f855be4f15cabb57a8d5f25c62cc7d Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 17 Nov 2025 11:30:44 +0000 Subject: [PATCH 06/60] tidy up observations --- src/dataplatform/forecast.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index c4a8a06..86459b9 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -43,7 +43,11 @@ async def get_forecast_data( ) forecasts = [] async for chunk in _client.stream_forecast_data(stream_forecast_data_request): - forecasts.append(chunk.to_dict(casing=betterproto.Casing.SNAKE)) + forecasts.append( + chunk.to_dict( + include_default_values=True, casing=betterproto.Casing.SNAKE + ) + ) if len(forecasts) > 0: all_data_df.append( @@ -92,16 +96,20 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat get_observations_request ) - i = 0 - for value in get_observations_response.values: - observations_df = pd.DataFrame(value.to_dict(), index=[i]) - observation_one_df.append(observations_df) - i += 1 + observations = [] + for 
chunk in get_observations_response.values: + observations.append( + chunk.to_dict( + include_default_values=True, casing=betterproto.Casing.SNAKE + ) + ) + + observation_one_df.append(pd.DataFrame.from_dict(observations)) temp_start_date = temp_start_date + timedelta(days=7) observation_one_df = pd.concat(observation_one_df, ignore_index=True) - observation_one_df = observation_one_df.sort_values(by="timestampUtc") + observation_one_df = observation_one_df.sort_values(by="timestamp_utc") observation_one_df["observer_name"] = observer_name all_observations_df.append(observation_one_df) @@ -243,8 +251,8 @@ async def async_dp_forecast_page(): ] fig.add_trace( go.Scatter( - x=obs_df["timestampUtc"], - y=obs_df["valueFraction"], + x=obs_df["timestamp_utc"], + y=obs_df["value_fraction"], mode="lines", name=observer_name, ) @@ -263,19 +271,19 @@ async def async_dp_forecast_page(): # take the foecast data, and group by horizonMins, forecasterFullName # calculate mean absolute error between p50Fraction and observations valueFraction - all_observations_df["timestampUtc"] = pd.to_datetime( - all_observations_df["timestampUtc"] + all_observations_df["timestamp_utc"] = pd.to_datetime( + all_observations_df["timestamp_utc"] ) merged_df = pd.merge( all_forecast_data_df, all_observations_df, left_on=["target_timestamp_utc"], - right_on=["timestampUtc"], + right_on=["timestamp_utc"], how="inner", suffixes=("_forecast", "_observation"), ) merged_df["absolute_error"] = ( - merged_df["p50_fraction"] - merged_df["valueFraction"] + merged_df["p50_fraction"] - merged_df["value_fraction"] ).abs() summary_df = ( From 46c77921054b167ede2bc3910a7155822d231835 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 17 Nov 2025 11:35:28 +0000 Subject: [PATCH 07/60] tidy up time window --- src/dataplatform/forecast.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 86459b9..8b82b9f 100644 
--- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -31,14 +31,7 @@ async def get_forecast_data( stream_forecast_data_request = dp.StreamForecastDataRequest( location_uuid=location.location_uuid, energy_source=dp.EnergySource.SOLAR, - time_window=dp.TimeWindow( - start_timestamp_utc=datetime.combine( - temp_start_date, datetime.min.time() - ).replace(tzinfo=timezone.utc), - end_timestamp_utc=datetime.combine( - temp_end_date, datetime.min.time() - ).replace(tzinfo=timezone.utc), - ), + time_window=dp.TimeWindow(start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date), forecasters=selected_forecasters, ) forecasts = [] @@ -83,14 +76,7 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat observer_name=observer_name, location_uuid=location.location_uuid, energy_source=dp.EnergySource.SOLAR, - time_window=dp.TimeWindow( - start_timestamp_utc=datetime.combine( - temp_start_date, datetime.min.time() - ).replace(tzinfo=timezone.utc), - end_timestamp_utc=datetime.combine( - temp_end_date, datetime.min.time() - ).replace(tzinfo=timezone.utc), - ), + time_window=dp.TimeWindow(temp_start_date, temp_end_date), ) get_observations_response = await client.get_observations_as_timeseries( get_observations_request @@ -184,6 +170,8 @@ async def async_dp_forecast_page(): end_date = st.sidebar.date_input( "End date:", datetime.now().date() + timedelta(days=3) ) + start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=timezone.utc) + end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=timezone.utc) # select forecast type st.sidebar.write("TODO Select Forecast Type:") From fb83cc4fee851212762b63dbccabf173ad22d917 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 17 Nov 2025 11:37:52 +0000 Subject: [PATCH 08/60] dp 0.13.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a81d0e5..9e6453d 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,7 @@ index-url = "https://download.pytorch.org/whl/cpu" extra-index-url = ["https://pypi.org/simple"] [tool.uv.sources] -dp-sdk = { url = "https://github.com/openclimatefix/data-platform/releases/download/v0.12.0/dp_sdk-0.12.0-py3-none-any.whl" } +dp-sdk = { url = "https://github.com/openclimatefix/data-platform/releases/download/v0.13.1/dp_sdk-0.13.1-py3-none-any.whl" } [tool.pytest.ini_options] testpaths = ["tests"] From 5ee30e26df34f591dadf61365832d409d5b80514 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 17 Nov 2025 11:43:43 +0000 Subject: [PATCH 09/60] Us Watts not % --- src/dataplatform/forecast.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 8b82b9f..c3eae11 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -57,6 +57,9 @@ async def get_forecast_data( all_data_df = pd.concat(all_data_df, ignore_index=True) + # get watt value + all_data_df['p50_watts'] = all_data_df['p50_fraction'].astype(float) * all_data_df['effective_capacity_watts'].astype(float) + return all_data_df @@ -102,6 +105,8 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat all_observations_df = pd.concat(all_observations_df, ignore_index=True) + all_observations_df['value_watts'] = all_observations_df['value_fraction'].astype(float) * all_observations_df['effective_capacity_watts'].astype(float) + return all_observations_df @@ -227,7 +232,7 @@ async def async_dp_forecast_page(): fig.add_trace( go.Scatter( x=forecaster_df["target_timestamp_utc"], - y=forecaster_df["p50_fraction"], + y=forecaster_df["p50_watts"], mode="lines", name=forecaster.forecaster_name, ) @@ -240,7 +245,7 @@ async def async_dp_forecast_page(): fig.add_trace( go.Scatter( x=obs_df["timestamp_utc"], - y=obs_df["value_fraction"], + y=obs_df["value_watts"], mode="lines", name=observer_name, ) @@ -249,7 
+254,7 @@ async def async_dp_forecast_page(): fig.update_layout( title="Current Forecast", xaxis_title="Time", - yaxis_title="Generation [%]", + yaxis_title="Generation [Watts]", legend_title="Forecaster", ) @@ -271,7 +276,7 @@ async def async_dp_forecast_page(): suffixes=("_forecast", "_observation"), ) merged_df["absolute_error"] = ( - merged_df["p50_fraction"] - merged_df["value_fraction"] + merged_df["p50_watts"] - merged_df["value_watts"] ).abs() summary_df = ( @@ -337,7 +342,7 @@ async def async_dp_forecast_page(): fig2.update_layout( title="Mean Absolute Error by Horizon", xaxis_title="Horizon (Minutes)", - yaxis_title="Mean Absolute Error [%]", + yaxis_title="Mean Absolute Error [watts]", legend_title="Forecaster", ) @@ -353,8 +358,8 @@ async def async_dp_forecast_page(): st.header("TODO") - st.write("Change from % to MW") st.write("Add probabilistic") + st.write("Scale to KW/MW/GW as needed") st.write("Align forecasts on t0") st.write("Metrics summary table") st.write("Add more metrics") From ce8497bff0c1dd6053839061c988100f41f68c09 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 17 Nov 2025 14:50:59 +0000 Subject: [PATCH 10/60] load 30 days of data --- pyproject.toml | 2 +- src/dataplatform/forecast.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9e6453d..a98466b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,7 @@ index-url = "https://download.pytorch.org/whl/cpu" extra-index-url = ["https://pypi.org/simple"] [tool.uv.sources] -dp-sdk = { url = "https://github.com/openclimatefix/data-platform/releases/download/v0.13.1/dp_sdk-0.13.1-py3-none-any.whl" } +dp-sdk = { url = "https://github.com/openclimatefix/data-platform/releases/download/v0.13.2/dp_sdk-0.13.2-py3-none-any.whl" } [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index c3eae11..46de551 100644 --- a/src/dataplatform/forecast.py +++ 
b/src/dataplatform/forecast.py @@ -23,7 +23,7 @@ async def get_forecast_data( # loop over 7 days of data temp_start_date = start_date while temp_start_date <= end_date: - temp_end_date = temp_start_date + timedelta(days=7) + temp_end_date = temp_start_date + timedelta(days=30) if temp_end_date > end_date: temp_end_date = end_date From 834581ca1e8b67d467e331d7e14740f4b5dde6e8 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 17 Nov 2025 16:23:02 +0000 Subject: [PATCH 11/60] add metrics table --- src/dataplatform/forecast.py | 155 +++++++++++++++++++++++++++++++---- 1 file changed, 140 insertions(+), 15 deletions(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 46de551..4835d73 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -15,12 +15,13 @@ observer_names = ["pvlive_in_day", "pvlive_day_after"] + async def get_forecast_data( _client, location, start_date, end_date, selected_forecasters ) -> pd.DataFrame: all_data_df = [] - # loop over 7 days of data + # loop over 30 days of data temp_start_date = start_date while temp_start_date <= end_date: temp_end_date = temp_start_date + timedelta(days=30) @@ -31,7 +32,9 @@ async def get_forecast_data( stream_forecast_data_request = dp.StreamForecastDataRequest( location_uuid=location.location_uuid, energy_source=dp.EnergySource.SOLAR, - time_window=dp.TimeWindow(start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date), + time_window=dp.TimeWindow( + start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date + ), forecasters=selected_forecasters, ) forecasts = [] @@ -58,12 +61,15 @@ async def get_forecast_data( all_data_df = pd.concat(all_data_df, ignore_index=True) # get watt value - all_data_df['p50_watts'] = all_data_df['p50_fraction'].astype(float) * all_data_df['effective_capacity_watts'].astype(float) + all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[ + "effective_capacity_watts" + 
].astype(float) return all_data_df async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame: + all_observations_df = [] for observer_name in observer_names: @@ -105,7 +111,9 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat all_observations_df = pd.concat(all_observations_df, ignore_index=True) - all_observations_df['value_watts'] = all_observations_df['value_fraction'].astype(float) * all_observations_df['effective_capacity_watts'].astype(float) + all_observations_df["value_watts"] = all_observations_df["value_fraction"].astype( + float + ) * all_observations_df["effective_capacity_watts"].astype(float) return all_observations_df @@ -175,8 +183,12 @@ async def async_dp_forecast_page(): end_date = st.sidebar.date_input( "End date:", datetime.now().date() + timedelta(days=3) ) - start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=timezone.utc) - end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=timezone.utc) + start_date = datetime.combine(start_date, datetime.min.time()).replace( + tzinfo=timezone.utc + ) + end_date = datetime.combine(end_date, datetime.min.time()).replace( + tzinfo=timezone.utc + ) # select forecast type st.sidebar.write("TODO Select Forecast Type:") @@ -261,6 +273,18 @@ async def async_dp_forecast_page(): st.plotly_chart(fig) st.header("Summary Accuracy Graph") + metrics = { + "MAE": "MAE is absolute mean error, average(abs(y-x))", + "ME": "ME is mean (bias) error, average((y-x))", + "NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)", + "NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)", + # "NMAE (by observed generation)":" NAME (by observed generation)" + } + selected_metric = st.sidebar.selectbox( + "Select a Metrics", metrics.keys(), index=0 + ) + + st.write(metrics) # take the foecast data, and group by horizonMins, forecasterFullName # calculate 
mean absolute error between p50Fraction and observations valueFraction @@ -275,9 +299,19 @@ async def async_dp_forecast_page(): how="inner", suffixes=("_forecast", "_observation"), ) - merged_df["absolute_error"] = ( - merged_df["p50_watts"] - merged_df["value_watts"] - ).abs() + merged_df["effective_capacity_watts_observation"] = merged_df[ + "effective_capacity_watts_observation" + ].astype(float) + + # error + merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"] + + # absolute error + merged_df["absolute_error"] = (merged_df["error"]).abs() + + # absolute error, normalized by mean observed generation + mean_observed_generation = merged_df["value_watts"].mean() + # merged_df['absolute_error_normalized_by_generation'] = merged_df['absolute_error'] / merged_df['value_watts'] summary_df = ( merged_df.groupby(["horizon_mins", "forecaster_fullname"]) @@ -296,6 +330,35 @@ async def async_dp_forecast_page(): ) summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5) + # ME + summary_df["ME"] = ( + merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + .agg({"error": "mean"}) + .reset_index()["error"] + ) + + # summary_df["absolute_error_divided_by_observed"] = ( + # merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + # .agg({"absolute_error_normalized_by_generation": "mean"}) + # .reset_index()["absolute_error_normalized_by_generation"] + # ) + + summary_df["effective_capacity_watts_observation"] = ( + merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + .agg({"effective_capacity_watts_observation": "mean"}) + .reset_index()["effective_capacity_watts_observation"] + ) + + # rename absolute_error to MAE + summary_df = summary_df.rename(columns={"absolute_error": "MAE"}) + summary_df["NMAE (by capacity)"] = ( + summary_df["MAE"] / summary_df["effective_capacity_watts_observation"] + ) + summary_df["NMAE (by mean observed generation)"] = ( + summary_df["MAE"] / mean_observed_generation + ) + # summary_df["NMAE (by 
observed generation)"] = summary_df["absolute_error_divided_by_observed"] + fig2 = go.Figure() for forecaster in selected_forecasters: @@ -308,7 +371,7 @@ async def async_dp_forecast_page(): fig2.add_trace( go.Scatter( x=forecaster_df["horizon_mins"], - y=forecaster_df["absolute_error"], + y=forecaster_df[selected_metric], mode="lines+markers", name=forecaster.forecaster_name, ) @@ -317,7 +380,7 @@ async def async_dp_forecast_page(): fig2.add_trace( go.Scatter( x=forecaster_df["horizon_mins"], - y=forecaster_df["absolute_error"] - 1.96 * forecaster_df["sem"], + y=forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"], mode="lines", # name="p10: " + model, # line=dict(color=get_colour_from_model_name(model), width=0), @@ -329,7 +392,7 @@ async def async_dp_forecast_page(): fig2.add_trace( go.Scatter( x=forecaster_df["horizon_mins"], - y=forecaster_df["absolute_error"] + 1.96 * forecaster_df["sem"], + y=forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"], mode="lines", # name="p10: " + model, # line=dict(color=get_colour_from_model_name(model), width=0), @@ -340,9 +403,9 @@ async def async_dp_forecast_page(): ) fig2.update_layout( - title="Mean Absolute Error by Horizon", + title=f"{selected_metric} by Horizon", xaxis_title="Horizon (Minutes)", - yaxis_title="Mean Absolute Error [watts]", + yaxis_title=selected_metric, legend_title="Forecaster", ) @@ -356,12 +419,74 @@ async def async_dp_forecast_page(): mime="text/csv", ) + st.header("Summary Accuracy Table") + + # add slider to select min and max horizon mins + min_horizon, max_horizon = st.slider( + "Select Horizon Mins Range", + int(summary_df["horizon_mins"].min()), + int(summary_df["horizon_mins"].max()), + ( + int(summary_df["horizon_mins"].min()), + int(summary_df["horizon_mins"].max()), + ), + step=30, + ) + + # Reduce my horizon mins + summary_table_df = merged_df[ + (merged_df["horizon_mins"] >= min_horizon) + & (merged_df["horizon_mins"] <= max_horizon) + ] + + summary_table_df = 
summary_table_df.rename( + columns={ + "effective_capacity_watts_observation": "Capacity_watts", + "value_watts": "Mean_Observed_Generation_watts", + } + ) + + value_columns = [ + "error", + "absolute_error", + # 'absolute_error_normalized_by_generation', + "Mean_Observed_Generation_watts", + "Capacity_watts", + ] + + summary_table_df = summary_table_df[["forecaster_fullname"] + value_columns] + + summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype( + float + ) + + # group by forecaster full name a + summary_table_df = summary_table_df.groupby("forecaster_fullname").mean() + + # rename + summary_table_df = summary_table_df.rename( + columns={ + "error": "ME", + "absolute_error": "MAE", + # 'absolute_error_normalized_by_generation': 'NMAE (by observed generation)', + "Capacity_watts": "Mean Capacity", + "Mean_Observed_Generation_watts": "Mean Observed Generation", + } + ) + + # pivot table, so forecaster_fullname is columns + summary_table_df = summary_table_df.pivot_table( + columns=summary_table_df.index, + values=summary_table_df.columns.tolist(), + ) + + st.dataframe(summary_table_df) + st.header("TODO") st.write("Add probabilistic") st.write("Scale to KW/MW/GW as needed") st.write("Align forecasts on t0") - st.write("Metrics summary table") st.write("Add more metrics") st.write("Add forecast horizon options") st.write("Add creation time forecast filter") From 65bf1e215b5d9e104f3429529c56646a19924f19 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 17 Nov 2025 16:23:38 +0000 Subject: [PATCH 12/60] add data caching --- src/dataplatform/forecast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 4835d73..058f4c4 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -484,6 +484,7 @@ async def async_dp_forecast_page(): st.header("TODO") + st.write("Add caching on data") st.write("Add probabilistic") st.write("Scale to KW/MW/GW as needed") 
st.write("Align forecasts on t0") From 963833d0b4903b64184767fd508a1eda422e6ffd Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 17 Nov 2025 21:17:56 +0000 Subject: [PATCH 13/60] move back to 7 days --- src/dataplatform/forecast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 058f4c4..3398da8 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -24,7 +24,7 @@ async def get_forecast_data( # loop over 30 days of data temp_start_date = start_date while temp_start_date <= end_date: - temp_end_date = temp_start_date + timedelta(days=30) + temp_end_date = temp_start_date + timedelta(days=7) if temp_end_date > end_date: temp_end_date = end_date From 72ca1345abad4d123f4003a5099d8b27c4bfd9a7 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Tue, 18 Nov 2025 10:52:28 +0000 Subject: [PATCH 14/60] add caching --- pyproject.toml | 3 +- src/dataplatform/forecast.py | 77 ++++++++++++++++++++++++++++-------- 2 files changed, 63 insertions(+), 17 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a98466b..267734f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ "plotly==5.24.1", "psycopg2-binary==2.9.10", "SQLAlchemy==2.0.36", - "streamlit==1.46.1", + "streamlit==1.51.0", "testcontainers==4.9.0", "uvicorn==0.34.0", "geopandas==1.0.1", @@ -36,6 +36,7 @@ dependencies = [ "torch @ https://download.pytorch.org/whl/cpu/torch-2.3.1-cp312-none-macosx_11_0_arm64.whl ; platform_system == 'Darwin' and platform_machine == 'arm64'", "matplotlib>=3.8,<4.0", "dp-sdk", + "aiocache", ] [project.optional-dependencies] diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 3398da8..fe9f3da 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -7,6 +7,8 @@ from grpclib.client import Channel import plotly.graph_objects as go import betterproto +import time +from aiocache import 
Cache, cached data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost") data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051")) @@ -15,12 +17,48 @@ observer_names = ["pvlive_in_day", "pvlive_day_after"] +def key_builder_remove_client(func, *args, **kwargs): + """Custom key builder that ignores the client argument for caching purposes.""" + + key = f"{func.__name__}:" + for arg in args: + if isinstance(arg, dp.DataPlatformDataServiceStub): + continue + key += f"{arg}-" + + for k, v in kwargs.items(): + key += f"{k}={v}-" + + return key + async def get_forecast_data( _client, location, start_date, end_date, selected_forecasters ) -> pd.DataFrame: all_data_df = [] + for forecaster in selected_forecasters: + forecaster_data_df = await get_forecast_data_one_forecaster( + _client, location, start_date, end_date, forecaster + ) + all_data_df.append(forecaster_data_df) + + all_data_df = pd.concat(all_data_df, ignore_index=True) + + # get watt value + all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[ + "effective_capacity_watts" + ].astype(float) + + return all_data_df + + +@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) +async def get_forecast_data_one_forecaster( + client, location, start_date, end_date, selected_forecaster +) -> pd.DataFrame: + all_data_df = [] + # loop over 30 days of data temp_start_date = start_date while temp_start_date <= end_date: @@ -35,10 +73,10 @@ async def get_forecast_data( time_window=dp.TimeWindow( start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date ), - forecasters=selected_forecasters, + forecasters=[selected_forecaster], ) forecasts = [] - async for chunk in _client.stream_forecast_data(stream_forecast_data_request): + async for chunk in client.stream_forecast_data(stream_forecast_data_request): forecasts.append( chunk.to_dict( include_default_values=True, casing=betterproto.Casing.SNAKE @@ -60,16 +98,11 @@ async def get_forecast_data( 
all_data_df = pd.concat(all_data_df, ignore_index=True) - # get watt value - all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[ - "effective_capacity_watts" - ].astype(float) - return all_data_df -async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame: - +@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) +async def get_all_observations(_client, location, start_date, end_date) -> pd.DataFrame: all_observations_df = [] for observer_name in observer_names: @@ -87,7 +120,7 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat energy_source=dp.EnergySource.SOLAR, time_window=dp.TimeWindow(temp_start_date, temp_end_date), ) - get_observations_response = await client.get_observations_as_timeseries( + get_observations_response = await _client.get_observations_as_timeseries( get_observations_request ) @@ -167,8 +200,14 @@ async def async_dp_forecast_page(): ) forecasters = get_forecasters_response.forecasters forecaster_names = [forecaster.forecaster_name for forecaster in forecasters] + if "pvnet_v2" in forecaster_names: + default_index = forecaster_names.index("pvnet_v2") + else: + default_index = 0 selected_forecaster_name = st.sidebar.multiselect( - "Select a Forecaster", forecaster_names, default=forecaster_names[0] + "Select a Forecaster", + forecaster_names, + default=forecaster_names[default_index], ) selected_forecasters = [ forecaster @@ -193,21 +232,24 @@ async def async_dp_forecast_page(): # select forecast type st.sidebar.write("TODO Select Forecast Type:") - # setup page - st.header("Time Series Plot") - # get generation data + time_start = time.time() all_observations_df = await get_all_observations( client, selected_location, start_date, end_date ) + observation_seconds = time.time() - time_start # get forcast all data + time_start = time.time() all_forecast_data_df = await get_forecast_data( client, selected_location, start_date, 
end_date, selected_forecasters ) + forecast_seconds = time.time() - time_start + st.write(f"Selected Location uuid: `{selected_location.location_uuid}`.") st.write( - f"Selected Location uuid: {selected_location.location_uuid}. \ - Fetched {len(all_forecast_data_df)} rows of forecast data" + f"Fetched `{len(all_forecast_data_df)}` rows of forecast data in `{forecast_seconds:.2f}` seconds. \ + Fetched `{len(all_observations_df)}` rows of observation data in `{observation_seconds:.2f}` seconds. \ + We cache data for 5 minutses to speed up repeated requests." ) # add download button @@ -219,6 +261,9 @@ async def async_dp_forecast_page(): mime="text/csv", ) + # 1. Plot of raw forecast data + st.header("Time Series Plot") + all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime( all_forecast_data_df["init_timestamp"] ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m") From bda0da3b021188c41e0e050854708bd1d75c4f31 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Tue, 18 Nov 2025 10:54:31 +0000 Subject: [PATCH 15/60] move data to new file --- src/dataplatform/data.py | 146 +++++++++++++++++++++++++++++++++++ src/dataplatform/forecast.py | 138 +-------------------------------- 2 files changed, 148 insertions(+), 136 deletions(-) create mode 100644 src/dataplatform/data.py diff --git a/src/dataplatform/data.py b/src/dataplatform/data.py new file mode 100644 index 0000000..03c9518 --- /dev/null +++ b/src/dataplatform/data.py @@ -0,0 +1,146 @@ +from datetime import timedelta +import os +from dp_sdk.ocf import dp +import pandas as pd +import betterproto +from aiocache import Cache, cached + +data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost") +data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051")) + +# TODO make this dynamic +observer_names = ["pvlive_in_day", "pvlive_day_after"] + + +def key_builder_remove_client(func, *args, **kwargs): + """Custom key builder that ignores the client argument for caching purposes.""" + + key 
= f"{func.__name__}:" + for arg in args: + if isinstance(arg, dp.DataPlatformDataServiceStub): + continue + key += f"{arg}-" + + for k, v in kwargs.items(): + key += f"{k}={v}-" + + return key + + +async def get_forecast_data( + _client, location, start_date, end_date, selected_forecasters +) -> pd.DataFrame: + all_data_df = [] + + for forecaster in selected_forecasters: + forecaster_data_df = await get_forecast_data_one_forecaster( + _client, location, start_date, end_date, forecaster + ) + all_data_df.append(forecaster_data_df) + + all_data_df = pd.concat(all_data_df, ignore_index=True) + + # get watt value + all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[ + "effective_capacity_watts" + ].astype(float) + + return all_data_df + + +@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) +async def get_forecast_data_one_forecaster( + client, location, start_date, end_date, selected_forecaster +) -> pd.DataFrame: + all_data_df = [] + + # loop over 30 days of data + temp_start_date = start_date + while temp_start_date <= end_date: + temp_end_date = temp_start_date + timedelta(days=7) + if temp_end_date > end_date: + temp_end_date = end_date + + # fetch data + stream_forecast_data_request = dp.StreamForecastDataRequest( + location_uuid=location.location_uuid, + energy_source=dp.EnergySource.SOLAR, + time_window=dp.TimeWindow( + start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date + ), + forecasters=[selected_forecaster], + ) + forecasts = [] + async for chunk in client.stream_forecast_data(stream_forecast_data_request): + forecasts.append( + chunk.to_dict( + include_default_values=True, casing=betterproto.Casing.SNAKE + ) + ) + + if len(forecasts) > 0: + all_data_df.append( + pd.DataFrame.from_dict(forecasts) + .pipe( + lambda df: df.join( + pd.json_normalize(df["other_statistics_fractions"]) + ) + ) + .drop("other_statistics_fractions", axis=1) + ) + + temp_start_date = temp_start_date + 
timedelta(days=7) + + all_data_df = pd.concat(all_data_df, ignore_index=True) + + return all_data_df + + +@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) +async def get_all_observations(_client, location, start_date, end_date) -> pd.DataFrame: + all_observations_df = [] + + for observer_name in observer_names: + # loop over 7 days of data + observation_one_df = [] + temp_start_date = start_date + while temp_start_date <= end_date: + temp_end_date = temp_start_date + timedelta(days=7) + if temp_end_date > end_date: + temp_end_date = end_date + + get_observations_request = dp.GetObservationsAsTimeseriesRequest( + observer_name=observer_name, + location_uuid=location.location_uuid, + energy_source=dp.EnergySource.SOLAR, + time_window=dp.TimeWindow(temp_start_date, temp_end_date), + ) + get_observations_response = await _client.get_observations_as_timeseries( + get_observations_request + ) + + observations = [] + for chunk in get_observations_response.values: + observations.append( + chunk.to_dict( + include_default_values=True, casing=betterproto.Casing.SNAKE + ) + ) + + observation_one_df.append(pd.DataFrame.from_dict(observations)) + + temp_start_date = temp_start_date + timedelta(days=7) + + observation_one_df = pd.concat(observation_one_df, ignore_index=True) + observation_one_df = observation_one_df.sort_values(by="timestamp_utc") + observation_one_df["observer_name"] = observer_name + + all_observations_df.append(observation_one_df) + + all_observations_df = pd.concat(all_observations_df, ignore_index=True) + + all_observations_df["value_watts"] = all_observations_df["value_fraction"].astype( + float + ) * all_observations_df["effective_capacity_watts"].astype(float) + + return all_observations_df \ No newline at end of file diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index fe9f3da..c8ff27b 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -6,9 +6,9 @@ import pandas as pd from 
grpclib.client import Channel import plotly.graph_objects as go -import betterproto import time -from aiocache import Cache, cached + +from src.dataplatform.data import get_all_observations, get_forecast_data data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost") data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051")) @@ -17,140 +17,6 @@ observer_names = ["pvlive_in_day", "pvlive_day_after"] -def key_builder_remove_client(func, *args, **kwargs): - """Custom key builder that ignores the client argument for caching purposes.""" - - key = f"{func.__name__}:" - for arg in args: - if isinstance(arg, dp.DataPlatformDataServiceStub): - continue - key += f"{arg}-" - - for k, v in kwargs.items(): - key += f"{k}={v}-" - - return key - - -async def get_forecast_data( - _client, location, start_date, end_date, selected_forecasters -) -> pd.DataFrame: - all_data_df = [] - - for forecaster in selected_forecasters: - forecaster_data_df = await get_forecast_data_one_forecaster( - _client, location, start_date, end_date, forecaster - ) - all_data_df.append(forecaster_data_df) - - all_data_df = pd.concat(all_data_df, ignore_index=True) - - # get watt value - all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[ - "effective_capacity_watts" - ].astype(float) - - return all_data_df - - -@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) -async def get_forecast_data_one_forecaster( - client, location, start_date, end_date, selected_forecaster -) -> pd.DataFrame: - all_data_df = [] - - # loop over 30 days of data - temp_start_date = start_date - while temp_start_date <= end_date: - temp_end_date = temp_start_date + timedelta(days=7) - if temp_end_date > end_date: - temp_end_date = end_date - - # fetch data - stream_forecast_data_request = dp.StreamForecastDataRequest( - location_uuid=location.location_uuid, - energy_source=dp.EnergySource.SOLAR, - time_window=dp.TimeWindow( - 
start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date - ), - forecasters=[selected_forecaster], - ) - forecasts = [] - async for chunk in client.stream_forecast_data(stream_forecast_data_request): - forecasts.append( - chunk.to_dict( - include_default_values=True, casing=betterproto.Casing.SNAKE - ) - ) - - if len(forecasts) > 0: - all_data_df.append( - pd.DataFrame.from_dict(forecasts) - .pipe( - lambda df: df.join( - pd.json_normalize(df["other_statistics_fractions"]) - ) - ) - .drop("other_statistics_fractions", axis=1) - ) - - temp_start_date = temp_start_date + timedelta(days=7) - - all_data_df = pd.concat(all_data_df, ignore_index=True) - - return all_data_df - - -@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) -async def get_all_observations(_client, location, start_date, end_date) -> pd.DataFrame: - all_observations_df = [] - - for observer_name in observer_names: - # loop over 7 days of data - observation_one_df = [] - temp_start_date = start_date - while temp_start_date <= end_date: - temp_end_date = temp_start_date + timedelta(days=7) - if temp_end_date > end_date: - temp_end_date = end_date - - get_observations_request = dp.GetObservationsAsTimeseriesRequest( - observer_name=observer_name, - location_uuid=location.location_uuid, - energy_source=dp.EnergySource.SOLAR, - time_window=dp.TimeWindow(temp_start_date, temp_end_date), - ) - get_observations_response = await _client.get_observations_as_timeseries( - get_observations_request - ) - - observations = [] - for chunk in get_observations_response.values: - observations.append( - chunk.to_dict( - include_default_values=True, casing=betterproto.Casing.SNAKE - ) - ) - - observation_one_df.append(pd.DataFrame.from_dict(observations)) - - temp_start_date = temp_start_date + timedelta(days=7) - - observation_one_df = pd.concat(observation_one_df, ignore_index=True) - observation_one_df = observation_one_df.sort_values(by="timestamp_utc") - 
observation_one_df["observer_name"] = observer_name - - all_observations_df.append(observation_one_df) - - all_observations_df = pd.concat(all_observations_df, ignore_index=True) - - all_observations_df["value_watts"] = all_observations_df["value_fraction"].astype( - float - ) * all_observations_df["effective_capacity_watts"].astype(float) - - return all_observations_df - - def dp_forecast_page(): asyncio.run(async_dp_forecast_page()) From 75ab761c42cced5d0c1838c26ce3d3999bf3e9d7 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Tue, 18 Nov 2025 11:23:51 +0000 Subject: [PATCH 16/60] add colours to main plot --- src/dataplatform/forecast.py | 53 ++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index c8ff27b..b4825ed 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -16,6 +16,19 @@ # TODO make this dynamic observer_names = ["pvlive_in_day", "pvlive_day_after"] +colours = [ + "#FFD480", + "#FF8F73", + "#4675C1", + "#65B0C9", + "#58B0A9", + "#FAA056", + "#306BFF", + "#FF4901", + "#B701FF", + "#17E58F", +] + def dp_forecast_page(): asyncio.run(async_dp_forecast_page()) @@ -145,7 +158,30 @@ async def async_dp_forecast_page(): # plot the results fig = go.Figure() - for forecaster in selected_forecasters: + for observer_name in observer_names: + obs_df = all_observations_df[ + all_observations_df["observer_name"] == observer_name + ] + + if observer_name == "pvlive_in_day": + # dashed white line + line = dict(color="white", dash="dash") + elif observer_name == "pvlive_day_after": + line = dict(color="white") + else: + line = dict() + + fig.add_trace( + go.Scatter( + x=obs_df["timestamp_utc"], + y=obs_df["value_watts"], + mode="lines", + name=observer_name, + line=line, + ) + ) + + for i, forecaster in enumerate(selected_forecasters): name_and_version = ( f"{forecaster.forecaster_name}:{forecaster.forecaster_version}" ) @@ -158,19 
+194,7 @@ async def async_dp_forecast_page(): y=forecaster_df["p50_watts"], mode="lines", name=forecaster.forecaster_name, - ) - ) - - for observer_name in observer_names: - obs_df = all_observations_df[ - all_observations_df["observer_name"] == observer_name - ] - fig.add_trace( - go.Scatter( - x=obs_df["timestamp_utc"], - y=obs_df["value_watts"], - mode="lines", - name=observer_name, + line=dict(color=colours[i % len(colours)]), ) ) @@ -395,7 +419,6 @@ async def async_dp_forecast_page(): st.header("TODO") - st.write("Add caching on data") st.write("Add probabilistic") st.write("Scale to KW/MW/GW as needed") st.write("Align forecasts on t0") From dc1b91f25ffb2928dba2532e84de15d264eea85f Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Tue, 18 Nov 2025 11:30:32 +0000 Subject: [PATCH 17/60] update import --- src/dataplatform/forecast.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index b4825ed..5312412 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -8,7 +8,7 @@ import plotly.graph_objects as go import time -from src.dataplatform.data import get_all_observations, get_forecast_data +from dataplatform.data import get_all_observations, get_forecast_data data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost") data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051")) @@ -354,6 +354,7 @@ async def async_dp_forecast_page(): mime="text/csv", ) + # 3. Summary Accuracy Table, with slider to select min and max horizon mins st.header("Summary Accuracy Table") # add slider to select min and max horizon mins @@ -417,6 +418,10 @@ async def async_dp_forecast_page(): st.dataframe(summary_table_df) + # 4. 
Daily metric plots + st.header("Daily Metrics Plots") + st.write("TODO") + st.header("TODO") st.write("Add probabilistic") From a111be7f84dc13f99894ffedbbaa52c9d042b27f Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Tue, 18 Nov 2025 14:32:02 +0000 Subject: [PATCH 18/60] scale by units and add colours --- src/dataplatform/forecast.py | 42 +++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 5312412..9e96927 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -111,6 +111,16 @@ async def async_dp_forecast_page(): # select forecast type st.sidebar.write("TODO Select Forecast Type:") + # select units + if location_type == dp.LocationType.NATION: + default_unit_index = 3 # GW + else: + default_unit_index = 2 # MW + units = st.sidebar.selectbox("Select Units", ["W", "kW", "MW", "GW"], index=default_unit_index) + scale_factors = {"W": 1, "kW": 1e3, "MW": 1e6, "GW": 1e9} + scale_factor = scale_factors[units] + + # get generation data time_start = time.time() all_observations_df = await get_all_observations( @@ -174,7 +184,7 @@ async def async_dp_forecast_page(): fig.add_trace( go.Scatter( x=obs_df["timestamp_utc"], - y=obs_df["value_watts"], + y=obs_df["value_watts"] / scale_factor, mode="lines", name=observer_name, line=line, @@ -191,7 +201,7 @@ async def async_dp_forecast_page(): fig.add_trace( go.Scatter( x=forecaster_df["target_timestamp_utc"], - y=forecaster_df["p50_watts"], + y=forecaster_df["p50_watts"] / scale_factor, mode="lines", name=forecaster.forecaster_name, line=dict(color=colours[i % len(colours)]), @@ -201,7 +211,7 @@ async def async_dp_forecast_page(): fig.update_layout( title="Current Forecast", xaxis_title="Time", - yaxis_title="Generation [Watts]", + yaxis_title=f"Generation [{units}]", legend_title="Forecaster", ) @@ -296,7 +306,7 @@ async def async_dp_forecast_page(): fig2 = go.Figure() - for forecaster in 
selected_forecasters: + for i, forecaster in enumerate(selected_forecasters): name_and_version = ( f"{forecaster.forecaster_name}:{forecaster.forecaster_version}" ) @@ -306,19 +316,19 @@ async def async_dp_forecast_page(): fig2.add_trace( go.Scatter( x=forecaster_df["horizon_mins"], - y=forecaster_df[selected_metric], + y=forecaster_df[selected_metric] / scale_factor, mode="lines+markers", name=forecaster.forecaster_name, + line=dict(color=colours[i % len(colours)]), ) ) fig2.add_trace( go.Scatter( x=forecaster_df["horizon_mins"], - y=forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"], + y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor, mode="lines", - # name="p10: " + model, - # line=dict(color=get_colour_from_model_name(model), width=0), + line=dict(color=colours[i % len(colours)], width=0), legendgroup=forecaster.forecaster_name, showlegend=False, ) @@ -327,10 +337,9 @@ async def async_dp_forecast_page(): fig2.add_trace( go.Scatter( x=forecaster_df["horizon_mins"], - y=forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"], + y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor, mode="lines", - # name="p10: " + model, - # line=dict(color=get_colour_from_model_name(model), width=0), + line=dict(color=colours[i % len(colours)], width=0), legendgroup=forecaster.forecaster_name, showlegend=False, fill="tonexty", @@ -340,7 +349,7 @@ async def async_dp_forecast_page(): fig2.update_layout( title=f"{selected_metric} by Horizon", xaxis_title="Horizon (Minutes)", - yaxis_title=selected_metric, + yaxis_title=f"{selected_metric} [{units}]", legend_title="Forecaster", ) @@ -410,12 +419,20 @@ async def async_dp_forecast_page(): } ) + # scale by units + summary_table_df = summary_table_df / scale_factor + summary_table_df = summary_table_df.rename( + {col: f'{col} [{units}]' for col in summary_table_df.columns}, + axis=1, + ) + # pivot table, so forecaster_fullname is columns summary_table_df = 
summary_table_df.pivot_table( columns=summary_table_df.index, values=summary_table_df.columns.tolist(), ) + st.dataframe(summary_table_df) # 4. Daily metric plots @@ -425,7 +442,6 @@ async def async_dp_forecast_page(): st.header("TODO") st.write("Add probabilistic") - st.write("Scale to KW/MW/GW as needed") st.write("Align forecasts on t0") st.write("Add more metrics") st.write("Add forecast horizon options") From 5a0442385320cf80d2651443eabfa1deb03bc439 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Tue, 18 Nov 2025 14:41:42 +0000 Subject: [PATCH 19/60] add probablistic --- src/dataplatform/data.py | 6 ++++++ src/dataplatform/forecast.py | 26 +++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/dataplatform/data.py b/src/dataplatform/data.py index 03c9518..3bbf675 100644 --- a/src/dataplatform/data.py +++ b/src/dataplatform/data.py @@ -45,6 +45,12 @@ async def get_forecast_data( "effective_capacity_watts" ].astype(float) + for col in ["p10", "p25", "p75", "p90"]: + if col in all_data_df.columns: + all_data_df[f"{col}_watts"] = all_data_df[col].astype(float) * all_data_df[ + "effective_capacity_watts" + ].astype(float) + return all_data_df diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 9e96927..19df5bc 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -207,6 +207,31 @@ async def async_dp_forecast_page(): line=dict(color=colours[i % len(colours)]), ) ) + print(forecaster_df.columns) + if 'p10_watts' in forecaster_df.columns and 'p90_watts' in forecaster_df.columns: + fig.add_trace( + go.Scatter( + x=forecaster_df["target_timestamp_utc"], + y=forecaster_df["p10_watts"] / scale_factor, + mode="lines", + line=dict(color=colours[i % len(colours)], width=0), + legendgroup=forecaster.forecaster_name, + showlegend=False, + ) + ) + + fig.add_trace( + go.Scatter( + x=forecaster_df["target_timestamp_utc"], + y=forecaster_df["p90_watts"] / scale_factor, + mode="lines", + 
line=dict(color=colours[i % len(colours)], width=0), + legendgroup=forecaster.forecaster_name, + showlegend=False, + fill="tonexty", + ) + ) + fig.update_layout( title="Current Forecast", @@ -441,7 +466,6 @@ async def async_dp_forecast_page(): st.header("TODO") - st.write("Add probabilistic") st.write("Align forecasts on t0") st.write("Add more metrics") st.write("Add forecast horizon options") From d5380641a286316c51778137bdd1bda90abce39c Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Tue, 18 Nov 2025 15:52:47 +0000 Subject: [PATCH 20/60] add forecast type options, add daily MAE options --- src/dataplatform/forecast.py | 83 ++++++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 14 deletions(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 19df5bc..f2e0df1 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -109,7 +109,14 @@ async def async_dp_forecast_page(): ) # select forecast type - st.sidebar.write("TODO Select Forecast Type:") + selected_forecast_type = st.sidebar.selectbox( + "Select a Forecast Type", ['Current', "Horizon", "t0"], index=0 + ) + + if selected_forecast_type == 'Horizon': + selected_forecast_horizon = st.sidebar.selectbox( + "Select a Forecast Horizon", list(range(0,2400,30)), index=3 + ) # select units if location_type == dp.LocationType.NATION: @@ -157,14 +164,27 @@ async def async_dp_forecast_page(): all_forecast_data_df["init_timestamp"] ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m") - # Choose current forecast - # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins - # it should also be unique for each forecasterFullName - current_forecast_df = all_forecast_data_df.loc[ - all_forecast_data_df.groupby( - ["target_timestamp_utc", "forecaster_fullname"] - )["horizon_mins"].idxmin() - ] + if selected_forecast_type == 'Current': + # Choose current forecast + # this is done by selecting the unique 
target_timestamp_utc with the the lowest horizonMins + # it should also be unique for each forecasterFullName + current_forecast_df = all_forecast_data_df.loc[ + all_forecast_data_df.groupby( + ["target_timestamp_utc", "forecaster_fullname"] + )["horizon_mins"].idxmin() + ] + elif selected_forecast_type == 'Horizon': + # Choose horizon forecast + current_forecast_df = all_forecast_data_df[ + all_forecast_data_df["horizon_mins"] >= selected_forecast_horizon + ] + current_forecast_df = current_forecast_df.loc[ + current_forecast_df.groupby( + ["target_timestamp_utc", "forecaster_fullname"] + )["horizon_mins"].idxmin() + ] + else: + pass # plot the results fig = go.Figure() @@ -462,14 +482,49 @@ async def async_dp_forecast_page(): # 4. Daily metric plots st.header("Daily Metrics Plots") - st.write("TODO") + st.write("Plotted below are the daily MAE for each forecaster. This is for all forecast horizons.") + daily_plots_df = merged_df + daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date + + # group by forecaster name and date + daily_metrics_df = ( + daily_plots_df.groupby(["date_utc", "forecaster_fullname"]) + .agg({"absolute_error": "mean"}) + .reset_index() + ) + + fig3 = go.Figure() + for i, forecaster in enumerate(selected_forecasters): + name_and_version = ( + f"{forecaster.forecaster_name}:{forecaster.forecaster_version}" + ) + forecaster_df = daily_metrics_df[ + daily_metrics_df["forecaster_fullname"] == name_and_version + ] + fig3.add_trace( + go.Scatter( + x=forecaster_df["date_utc"], + y=forecaster_df["absolute_error"] / scale_factor, + # mode="lines+markers", + name=forecaster.forecaster_name, + line=dict(color=colours[i % len(colours)]), + ) + ) + + fig3.update_layout( + title=f"Daily MAE", + xaxis_title="Date", + yaxis_title=f"MAE [{units}]", + legend_title="Forecaster", + ) + + st.plotly_chart(fig3) + + st.header("TODO") st.write("Align forecasts on t0") st.write("Add more metrics") - st.write("Add forecast horizon options") - 
st.write("Add creation time forecast filter") - st.write("Daily Metrics graphs") - st.write("colours") + st.write("Add creation time / t0 forecast filter") st.write("speed up read, use async and more caching") From 7791489bf9c971ca9ade5c75f9b97ca21c99b221 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Tue, 18 Nov 2025 16:24:09 +0000 Subject: [PATCH 21/60] add daily ME --- src/dataplatform/forecast.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index f2e0df1..e566faa 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -266,8 +266,8 @@ async def async_dp_forecast_page(): metrics = { "MAE": "MAE is absolute mean error, average(abs(y-x))", "ME": "ME is mean (bias) error, average((y-x))", - "NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)", - "NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)", + # "TODO NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)", + # "TODO NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)", # "NMAE (by observed generation)":" NAME (by observed generation)" } selected_metric = st.sidebar.selectbox( @@ -491,6 +491,12 @@ async def async_dp_forecast_page(): daily_plots_df.groupby(["date_utc", "forecaster_fullname"]) .agg({"absolute_error": "mean"}) .reset_index() + ).rename(columns={"absolute_error": "MAE"}) + # ME + daily_metrics_df["ME"] = ( + daily_plots_df.groupby(["date_utc", "forecaster_fullname"]) + .agg({"error": "mean"}) + .reset_index()["error"] ) fig3 = go.Figure() @@ -504,7 +510,7 @@ async def async_dp_forecast_page(): fig3.add_trace( go.Scatter( x=forecaster_df["date_utc"], - y=forecaster_df["absolute_error"] / scale_factor, + y=forecaster_df[selected_metric] / scale_factor, # mode="lines+markers", name=forecaster.forecaster_name, 
line=dict(color=colours[i % len(colours)]), @@ -512,9 +518,9 @@ async def async_dp_forecast_page(): ) fig3.update_layout( - title=f"Daily MAE", + title=f"Daily {selected_metric}", xaxis_title="Date", - yaxis_title=f"MAE [{units}]", + yaxis_title=f"{selected_metric} [{units}]", legend_title="Forecaster", ) From 095d49586c4a9e61448f224506ce19cef93633cf Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Wed, 19 Nov 2025 12:16:10 +0000 Subject: [PATCH 22/60] add two todos --- src/dataplatform/forecast.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index e566faa..277f6ce 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -529,7 +529,9 @@ async def async_dp_forecast_page(): st.header("TODO") - + + st.write("Deal with new forecast versions") + st.write("Turn off probability when line clicked off") st.write("Align forecasts on t0") st.write("Add more metrics") st.write("Add creation time / t0 forecast filter") From d10135f79cd43205d8d6fdf94dd70c82f391c45c Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Wed, 19 Nov 2025 13:52:24 +0000 Subject: [PATCH 23/60] solve for different forecast versions --- src/dataplatform/data.py | 5 +++ src/dataplatform/forecast.py | 62 ++++++++++++++++-------------------- 2 files changed, 33 insertions(+), 34 deletions(-) diff --git a/src/dataplatform/data.py b/src/dataplatform/data.py index 3bbf675..f9dcf6a 100644 --- a/src/dataplatform/data.py +++ b/src/dataplatform/data.py @@ -99,6 +99,11 @@ async def get_forecast_data_one_forecaster( all_data_df = pd.concat(all_data_df, ignore_index=True) + # create column forecaster_name, its forecaster_fullname with version removed + all_data_df["forecaster_name"] = all_data_df["forecaster_fullname"].apply( + lambda x: x.rsplit(":", 1)[0] # split from right, max 1 split + ) + return all_data_df diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 277f6ce..da07190 
100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -73,12 +73,12 @@ async def async_dp_forecast_page(): ) # get models - get_forecasters_request = dp.ListForecastersRequest(latest_versions_only=True) + get_forecasters_request = dp.ListForecastersRequest() get_forecasters_response = await client.list_forecasters( get_forecasters_request ) forecasters = get_forecasters_response.forecasters - forecaster_names = [forecaster.forecaster_name for forecaster in forecasters] + forecaster_names = sorted(list(set([forecaster.forecaster_name for forecaster in forecasters]))) if "pvnet_v2" in forecaster_names: default_index = forecaster_names.index("pvnet_v2") else: @@ -170,7 +170,7 @@ async def async_dp_forecast_page(): # it should also be unique for each forecasterFullName current_forecast_df = all_forecast_data_df.loc[ all_forecast_data_df.groupby( - ["target_timestamp_utc", "forecaster_fullname"] + ["target_timestamp_utc", "forecaster_name"] )["horizon_mins"].idxmin() ] elif selected_forecast_type == 'Horizon': @@ -180,7 +180,7 @@ async def async_dp_forecast_page(): ] current_forecast_df = current_forecast_df.loc[ current_forecast_df.groupby( - ["target_timestamp_utc", "forecaster_fullname"] + ["target_timestamp_utc", "forecaster_name"] )["horizon_mins"].idxmin() ] else: @@ -211,23 +211,19 @@ async def async_dp_forecast_page(): ) ) - for i, forecaster in enumerate(selected_forecasters): - name_and_version = ( - f"{forecaster.forecaster_name}:{forecaster.forecaster_version}" - ) + for i, forecaster_name in enumerate(forecaster_names): forecaster_df = current_forecast_df[ - current_forecast_df["forecaster_fullname"] == name_and_version + current_forecast_df["forecaster_name"] == forecaster_name ] fig.add_trace( go.Scatter( x=forecaster_df["target_timestamp_utc"], y=forecaster_df["p50_watts"] / scale_factor, mode="lines", - name=forecaster.forecaster_name, + name=forecaster_name, line=dict(color=colours[i % len(colours)]), ) ) - 
print(forecaster_df.columns) if 'p10_watts' in forecaster_df.columns and 'p90_watts' in forecaster_df.columns: fig.add_trace( go.Scatter( @@ -235,7 +231,7 @@ async def async_dp_forecast_page(): y=forecaster_df["p10_watts"] / scale_factor, mode="lines", line=dict(color=colours[i % len(colours)], width=0), - legendgroup=forecaster.forecaster_name, + legendgroup=forecaster_name, showlegend=False, ) ) @@ -246,7 +242,7 @@ async def async_dp_forecast_page(): y=forecaster_df["p90_watts"] / scale_factor, mode="lines", line=dict(color=colours[i % len(colours)], width=0), - legendgroup=forecaster.forecaster_name, + legendgroup=forecaster_name, showlegend=False, fill="tonexty", ) @@ -304,17 +300,17 @@ async def async_dp_forecast_page(): # merged_df['absolute_error_normalized_by_generation'] = merged_df['absolute_error'] / merged_df['value_watts'] summary_df = ( - merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + merged_df.groupby(["horizon_mins", "forecaster_name"]) .agg({"absolute_error": "mean"}) .reset_index() ) summary_df["std"] = ( - merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + merged_df.groupby(["horizon_mins", "forecaster_name"]) .agg({"absolute_error": "std"}) .reset_index()["absolute_error"] ) summary_df["count"] = ( - merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + merged_df.groupby(["horizon_mins", "forecaster_name"]) .agg({"absolute_error": "count"}) .reset_index()["absolute_error"] ) @@ -322,19 +318,19 @@ async def async_dp_forecast_page(): # ME summary_df["ME"] = ( - merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + merged_df.groupby(["horizon_mins", "forecaster_name"]) .agg({"error": "mean"}) .reset_index()["error"] ) # summary_df["absolute_error_divided_by_observed"] = ( - # merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + # merged_df.groupby(["horizon_mins", "forecaster_name"]) # .agg({"absolute_error_normalized_by_generation": "mean"}) # .reset_index()["absolute_error_normalized_by_generation"] # 
) summary_df["effective_capacity_watts_observation"] = ( - merged_df.groupby(["horizon_mins", "forecaster_fullname"]) + merged_df.groupby(["horizon_mins", "forecaster_name"]) .agg({"effective_capacity_watts_observation": "mean"}) .reset_index()["effective_capacity_watts_observation"] ) @@ -351,19 +347,17 @@ async def async_dp_forecast_page(): fig2 = go.Figure() - for i, forecaster in enumerate(selected_forecasters): - name_and_version = ( - f"{forecaster.forecaster_name}:{forecaster.forecaster_version}" - ) + for i, forecaster_name in enumerate(forecaster_names): + forecaster_df = summary_df[ - summary_df["forecaster_fullname"] == name_and_version + summary_df["forecaster_name"] == forecaster_name ] fig2.add_trace( go.Scatter( x=forecaster_df["horizon_mins"], y=forecaster_df[selected_metric] / scale_factor, mode="lines+markers", - name=forecaster.forecaster_name, + name=forecaster_name, line=dict(color=colours[i % len(colours)]), ) ) @@ -374,7 +368,7 @@ async def async_dp_forecast_page(): y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor, mode="lines", line=dict(color=colours[i % len(colours)], width=0), - legendgroup=forecaster.forecaster_name, + legendgroup=forecaster_name, showlegend=False, ) ) @@ -385,7 +379,7 @@ async def async_dp_forecast_page(): y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor, mode="lines", line=dict(color=colours[i % len(colours)], width=0), - legendgroup=forecaster.forecaster_name, + legendgroup=forecaster_name, showlegend=False, fill="tonexty", ) @@ -444,14 +438,14 @@ async def async_dp_forecast_page(): "Capacity_watts", ] - summary_table_df = summary_table_df[["forecaster_fullname"] + value_columns] + summary_table_df = summary_table_df[["forecaster_name"] + value_columns] summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype( float ) # group by forecaster full name a - summary_table_df = summary_table_df.groupby("forecaster_fullname").mean() + 
summary_table_df = summary_table_df.groupby("forecaster_name").mean() # rename summary_table_df = summary_table_df.rename( @@ -471,7 +465,7 @@ async def async_dp_forecast_page(): axis=1, ) - # pivot table, so forecaster_fullname is columns + # pivot table, so forecaster_name is columns summary_table_df = summary_table_df.pivot_table( columns=summary_table_df.index, values=summary_table_df.columns.tolist(), @@ -488,13 +482,13 @@ async def async_dp_forecast_page(): # group by forecaster name and date daily_metrics_df = ( - daily_plots_df.groupby(["date_utc", "forecaster_fullname"]) + daily_plots_df.groupby(["date_utc", "forecaster_name"]) .agg({"absolute_error": "mean"}) .reset_index() ).rename(columns={"absolute_error": "MAE"}) # ME daily_metrics_df["ME"] = ( - daily_plots_df.groupby(["date_utc", "forecaster_fullname"]) + daily_plots_df.groupby(["date_utc", "forecaster_name"]) .agg({"error": "mean"}) .reset_index()["error"] ) @@ -502,10 +496,10 @@ async def async_dp_forecast_page(): fig3 = go.Figure() for i, forecaster in enumerate(selected_forecasters): name_and_version = ( - f"{forecaster.forecaster_name}:{forecaster.forecaster_version}" + f"{forecaster.forecaster_name}" ) forecaster_df = daily_metrics_df[ - daily_metrics_df["forecaster_fullname"] == name_and_version + daily_metrics_df["forecaster_name"] == name_and_version ] fig3.add_trace( go.Scatter( From 2a57b66d70cedcc45259854743e60735192bc1bb Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Wed, 19 Nov 2025 13:52:33 +0000 Subject: [PATCH 24/60] remove from todo --- src/dataplatform/forecast.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index da07190..4cfe39f 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -524,7 +524,6 @@ async def async_dp_forecast_page(): st.header("TODO") - st.write("Deal with new forecast versions") st.write("Turn off probability when line clicked off") st.write("Align forecasts on t0") 
st.write("Add more metrics") From 85dd00a0f74e7c6834a51a5fd76ea5686417083d Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Thu, 20 Nov 2025 08:02:33 +0000 Subject: [PATCH 25/60] add legendgroup --- src/dataplatform/forecast.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index 4cfe39f..cea9258 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -222,6 +222,8 @@ async def async_dp_forecast_page(): mode="lines", name=forecaster_name, line=dict(color=colours[i % len(colours)]), + legendgroup=forecaster_name, + ) ) if 'p10_watts' in forecaster_df.columns and 'p90_watts' in forecaster_df.columns: @@ -524,7 +526,7 @@ async def async_dp_forecast_page(): st.header("TODO") - st.write("Turn off probability when line clicked off") + st.write("Make metrics based on pvlive_data_after") st.write("Align forecasts on t0") st.write("Add more metrics") st.write("Add creation time / t0 forecast filter") From 6d0b280211d5cb9c7d8f2eba56780c4d05ddb124 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Thu, 20 Nov 2025 11:41:01 +0000 Subject: [PATCH 26/60] filter on pvlive_day_after --- src/dataplatform/forecast.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py index cea9258..6ee80b2 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -260,6 +260,7 @@ async def async_dp_forecast_page(): st.plotly_chart(fig) + # 2. 
Summary Accuracy Graph st.header("Summary Accuracy Graph") metrics = { "MAE": "MAE is absolute mean error, average(abs(y-x))", @@ -274,6 +275,12 @@ async def async_dp_forecast_page(): st.write(metrics) + # If the observation data includes pvlive_day_after and pvlive_in_day, then lets just take pvlive_day_after + if "pvlive_day_after" in all_observations_df["observer_name"].values: + all_observations_df = all_observations_df[ + all_observations_df["observer_name"] == "pvlive_day_after" + ] + # take the foecast data, and group by horizonMins, forecasterFullName # calculate mean absolute error between p50Fraction and observations valueFraction all_observations_df["timestamp_utc"] = pd.to_datetime( @@ -291,16 +298,14 @@ async def async_dp_forecast_page(): "effective_capacity_watts_observation" ].astype(float) - # error + # error and absolute error merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"] + merged_df["absolute_error"] = merged_df["error"].abs() - # absolute error - merged_df["absolute_error"] = (merged_df["error"]).abs() - - # absolute error, normalized by mean observed generation + # Get the mean observed generation mean_observed_generation = merged_df["value_watts"].mean() - # merged_df['absolute_error_normalized_by_generation'] = merged_df['absolute_error'] / merged_df['value_watts'] - + + # mean absolute error by horizonMins and forecasterFullName summary_df = ( merged_df.groupby(["horizon_mins", "forecaster_name"]) .agg({"absolute_error": "mean"}) @@ -325,11 +330,7 @@ async def async_dp_forecast_page(): .reset_index()["error"] ) - # summary_df["absolute_error_divided_by_observed"] = ( - # merged_df.groupby(["horizon_mins", "forecaster_name"]) - # .agg({"absolute_error_normalized_by_generation": "mean"}) - # .reset_index()["absolute_error_normalized_by_generation"] - # ) + # TODO more metrics summary_df["effective_capacity_watts_observation"] = ( merged_df.groupby(["horizon_mins", "forecaster_name"]) @@ -526,7 +527,6 @@ async def 
async_dp_forecast_page(): st.header("TODO") - st.write("Make metrics based on pvlive_data_after") st.write("Align forecasts on t0") st.write("Add more metrics") st.write("Add creation time / t0 forecast filter") From fa6af554b2ad048398a50cad5dc57770f75dd769 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Thu, 20 Nov 2025 16:39:07 +0000 Subject: [PATCH 27/60] add todo bug not releasing cache --- src/dataplatform/data.py | 8 ++++---- src/dataplatform/forecast.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/dataplatform/data.py b/src/dataplatform/data.py index f9dcf6a..f44da71 100644 --- a/src/dataplatform/data.py +++ b/src/dataplatform/data.py @@ -28,13 +28,13 @@ def key_builder_remove_client(func, *args, **kwargs): async def get_forecast_data( - _client, location, start_date, end_date, selected_forecasters + client, location, start_date, end_date, selected_forecasters ) -> pd.DataFrame: all_data_df = [] for forecaster in selected_forecasters: forecaster_data_df = await get_forecast_data_one_forecaster( - _client, location, start_date, end_date, forecaster + client, location, start_date, end_date, forecaster ) all_data_df.append(forecaster_data_df) @@ -108,7 +108,7 @@ async def get_forecast_data_one_forecaster( @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) -async def get_all_observations(_client, location, start_date, end_date) -> pd.DataFrame: +async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame: all_observations_df = [] for observer_name in observer_names: @@ -126,7 +126,7 @@ async def get_all_observations(_client, location, start_date, end_date) -> pd.Da energy_source=dp.EnergySource.SOLAR, time_window=dp.TimeWindow(temp_start_date, temp_end_date), ) - get_observations_response = await _client.get_observations_as_timeseries( + get_observations_response = await client.get_observations_as_timeseries( get_observations_request ) diff --git a/src/dataplatform/forecast.py 
b/src/dataplatform/forecast.py index 6ee80b2..368c639 100644 --- a/src/dataplatform/forecast.py +++ b/src/dataplatform/forecast.py @@ -527,6 +527,7 @@ async def async_dp_forecast_page(): st.header("TODO") + st.write("Bug: cache not releasing") st.write("Align forecasts on t0") st.write("Add more metrics") st.write("Add creation time / t0 forecast filter") From d3bdaf6084a1ba795a047aca55723e882de1188a Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Thu, 20 Nov 2025 20:51:43 +0000 Subject: [PATCH 28/60] refactor into multiple files --- src/dataplatform/forecast.py | 534 ------------------------ src/dataplatform/forecast/constanst.py | 20 + src/dataplatform/{ => forecast}/data.py | 117 ++++-- src/dataplatform/forecast/main.py | 213 ++++++++++ src/dataplatform/forecast/plot.py | 243 +++++++++++ src/dataplatform/forecast/setup.py | 95 +++++ 6 files changed, 664 insertions(+), 558 deletions(-) delete mode 100644 src/dataplatform/forecast.py create mode 100644 src/dataplatform/forecast/constanst.py rename src/dataplatform/{ => forecast}/data.py (58%) create mode 100644 src/dataplatform/forecast/main.py create mode 100644 src/dataplatform/forecast/plot.py create mode 100644 src/dataplatform/forecast/setup.py diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py deleted file mode 100644 index 368c639..0000000 --- a/src/dataplatform/forecast.py +++ /dev/null @@ -1,534 +0,0 @@ -import streamlit as st -from datetime import datetime, timedelta, timezone -import os -import asyncio -from dp_sdk.ocf import dp -import pandas as pd -from grpclib.client import Channel -import plotly.graph_objects as go -import time - -from dataplatform.data import get_all_observations, get_forecast_data - -data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost") -data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051")) - -# TODO make this dynamic -observer_names = ["pvlive_in_day", "pvlive_day_after"] - -colours = [ - "#FFD480", - "#FF8F73", - "#4675C1", - 
"#65B0C9", - "#58B0A9", - "#FAA056", - "#306BFF", - "#FF4901", - "#B701FF", - "#17E58F", -] - - -def dp_forecast_page(): - asyncio.run(async_dp_forecast_page()) - - -async def async_dp_forecast_page(): - st.title("Data Platform Forecast Page") - st.write( - "This is the forecast page from the Data Platform module. This is very much a WIP" - ) - - async with Channel(host=data_platform_host, port=data_platform_port) as channel: - client = dp.DataPlatformDataServiceStub(channel) - - # Select Country - country = st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0) - - # Select Location Type - location_types = [ - dp.LocationType.NATION, - dp.LocationType.GSP, - dp.LocationType.SITE, - ] - location_type = st.sidebar.selectbox( - "Select a Location Type", location_types, index=0 - ) - - # List Location - list_locations_request = dp.ListLocationsRequest( - location_type_filter=location_type - ) - list_locations_response = await client.list_locations(list_locations_request) - locations = list_locations_response.locations - location_names = [loc.location_name for loc in locations] - - # slect locations - selected_location_name = st.sidebar.selectbox( - "Select a Location", location_names, index=0 - ) - selected_location = next( - loc for loc in locations if loc.location_name == selected_location_name - ) - - # get models - get_forecasters_request = dp.ListForecastersRequest() - get_forecasters_response = await client.list_forecasters( - get_forecasters_request - ) - forecasters = get_forecasters_response.forecasters - forecaster_names = sorted(list(set([forecaster.forecaster_name for forecaster in forecasters]))) - if "pvnet_v2" in forecaster_names: - default_index = forecaster_names.index("pvnet_v2") - else: - default_index = 0 - selected_forecaster_name = st.sidebar.multiselect( - "Select a Forecaster", - forecaster_names, - default=forecaster_names[default_index], - ) - selected_forecasters = [ - forecaster - for forecaster in forecasters - if 
forecaster.forecaster_name in selected_forecaster_name - ] - - # select start and end date - start_date = st.sidebar.date_input( - "Start date:", datetime.now().date() - timedelta(days=30) - ) - end_date = st.sidebar.date_input( - "End date:", datetime.now().date() + timedelta(days=3) - ) - start_date = datetime.combine(start_date, datetime.min.time()).replace( - tzinfo=timezone.utc - ) - end_date = datetime.combine(end_date, datetime.min.time()).replace( - tzinfo=timezone.utc - ) - - # select forecast type - selected_forecast_type = st.sidebar.selectbox( - "Select a Forecast Type", ['Current', "Horizon", "t0"], index=0 - ) - - if selected_forecast_type == 'Horizon': - selected_forecast_horizon = st.sidebar.selectbox( - "Select a Forecast Horizon", list(range(0,2400,30)), index=3 - ) - - # select units - if location_type == dp.LocationType.NATION: - default_unit_index = 3 # GW - else: - default_unit_index = 2 # MW - units = st.sidebar.selectbox("Select Units", ["W", "kW", "MW", "GW"], index=default_unit_index) - scale_factors = {"W": 1, "kW": 1e3, "MW": 1e6, "GW": 1e9} - scale_factor = scale_factors[units] - - - # get generation data - time_start = time.time() - all_observations_df = await get_all_observations( - client, selected_location, start_date, end_date - ) - observation_seconds = time.time() - time_start - - # get forcast all data - time_start = time.time() - all_forecast_data_df = await get_forecast_data( - client, selected_location, start_date, end_date, selected_forecasters - ) - forecast_seconds = time.time() - time_start - st.write(f"Selected Location uuid: `{selected_location.location_uuid}`.") - st.write( - f"Fetched `{len(all_forecast_data_df)}` rows of forecast data in `{forecast_seconds:.2f}` seconds. \ - Fetched `{len(all_observations_df)}` rows of observation data in `{observation_seconds:.2f}` seconds. \ - We cache data for 5 minutses to speed up repeated requests." 
- ) - - # add download button - csv = all_forecast_data_df.to_csv().encode("utf-8") - st.download_button( - label="⬇️", - data=csv, - file_name=f"site_forecast_{selected_location.location_uuid}_{start_date}_{end_date}.csv", - mime="text/csv", - ) - - # 1. Plot of raw forecast data - st.header("Time Series Plot") - - all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime( - all_forecast_data_df["init_timestamp"] - ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m") - - if selected_forecast_type == 'Current': - # Choose current forecast - # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins - # it should also be unique for each forecasterFullName - current_forecast_df = all_forecast_data_df.loc[ - all_forecast_data_df.groupby( - ["target_timestamp_utc", "forecaster_name"] - )["horizon_mins"].idxmin() - ] - elif selected_forecast_type == 'Horizon': - # Choose horizon forecast - current_forecast_df = all_forecast_data_df[ - all_forecast_data_df["horizon_mins"] >= selected_forecast_horizon - ] - current_forecast_df = current_forecast_df.loc[ - current_forecast_df.groupby( - ["target_timestamp_utc", "forecaster_name"] - )["horizon_mins"].idxmin() - ] - else: - pass - - # plot the results - fig = go.Figure() - for observer_name in observer_names: - obs_df = all_observations_df[ - all_observations_df["observer_name"] == observer_name - ] - - if observer_name == "pvlive_in_day": - # dashed white line - line = dict(color="white", dash="dash") - elif observer_name == "pvlive_day_after": - line = dict(color="white") - else: - line = dict() - - fig.add_trace( - go.Scatter( - x=obs_df["timestamp_utc"], - y=obs_df["value_watts"] / scale_factor, - mode="lines", - name=observer_name, - line=line, - ) - ) - - for i, forecaster_name in enumerate(forecaster_names): - forecaster_df = current_forecast_df[ - current_forecast_df["forecaster_name"] == forecaster_name - ] - fig.add_trace( - go.Scatter( - 
x=forecaster_df["target_timestamp_utc"], - y=forecaster_df["p50_watts"] / scale_factor, - mode="lines", - name=forecaster_name, - line=dict(color=colours[i % len(colours)]), - legendgroup=forecaster_name, - - ) - ) - if 'p10_watts' in forecaster_df.columns and 'p90_watts' in forecaster_df.columns: - fig.add_trace( - go.Scatter( - x=forecaster_df["target_timestamp_utc"], - y=forecaster_df["p10_watts"] / scale_factor, - mode="lines", - line=dict(color=colours[i % len(colours)], width=0), - legendgroup=forecaster_name, - showlegend=False, - ) - ) - - fig.add_trace( - go.Scatter( - x=forecaster_df["target_timestamp_utc"], - y=forecaster_df["p90_watts"] / scale_factor, - mode="lines", - line=dict(color=colours[i % len(colours)], width=0), - legendgroup=forecaster_name, - showlegend=False, - fill="tonexty", - ) - ) - - - fig.update_layout( - title="Current Forecast", - xaxis_title="Time", - yaxis_title=f"Generation [{units}]", - legend_title="Forecaster", - ) - - st.plotly_chart(fig) - - # 2. 
Summary Accuracy Graph - st.header("Summary Accuracy Graph") - metrics = { - "MAE": "MAE is absolute mean error, average(abs(y-x))", - "ME": "ME is mean (bias) error, average((y-x))", - # "TODO NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)", - # "TODO NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)", - # "NMAE (by observed generation)":" NAME (by observed generation)" - } - selected_metric = st.sidebar.selectbox( - "Select a Metrics", metrics.keys(), index=0 - ) - - st.write(metrics) - - # If the observation data includes pvlive_day_after and pvlive_in_day, then lets just take pvlive_day_after - if "pvlive_day_after" in all_observations_df["observer_name"].values: - all_observations_df = all_observations_df[ - all_observations_df["observer_name"] == "pvlive_day_after" - ] - - # take the foecast data, and group by horizonMins, forecasterFullName - # calculate mean absolute error between p50Fraction and observations valueFraction - all_observations_df["timestamp_utc"] = pd.to_datetime( - all_observations_df["timestamp_utc"] - ) - merged_df = pd.merge( - all_forecast_data_df, - all_observations_df, - left_on=["target_timestamp_utc"], - right_on=["timestamp_utc"], - how="inner", - suffixes=("_forecast", "_observation"), - ) - merged_df["effective_capacity_watts_observation"] = merged_df[ - "effective_capacity_watts_observation" - ].astype(float) - - # error and absolute error - merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"] - merged_df["absolute_error"] = merged_df["error"].abs() - - # Get the mean observed generation - mean_observed_generation = merged_df["value_watts"].mean() - - # mean absolute error by horizonMins and forecasterFullName - summary_df = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"absolute_error": "mean"}) - .reset_index() - ) - summary_df["std"] = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - 
.agg({"absolute_error": "std"}) - .reset_index()["absolute_error"] - ) - summary_df["count"] = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"absolute_error": "count"}) - .reset_index()["absolute_error"] - ) - summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5) - - # ME - summary_df["ME"] = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"error": "mean"}) - .reset_index()["error"] - ) - - # TODO more metrics - - summary_df["effective_capacity_watts_observation"] = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"effective_capacity_watts_observation": "mean"}) - .reset_index()["effective_capacity_watts_observation"] - ) - - # rename absolute_error to MAE - summary_df = summary_df.rename(columns={"absolute_error": "MAE"}) - summary_df["NMAE (by capacity)"] = ( - summary_df["MAE"] / summary_df["effective_capacity_watts_observation"] - ) - summary_df["NMAE (by mean observed generation)"] = ( - summary_df["MAE"] / mean_observed_generation - ) - # summary_df["NMAE (by observed generation)"] = summary_df["absolute_error_divided_by_observed"] - - fig2 = go.Figure() - - for i, forecaster_name in enumerate(forecaster_names): - - forecaster_df = summary_df[ - summary_df["forecaster_name"] == forecaster_name - ] - fig2.add_trace( - go.Scatter( - x=forecaster_df["horizon_mins"], - y=forecaster_df[selected_metric] / scale_factor, - mode="lines+markers", - name=forecaster_name, - line=dict(color=colours[i % len(colours)]), - ) - ) - - fig2.add_trace( - go.Scatter( - x=forecaster_df["horizon_mins"], - y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor, - mode="lines", - line=dict(color=colours[i % len(colours)], width=0), - legendgroup=forecaster_name, - showlegend=False, - ) - ) - - fig2.add_trace( - go.Scatter( - x=forecaster_df["horizon_mins"], - y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor, - mode="lines", - line=dict(color=colours[i % 
len(colours)], width=0), - legendgroup=forecaster_name, - showlegend=False, - fill="tonexty", - ) - ) - - fig2.update_layout( - title=f"{selected_metric} by Horizon", - xaxis_title="Horizon (Minutes)", - yaxis_title=f"{selected_metric} [{units}]", - legend_title="Forecaster", - ) - - st.plotly_chart(fig2) - - csv = summary_df.to_csv().encode("utf-8") - st.download_button( - label="⬇️", - data=csv, - file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv", - mime="text/csv", - ) - - # 3. Summary Accuracy Table, with slider to select min and max horizon mins - st.header("Summary Accuracy Table") - - # add slider to select min and max horizon mins - min_horizon, max_horizon = st.slider( - "Select Horizon Mins Range", - int(summary_df["horizon_mins"].min()), - int(summary_df["horizon_mins"].max()), - ( - int(summary_df["horizon_mins"].min()), - int(summary_df["horizon_mins"].max()), - ), - step=30, - ) - - # Reduce my horizon mins - summary_table_df = merged_df[ - (merged_df["horizon_mins"] >= min_horizon) - & (merged_df["horizon_mins"] <= max_horizon) - ] - - summary_table_df = summary_table_df.rename( - columns={ - "effective_capacity_watts_observation": "Capacity_watts", - "value_watts": "Mean_Observed_Generation_watts", - } - ) - - value_columns = [ - "error", - "absolute_error", - # 'absolute_error_normalized_by_generation', - "Mean_Observed_Generation_watts", - "Capacity_watts", - ] - - summary_table_df = summary_table_df[["forecaster_name"] + value_columns] - - summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype( - float - ) - - # group by forecaster full name a - summary_table_df = summary_table_df.groupby("forecaster_name").mean() - - # rename - summary_table_df = summary_table_df.rename( - columns={ - "error": "ME", - "absolute_error": "MAE", - # 'absolute_error_normalized_by_generation': 'NMAE (by observed generation)', - "Capacity_watts": "Mean Capacity", - "Mean_Observed_Generation_watts": 
"Mean Observed Generation", - } - ) - - # scale by units - summary_table_df = summary_table_df / scale_factor - summary_table_df = summary_table_df.rename( - {col: f'{col} [{units}]' for col in summary_table_df.columns}, - axis=1, - ) - - # pivot table, so forecaster_name is columns - summary_table_df = summary_table_df.pivot_table( - columns=summary_table_df.index, - values=summary_table_df.columns.tolist(), - ) - - - st.dataframe(summary_table_df) - - # 4. Daily metric plots - st.header("Daily Metrics Plots") - st.write("Plotted below are the daily MAE for each forecaster. This is for all forecast horizons.") - daily_plots_df = merged_df - daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date - - # group by forecaster name and date - daily_metrics_df = ( - daily_plots_df.groupby(["date_utc", "forecaster_name"]) - .agg({"absolute_error": "mean"}) - .reset_index() - ).rename(columns={"absolute_error": "MAE"}) - # ME - daily_metrics_df["ME"] = ( - daily_plots_df.groupby(["date_utc", "forecaster_name"]) - .agg({"error": "mean"}) - .reset_index()["error"] - ) - - fig3 = go.Figure() - for i, forecaster in enumerate(selected_forecasters): - name_and_version = ( - f"{forecaster.forecaster_name}" - ) - forecaster_df = daily_metrics_df[ - daily_metrics_df["forecaster_name"] == name_and_version - ] - fig3.add_trace( - go.Scatter( - x=forecaster_df["date_utc"], - y=forecaster_df[selected_metric] / scale_factor, - # mode="lines+markers", - name=forecaster.forecaster_name, - line=dict(color=colours[i % len(colours)]), - ) - ) - - fig3.update_layout( - title=f"Daily {selected_metric}", - xaxis_title="Date", - yaxis_title=f"{selected_metric} [{units}]", - legend_title="Forecaster", - ) - - st.plotly_chart(fig3) - - - - st.header("TODO") - - st.write("Bug: cache not releasing") - st.write("Align forecasts on t0") - st.write("Add more metrics") - st.write("Add creation time / t0 forecast filter") - st.write("speed up read, use async and more caching") diff --git 
a/src/dataplatform/forecast/constanst.py b/src/dataplatform/forecast/constanst.py new file mode 100644 index 0000000..107fdab --- /dev/null +++ b/src/dataplatform/forecast/constanst.py @@ -0,0 +1,20 @@ +colours = [ + "#FFD480", + "#FF8F73", + "#4675C1", + "#65B0C9", + "#58B0A9", + "#FAA056", + "#306BFF", + "#FF4901", + "#B701FF", + "#17E58F", +] + +metrics = { + "MAE": "MAE is absolute mean error, average(abs(y-x))", + "ME": "ME is mean (bias) error, average((y-x))", + # "TODO NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)", + # "TODO NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)", + # "NMAE (by observed generation)":" NAME (by observed generation)" +} diff --git a/src/dataplatform/data.py b/src/dataplatform/forecast/data.py similarity index 58% rename from src/dataplatform/data.py rename to src/dataplatform/forecast/data.py index f44da71..7b5cb1b 100644 --- a/src/dataplatform/data.py +++ b/src/dataplatform/forecast/data.py @@ -1,9 +1,11 @@ -from datetime import timedelta import os -from dp_sdk.ocf import dp -import pandas as pd +import time +from datetime import timedelta + import betterproto +import pandas as pd from aiocache import Cache, cached +from dp_sdk.ocf import dp data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost") data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051")) @@ -14,7 +16,6 @@ def key_builder_remove_client(func, *args, **kwargs): """Custom key builder that ignores the client argument for caching purposes.""" - key = f"{func.__name__}:" for arg in args: if isinstance(arg, dp.DataPlatformDataServiceStub): @@ -28,13 +29,21 @@ def key_builder_remove_client(func, *args, **kwargs): async def get_forecast_data( - client, location, start_date, end_date, selected_forecasters + client, + location, + start_date, + end_date, + selected_forecasters, ) -> pd.DataFrame: all_data_df = [] for forecaster in selected_forecasters: forecaster_data_df = 
await get_forecast_data_one_forecaster( - client, location, start_date, end_date, forecaster + client, + location, + start_date, + end_date, + forecaster, ) all_data_df.append(forecaster_data_df) @@ -56,7 +65,11 @@ async def get_forecast_data( @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) async def get_forecast_data_one_forecaster( - client, location, start_date, end_date, selected_forecaster + client, + location, + start_date, + end_date, + selected_forecaster, ) -> pd.DataFrame: all_data_df = [] @@ -72,27 +85,22 @@ async def get_forecast_data_one_forecaster( location_uuid=location.location_uuid, energy_source=dp.EnergySource.SOLAR, time_window=dp.TimeWindow( - start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date + start_timestamp_utc=temp_start_date, + end_timestamp_utc=temp_end_date, ), forecasters=[selected_forecaster], ) forecasts = [] async for chunk in client.stream_forecast_data(stream_forecast_data_request): forecasts.append( - chunk.to_dict( - include_default_values=True, casing=betterproto.Casing.SNAKE - ) + chunk.to_dict(include_default_values=True, casing=betterproto.Casing.SNAKE), ) if len(forecasts) > 0: all_data_df.append( pd.DataFrame.from_dict(forecasts) - .pipe( - lambda df: df.join( - pd.json_normalize(df["other_statistics_fractions"]) - ) - ) - .drop("other_statistics_fractions", axis=1) + .pipe(lambda df: df.join(pd.json_normalize(df["other_statistics_fractions"]))) + .drop("other_statistics_fractions", axis=1), ) temp_start_date = temp_start_date + timedelta(days=7) @@ -101,7 +109,7 @@ async def get_forecast_data_one_forecaster( # create column forecaster_name, its forecaster_fullname with version removed all_data_df["forecaster_name"] = all_data_df["forecaster_fullname"].apply( - lambda x: x.rsplit(":", 1)[0] # split from right, max 1 split + lambda x: x.rsplit(":", 1)[0], # split from right, max 1 split ) return all_data_df @@ -127,15 +135,13 @@ async def get_all_observations(client, location, 
start_date, end_date) -> pd.Dat time_window=dp.TimeWindow(temp_start_date, temp_end_date), ) get_observations_response = await client.get_observations_as_timeseries( - get_observations_request + get_observations_request, ) observations = [] for chunk in get_observations_response.values: observations.append( - chunk.to_dict( - include_default_values=True, casing=betterproto.Casing.SNAKE - ) + chunk.to_dict(include_default_values=True, casing=betterproto.Casing.SNAKE), ) observation_one_df.append(pd.DataFrame.from_dict(observations)) @@ -151,7 +157,70 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat all_observations_df = pd.concat(all_observations_df, ignore_index=True) all_observations_df["value_watts"] = all_observations_df["value_fraction"].astype( - float + float, ) * all_observations_df["effective_capacity_watts"].astype(float) + all_observations_df["timestamp_utc"] = pd.to_datetime(all_observations_df["timestamp_utc"]) + + return all_observations_df + + +async def get_all_data(client, selected_location, start_date, end_date, selected_forecasters): + # get generation data + time_start = time.time() + all_observations_df = await get_all_observations( + client, + selected_location, + start_date, + end_date, + ) + observation_seconds = time.time() - time_start + + # get forcast all data + time_start = time.time() + all_forecast_data_df = await get_forecast_data( + client, + selected_location, + start_date, + end_date, + selected_forecasters, + ) + forecast_seconds = time.time() - time_start + + # If the observation data includes pvlive_day_after and pvlive_in_day, then lets just take pvlive_day_after + one_observations_df = all_observations_df.copy() + if "pvlive_day_after" in all_observations_df["observer_name"].values: + one_observations_df = all_observations_df[ + all_observations_df["observer_name"] == "pvlive_day_after" + ] + + + # make target_timestamp_utc + all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime( + 
all_forecast_data_df["init_timestamp"], + ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m") + + # take the foecast data, and group by horizonMins, forecasterFullName + # calculate mean absolute error between p50Fraction and observations valueFraction + merged_df = pd.merge( + all_forecast_data_df, + one_observations_df, + left_on=["target_timestamp_utc"], + right_on=["timestamp_utc"], + how="inner", + suffixes=("_forecast", "_observation"), + ) + merged_df["effective_capacity_watts_observation"] = merged_df[ + "effective_capacity_watts_observation" + ].astype(float) - return all_observations_df \ No newline at end of file + # error and absolute error + merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"] + merged_df["absolute_error"] = merged_df["error"].abs() + + return { + "merged_df": merged_df, + "all_forecast_data_df": all_forecast_data_df, + "all_observations_df": all_observations_df, + "forecast_seconds": forecast_seconds, + "observation_seconds": observation_seconds, + } diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py new file mode 100644 index 0000000..db47630 --- /dev/null +++ b/src/dataplatform/forecast/main.py @@ -0,0 +1,213 @@ +import asyncio +import os + +import streamlit as st +from dp_sdk.ocf import dp +from grpclib.client import Channel + +from dataplatform.forecast.constanst import metrics +from dataplatform.forecast.data import get_all_data +from dataplatform.forecast.plot import ( + plot_forecast_metric_per_day, + plot_forecast_metric_vs_horizon_minutes, + plot_forecast_time_series, +) +from dataplatform.forecast.setup import setup_page + +data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost") +data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051")) + +# TODO make this dynamic +observer_names = ["pvlive_in_day", "pvlive_day_after"] + + +def dp_forecast_page(): + asyncio.run(async_dp_forecast_page()) + + +async def async_dp_forecast_page(): + 
st.title("Data Platform Forecast Page") + st.write("This is the forecast page from the Data Platform module. This is very much a WIP") + + async with Channel(host=data_platform_host, port=data_platform_port) as channel: + client = dp.DataPlatformDataServiceStub(channel) + + setup_page_dict = await setup_page(client) + selected_location = setup_page_dict["selected_location"] + start_date = setup_page_dict["start_date"] + end_date = setup_page_dict["end_date"] + selected_forecasters = setup_page_dict["selected_forecasters"] + forecaster_names = setup_page_dict["forecaster_names"] + selected_metric = setup_page_dict["selected_metric"] + selected_forecast_type = setup_page_dict["selected_forecast_type"] + scale_factor = setup_page_dict["scale_factor"] + selected_forecast_horizon = setup_page_dict["selected_forecast_horizon"] + units = setup_page_dict["units"] + + ### 1. Get all the data ### + all_data_dict = await get_all_data( + client=client, + start_date=start_date, + end_date=end_date, + selected_forecasters=selected_forecasters, + selected_location=selected_location, + ) + merged_df = all_data_dict["merged_df"] + all_forecast_data_df = all_data_dict["all_forecast_data_df"] + all_observations_df = all_data_dict["all_observations_df"] + forecast_seconds = all_data_dict["forecast_seconds"] + observation_seconds = all_data_dict["observation_seconds"] + + st.write(f"Selected Location uuid: `{selected_location.location_uuid}`.") + st.write( + f"Fetched `{len(all_forecast_data_df)}` rows of forecast data in `{forecast_seconds:.2f}` seconds. \ + Fetched `{len(all_observations_df)}` rows of observation data in `{observation_seconds:.2f}` seconds. \ + We cache data for 5 minutses to speed up repeated requests.", + ) + + # add download button + csv = all_forecast_data_df.to_csv().encode("utf-8") + st.download_button( + label="⬇️", + data=csv, + file_name=f"site_forecast_{selected_location.location_uuid}_{start_date}_{end_date}.csv", + mime="text/csv", + ) + + ### 2. 
Plot of raw forecast data. ### + st.header("Time Series Plot") + + fig = plot_forecast_time_series( + all_forecast_data_df=all_forecast_data_df, + all_observations_df=all_observations_df, + forecaster_names=forecaster_names, + observer_names=observer_names, + scale_factor=scale_factor, + units=units, + selected_forecast_type=selected_forecast_type, + selected_forecast_horizon=selected_forecast_horizon, + ) + st.plotly_chart(fig) + + ### 3. Summary Accuracy Graph. ### + st.header("Summary Accuracy Graph") + + st.write(metrics) + + fig2, summary_df = plot_forecast_metric_vs_horizon_minutes( + merged_df, forecaster_names, selected_metric, scale_factor, units + ) + + st.plotly_chart(fig2) + + csv = summary_df.to_csv().encode("utf-8") + st.download_button( + label="⬇️", + data=csv, + file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv", + mime="text/csv", + ) + + ### 4. Summary Accuracy Table, with slider to select min and max horizon mins. ### + st.header("Summary Accuracy Table") + + # add slider to select min and max horizon mins + min_horizon, max_horizon = st.slider( + "Select Horizon Mins Range", + int(summary_df["horizon_mins"].min()), + int(summary_df["horizon_mins"].max()), + ( + int(summary_df["horizon_mins"].min()), + int(summary_df["horizon_mins"].max()), + ), + step=30, + ) + + summary_table_df = make_summary_data( + merged_df=merged_df, + min_horizon=min_horizon, + max_horizon=max_horizon, + scale_factor=scale_factor, + units=units, + ) + + st.dataframe(summary_table_df) + + ### 4. Daily metric plots. ### + st.header("Daily Metrics Plots") + st.write( + "Plotted below are the daily MAE for each forecaster. 
This is for all forecast horizons.", + ) + + fig3 = plot_forecast_metric_per_day( + merged_df=merged_df, + selected_forecasters=selected_forecasters, + scale_factor=scale_factor, + units=units, + selected_metric=selected_metric + ) + + st.plotly_chart(fig3) + + st.header("TODO") + + st.write("Bug: cache not releasing") + st.write("Align forecasts on t0") + st.write("Add more metrics") + st.write("Add creation time / t0 forecast filter") + st.write("speed up read, use async and more caching") + + +def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units): + # Reduce my horizon mins + summary_table_df = merged_df[ + (merged_df["horizon_mins"] >= min_horizon) & (merged_df["horizon_mins"] <= max_horizon) + ] + + summary_table_df = summary_table_df.rename( + columns={ + "effective_capacity_watts_observation": "Capacity_watts", + "value_watts": "Mean_Observed_Generation_watts", + }, + ) + + value_columns = [ + "error", + "absolute_error", + # 'absolute_error_normalized_by_generation', + "Mean_Observed_Generation_watts", + "Capacity_watts", + ] + + summary_table_df = summary_table_df[["forecaster_name"] + value_columns] + + summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype(float) + + # group by forecaster full name a + summary_table_df = summary_table_df.groupby("forecaster_name").mean() + + # rename + summary_table_df = summary_table_df.rename( + columns={ + "error": "ME", + "absolute_error": "MAE", + # 'absolute_error_normalized_by_generation': 'NMAE (by observed generation)', + "Capacity_watts": "Mean Capacity", + "Mean_Observed_Generation_watts": "Mean Observed Generation", + }, + ) + + # scale by units + summary_table_df = summary_table_df / scale_factor + summary_table_df = summary_table_df.rename( + {col: f"{col} [{units}]" for col in summary_table_df.columns}, + axis=1, + ) + + # pivot table, so forecaster_name is columns + summary_table_df = summary_table_df.pivot_table( + columns=summary_table_df.index, + 
values=summary_table_df.columns.tolist(), + ) + + return summary_table_df diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py new file mode 100644 index 0000000..52bbbe1 --- /dev/null +++ b/src/dataplatform/forecast/plot.py @@ -0,0 +1,243 @@ +import plotly.graph_objects as go + +from dataplatform.forecast.constanst import colours + + +def plot_forecast_time_series( + all_forecast_data_df, + all_observations_df, + forecaster_names, + observer_names, + scale_factor, + units, + selected_forecast_type, + selected_forecast_horizon, +): + if selected_forecast_type == "Current": + # Choose current forecast + # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins + # it should also be unique for each forecasterFullName + current_forecast_df = all_forecast_data_df.loc[ + all_forecast_data_df.groupby(["target_timestamp_utc", "forecaster_name"])[ + "horizon_mins" + ].idxmin() + ] + elif selected_forecast_type == "Horizon": + # Choose horizon forecast + current_forecast_df = all_forecast_data_df[ + all_forecast_data_df["horizon_mins"] >= selected_forecast_horizon + ] + current_forecast_df = current_forecast_df.loc[ + current_forecast_df.groupby(["target_timestamp_utc", "forecaster_name"])[ + "horizon_mins" + ].idxmin() + ] + else: + pass + + # plot the results + fig = go.Figure() + for observer_name in observer_names: + obs_df = all_observations_df[all_observations_df["observer_name"] == observer_name] + + if observer_name == "pvlive_in_day": + # dashed white line + line = dict(color="white", dash="dash") + elif observer_name == "pvlive_day_after": + line = dict(color="white") + else: + line = dict() + + fig.add_trace( + go.Scatter( + x=obs_df["timestamp_utc"], + y=obs_df["value_watts"] / scale_factor, + mode="lines", + name=observer_name, + line=line, + ), + ) + + for i, forecaster_name in enumerate(forecaster_names): + forecaster_df = current_forecast_df[ + current_forecast_df["forecaster_name"] == 
forecaster_name + ] + fig.add_trace( + go.Scatter( + x=forecaster_df["target_timestamp_utc"], + y=forecaster_df["p50_watts"] / scale_factor, + mode="lines", + name=forecaster_name, + line=dict(color=colours[i % len(colours)]), + legendgroup=forecaster_name, + ), + ) + if "p10_watts" in forecaster_df.columns and "p90_watts" in forecaster_df.columns: + fig.add_trace( + go.Scatter( + x=forecaster_df["target_timestamp_utc"], + y=forecaster_df["p10_watts"] / scale_factor, + mode="lines", + line=dict(color=colours[i % len(colours)], width=0), + legendgroup=forecaster_name, + showlegend=False, + ), + ) + + fig.add_trace( + go.Scatter( + x=forecaster_df["target_timestamp_utc"], + y=forecaster_df["p90_watts"] / scale_factor, + mode="lines", + line=dict(color=colours[i % len(colours)], width=0), + legendgroup=forecaster_name, + showlegend=False, + fill="tonexty", + ), + ) + + fig.update_layout( + title="Current Forecast", + xaxis_title="Time", + yaxis_title=f"Generation [{units}]", + legend_title="Forecaster", + ) + + return fig + + +def plot_forecast_metric_vs_horizon_minutes( + merged_df, forecaster_names, selected_metric, scale_factor, units +): + # Get the mean observed generation + mean_observed_generation = merged_df["value_watts"].mean() + + # mean absolute error by horizonMins and forecasterFullName + summary_df = ( + merged_df.groupby(["horizon_mins", "forecaster_name"]) + .agg({"absolute_error": "mean"}) + .reset_index() + ) + summary_df["std"] = ( + merged_df.groupby(["horizon_mins", "forecaster_name"]) + .agg({"absolute_error": "std"}) + .reset_index()["absolute_error"] + ) + summary_df["count"] = ( + merged_df.groupby(["horizon_mins", "forecaster_name"]) + .agg({"absolute_error": "count"}) + .reset_index()["absolute_error"] + ) + summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5) + + # ME + summary_df["ME"] = ( + merged_df.groupby(["horizon_mins", "forecaster_name"]) + .agg({"error": "mean"}) + .reset_index()["error"] + ) + + # TODO more 
metrics + + summary_df["effective_capacity_watts_observation"] = ( + merged_df.groupby(["horizon_mins", "forecaster_name"]) + .agg({"effective_capacity_watts_observation": "mean"}) + .reset_index()["effective_capacity_watts_observation"] + ) + + # rename absolute_error to MAE + summary_df = summary_df.rename(columns={"absolute_error": "MAE"}) + summary_df["NMAE (by capacity)"] = ( + summary_df["MAE"] / summary_df["effective_capacity_watts_observation"] + ) + summary_df["NMAE (by mean observed generation)"] = summary_df["MAE"] / mean_observed_generation + # summary_df["NMAE (by observed generation)"] = summary_df["absolute_error_divided_by_observed"] + + fig2 = go.Figure() + + for i, forecaster_name in enumerate(forecaster_names): + forecaster_df = summary_df[summary_df["forecaster_name"] == forecaster_name] + fig2.add_trace( + go.Scatter( + x=forecaster_df["horizon_mins"], + y=forecaster_df[selected_metric] / scale_factor, + mode="lines+markers", + name=forecaster_name, + line=dict(color=colours[i % len(colours)]), + ), + ) + + fig2.add_trace( + go.Scatter( + x=forecaster_df["horizon_mins"], + y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor, + mode="lines", + line=dict(color=colours[i % len(colours)], width=0), + legendgroup=forecaster_name, + showlegend=False, + ), + ) + + fig2.add_trace( + go.Scatter( + x=forecaster_df["horizon_mins"], + y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor, + mode="lines", + line=dict(color=colours[i % len(colours)], width=0), + legendgroup=forecaster_name, + showlegend=False, + fill="tonexty", + ), + ) + + fig2.update_layout( + title=f"{selected_metric} by Horizon", + xaxis_title="Horizon (Minutes)", + yaxis_title=f"{selected_metric} [{units}]", + legend_title="Forecaster", + ) + + return fig2, summary_df + + +def plot_forecast_metric_per_day( + merged_df, selected_forecasters, selected_metric, scale_factor, units +): + daily_plots_df = merged_df + 
daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date + + # group by forecaster name and date + daily_metrics_df = ( + daily_plots_df.groupby(["date_utc", "forecaster_name"]) + .agg({"absolute_error": "mean"}) + .reset_index() + ).rename(columns={"absolute_error": "MAE"}) + # ME + daily_metrics_df["ME"] = ( + daily_plots_df.groupby(["date_utc", "forecaster_name"]) + .agg({"error": "mean"}) + .reset_index()["error"] + ) + + fig3 = go.Figure() + for i, forecaster in enumerate(selected_forecasters): + name_and_version = f"{forecaster.forecaster_name}" + forecaster_df = daily_metrics_df[daily_metrics_df["forecaster_name"] == name_and_version] + fig3.add_trace( + go.Scatter( + x=forecaster_df["date_utc"], + y=forecaster_df[selected_metric] / scale_factor, + # mode="lines+markers", + name=forecaster.forecaster_name, + line=dict(color=colours[i % len(colours)]), + ), + ) + + fig3.update_layout( + title=f"Daily {selected_metric}", + xaxis_title="Date", + yaxis_title=f"{selected_metric} [{units}]", + legend_title="Forecaster", + ) + + return fig3 diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py new file mode 100644 index 0000000..bf9c39a --- /dev/null +++ b/src/dataplatform/forecast/setup.py @@ -0,0 +1,95 @@ +from datetime import UTC, datetime, timedelta + +import streamlit as st +from dp_sdk.ocf import dp + +from dataplatform.forecast.constanst import metrics + + +async def setup_page(client) -> dict: + # Select Country + country = st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0) + + # Select Location Type + location_types = [ + dp.LocationType.NATION, + dp.LocationType.GSP, + dp.LocationType.SITE, + ] + location_type = st.sidebar.selectbox("Select a Location Type", location_types, index=0) + + # List Location + list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type) + list_locations_response = await client.list_locations(list_locations_request) + locations = 
list_locations_response.locations + location_names = [loc.location_name for loc in locations] + + # slect locations + selected_location_name = st.sidebar.selectbox("Select a Location", location_names, index=0) + selected_location = next( + loc for loc in locations if loc.location_name == selected_location_name + ) + + # get models + get_forecasters_request = dp.ListForecastersRequest() + get_forecasters_response = await client.list_forecasters(get_forecasters_request) + forecasters = get_forecasters_response.forecasters + forecaster_names = sorted(list(set([forecaster.forecaster_name for forecaster in forecasters]))) + if "pvnet_v2" in forecaster_names: + default_index = forecaster_names.index("pvnet_v2") + else: + default_index = 0 + selected_forecaster_name = st.sidebar.multiselect( + "Select a Forecaster", + forecaster_names, + default=forecaster_names[default_index], + ) + selected_forecasters = [ + forecaster + for forecaster in forecasters + if forecaster.forecaster_name in selected_forecaster_name + ] + + # select start and end date + start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=30)) + end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3)) + start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=UTC) + end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC) + + # select forecast type + selected_forecast_type = st.sidebar.selectbox( + "Select a Forecast Type", + ["Current", "Horizon", "t0"], + index=0, + ) + + selected_forecast_horizon = None + if selected_forecast_type == "Horizon": + selected_forecast_horizon = st.sidebar.selectbox( + "Select a Forecast Horizon", + list(range(0, 2400, 30)), + index=3, + ) + + # select units + default_unit_index = 2 # MW + if location_type == dp.LocationType.NATION: + default_unit_index = 3 # GW + units = st.sidebar.selectbox("Select Units", ["W", "kW", "MW", "GW"], index=default_unit_index) + 
scale_factors = {"W": 1, "kW": 1e3, "MW": 1e6, "GW": 1e9} + scale_factor = scale_factors[units] + + selected_metric = st.sidebar.selectbox("Select a Metrics", metrics.keys(), index=0) + + return { + "selected_location": selected_location, + "selected_forecasters": selected_forecasters, + "start_date": start_date, + "end_date": end_date, + "selected_forecast_type": selected_forecast_type, + "scale_factor": scale_factor, + "selected_metric": selected_metric, + "forecaster_names": forecaster_names, + "selected_forecast_horizon": selected_forecast_horizon, + "units": units, + } From 2d6ad59ca9be8c020de894345c816423505c93e0 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Fri, 21 Nov 2025 09:11:08 +0000 Subject: [PATCH 29/60] increase forecast window to 30 days --- src/dataplatform/forecast/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index 7b5cb1b..e0acb24 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -76,7 +76,7 @@ async def get_forecast_data_one_forecaster( # loop over 30 days of data temp_start_date = start_date while temp_start_date <= end_date: - temp_end_date = temp_start_date + timedelta(days=7) + temp_end_date = temp_start_date + timedelta(days=30) if temp_end_date > end_date: temp_end_date = end_date @@ -103,7 +103,7 @@ async def get_forecast_data_one_forecaster( .drop("other_statistics_fractions", axis=1), ) - temp_start_date = temp_start_date + timedelta(days=7) + temp_start_date = temp_start_date + timedelta(days=30) all_data_df = pd.concat(all_data_df, ignore_index=True) From ccf2c872d8896aa163eddfbbc8b3c5ea5c2f8b73 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Fri, 21 Nov 2025 09:38:29 +0000 Subject: [PATCH 30/60] add init files --- src/dataplatform/__init__.py | 0 src/dataplatform/forecast/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/dataplatform/__init__.py 
create mode 100644 src/dataplatform/forecast/__init__.py diff --git a/src/dataplatform/__init__.py b/src/dataplatform/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/dataplatform/forecast/__init__.py b/src/dataplatform/forecast/__init__.py new file mode 100644 index 0000000..e69de29 From 144ccf425fca166171f8316610d8736099e78e7d Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Fri, 21 Nov 2025 10:52:24 +0000 Subject: [PATCH 31/60] fix import --- src/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 577303f..663be0c 100644 --- a/src/main.py +++ b/src/main.py @@ -11,6 +11,7 @@ from nowcasting_datamodel.models.metric import MetricValue from auth import check_password +from dataplatform.forecast.main import dp_forecast_page from forecast import forecast_page from get_data import get_metric_value from plots.all_gsps import make_all_gsps_plots @@ -36,7 +37,6 @@ from cloudcasting_page import cloudcasting_page from adjuster import adjuster_page from batch_page import batch_page -from dataplatform.forecast import dp_forecast_page st.get_option("theme.primaryColor") st.set_page_config(layout="wide", page_title="OCF Dashboard") From 10bbf6e030fa9ed94796a747ba1848564d776a15 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 08:49:11 +0000 Subject: [PATCH 32/60] add more todos --- src/dataplatform/forecast/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index db47630..0dac0b8 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -156,6 +156,8 @@ async def async_dp_forecast_page(): st.write("Add more metrics") st.write("Add creation time / t0 forecast filter") st.write("speed up read, use async and more caching") + st.write("Improve GSP labels") + st.write("Get page working with no observations data") def make_summary_data(merged_df, min_horizon, max_horizon, 
scale_factor, units): From a0faf6b02357b7294908baebe5d7478b9cbadf99 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 15:18:37 +0000 Subject: [PATCH 33/60] add TODOs --- src/dataplatform/forecast/main.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 0dac0b8..8a1b792 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -158,6 +158,14 @@ async def async_dp_forecast_page(): st.write("speed up read, use async and more caching") st.write("Improve GSP labels") st.write("Get page working with no observations data") + st.write("Change UK to use MW") + st.write("Add GSP to name") + st.write("Remove last MAE point") + st.write("Reduce to last 7 days") + st.write("Options to togle probablies in MAE ") + st.write("Change y/x to actula and forecast") + st.write("Remove duplicate names in legend of daily metrics plot") + st.write("Look into shading areas disappering") def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units): From 1886cb5843aae5869a6ad1d1f57bdb9950d8759a Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 15:19:54 +0000 Subject: [PATCH 34/60] use MW by default on UK-National --- src/dataplatform/forecast/main.py | 2 +- src/dataplatform/forecast/setup.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 8a1b792..4bbda7f 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -158,7 +158,7 @@ async def async_dp_forecast_page(): st.write("speed up read, use async and more caching") st.write("Improve GSP labels") st.write("Get page working with no observations data") - st.write("Change UK to use MW") + st.write("Change UK to use MW: done") st.write("Add GSP to name") st.write("Remove last MAE point") st.write("Reduce to last 7 days") diff --git 
a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py index bf9c39a..bb02d74 100644 --- a/src/dataplatform/forecast/setup.py +++ b/src/dataplatform/forecast/setup.py @@ -73,8 +73,6 @@ async def setup_page(client) -> dict: # select units default_unit_index = 2 # MW - if location_type == dp.LocationType.NATION: - default_unit_index = 3 # GW units = st.sidebar.selectbox("Select Units", ["W", "kW", "MW", "GW"], index=default_unit_index) scale_factors = {"W": 1, "kW": 1e3, "MW": 1e6, "GW": 1e9} scale_factor = scale_factors[units] From 5cf060cf460a42f2f6088557d2c61151c6688829 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 15:23:57 +0000 Subject: [PATCH 35/60] add gsp id to name --- src/dataplatform/forecast/main.py | 4 ++-- src/dataplatform/forecast/setup.py | 15 +++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 4bbda7f..d004b9d 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -158,8 +158,8 @@ async def async_dp_forecast_page(): st.write("speed up read, use async and more caching") st.write("Improve GSP labels") st.write("Get page working with no observations data") - st.write("Change UK to use MW: done") - st.write("Add GSP to name") + st.write("Done: Change UK to use MW") + st.write("Done: Add GSP id to name: done") st.write("Remove last MAE point") st.write("Reduce to last 7 days") st.write("Options to togle probablies in MAE ") diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py index bb02d74..98f849e 100644 --- a/src/dataplatform/forecast/setup.py +++ b/src/dataplatform/forecast/setup.py @@ -21,14 +21,17 @@ async def setup_page(client) -> dict: # List Location list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type) list_locations_response = await client.list_locations(list_locations_request) - locations = 
list_locations_response.locations - location_names = [loc.location_name for loc in locations] + all_locations = list_locations_response.locations + + location_names = {loc.location_name:loc for loc in all_locations} + if location_type == dp.LocationType.GSP: + location_names = {f'{int(loc.metadata.fields['gsp_id'].number_value)}:{loc.location_name}': loc for loc in all_locations} + # sort by gsp id + location_names = dict(sorted(location_names.items(), key=lambda item: int(item[0].split(":")[0]))) # slect locations - selected_location_name = st.sidebar.selectbox("Select a Location", location_names, index=0) - selected_location = next( - loc for loc in locations if loc.location_name == selected_location_name - ) + selected_location_name = st.sidebar.selectbox("Select a Location", location_names.keys(), index=0) + selected_location = location_names[selected_location_name] # get models get_forecasters_request = dp.ListForecastersRequest() From c427a7736fccd784652b82060c0d2bb4fd6c935e Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 15:30:34 +0000 Subject: [PATCH 36/60] reduce to 7 days --- src/dataplatform/forecast/main.py | 2 +- src/dataplatform/forecast/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index d004b9d..58429fb 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -161,7 +161,7 @@ async def async_dp_forecast_page(): st.write("Done: Change UK to use MW") st.write("Done: Add GSP id to name: done") st.write("Remove last MAE point") - st.write("Reduce to last 7 days") + st.write("Done: Reduce to last 7 days") st.write("Options to togle probablies in MAE ") st.write("Change y/x to actula and forecast") st.write("Remove duplicate names in legend of daily metrics plot") diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py index 98f849e..cb33681 100644 --- 
a/src/dataplatform/forecast/setup.py +++ b/src/dataplatform/forecast/setup.py @@ -54,7 +54,7 @@ async def setup_page(client) -> dict: ] # select start and end date - start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=30)) + start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=7)) end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3)) start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=UTC) end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC) From b0dd9aea19a66218ce86a4ec16c8d3131e345849 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 15:40:12 +0000 Subject: [PATCH 37/60] fix for MAE plot --- src/dataplatform/forecast/plot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py index 52bbbe1..3ef4c8c 100644 --- a/src/dataplatform/forecast/plot.py +++ b/src/dataplatform/forecast/plot.py @@ -164,6 +164,7 @@ def plot_forecast_metric_vs_horizon_minutes( mode="lines+markers", name=forecaster_name, line=dict(color=colours[i % len(colours)]), + legendgroup=forecaster_name, ), ) From e5b137a69af6d5703298890aa005adf26d3a3073 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 15:43:22 +0000 Subject: [PATCH 38/60] have option to show sem --- src/dataplatform/forecast/main.py | 9 +++++-- src/dataplatform/forecast/plot.py | 45 ++++++++++++++++--------------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 58429fb..ad4cdae 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -93,9 +93,14 @@ async def async_dp_forecast_page(): st.header("Summary Accuracy Graph") st.write(metrics) + if selected_metric == 'MAE': + show_sem = st.checkbox("Show SEM", value=True) + else: + show_sem = False + fig2, 
summary_df = plot_forecast_metric_vs_horizon_minutes( - merged_df, forecaster_names, selected_metric, scale_factor, units + merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem ) st.plotly_chart(fig2) @@ -162,7 +167,7 @@ async def async_dp_forecast_page(): st.write("Done: Add GSP id to name: done") st.write("Remove last MAE point") st.write("Done: Reduce to last 7 days") - st.write("Options to togle probablies in MAE ") + st.write("Options to togle probablies in MAE: Done") st.write("Change y/x to actula and forecast") st.write("Remove duplicate names in legend of daily metrics plot") st.write("Look into shading areas disappering") diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py index 3ef4c8c..da00fac 100644 --- a/src/dataplatform/forecast/plot.py +++ b/src/dataplatform/forecast/plot.py @@ -107,7 +107,7 @@ def plot_forecast_time_series( def plot_forecast_metric_vs_horizon_minutes( - merged_df, forecaster_names, selected_metric, scale_factor, units + merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem ): # Get the mean observed generation mean_observed_generation = merged_df["value_watts"].mean() @@ -168,28 +168,29 @@ def plot_forecast_metric_vs_horizon_minutes( ), ) - fig2.add_trace( - go.Scatter( - x=forecaster_df["horizon_mins"], - y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor, - mode="lines", - line=dict(color=colours[i % len(colours)], width=0), - legendgroup=forecaster_name, - showlegend=False, - ), - ) + if show_sem: + fig2.add_trace( + go.Scatter( + x=forecaster_df["horizon_mins"], + y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor, + mode="lines", + line=dict(color=colours[i % len(colours)], width=0), + legendgroup=forecaster_name, + showlegend=False, + ), + ) - fig2.add_trace( - go.Scatter( - x=forecaster_df["horizon_mins"], - y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor, - 
mode="lines", - line=dict(color=colours[i % len(colours)], width=0), - legendgroup=forecaster_name, - showlegend=False, - fill="tonexty", - ), - ) + fig2.add_trace( + go.Scatter( + x=forecaster_df["horizon_mins"], + y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor, + mode="lines", + line=dict(color=colours[i % len(colours)], width=0), + legendgroup=forecaster_name, + showlegend=False, + fill="tonexty", + ), + ) fig2.update_layout( title=f"{selected_metric} by Horizon", From c83446eb74b17f6b62e3f22371ef77a15c3e01c9 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 15:44:59 +0000 Subject: [PATCH 39/60] forecast vs actual --- src/dataplatform/forecast/constanst.py | 4 ++-- src/dataplatform/forecast/main.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/dataplatform/forecast/constanst.py b/src/dataplatform/forecast/constanst.py index 107fdab..9fd0aa2 100644 --- a/src/dataplatform/forecast/constanst.py +++ b/src/dataplatform/forecast/constanst.py @@ -12,8 +12,8 @@ ] metrics = { - "MAE": "MAE is absolute mean error, average(abs(y-x))", - "ME": "ME is mean (bias) error, average((y-x))", + "MAE": "MAE is absolute mean error, average(abs(forecast-actual))", + "ME": "ME is mean (bias) error, average((forecast-actual))", # "TODO NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)", # "TODO NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)", # "NMAE (by observed generation)":" NAME (by observed generation)" diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index ad4cdae..0f8d5cb 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -161,14 +161,13 @@ async def async_dp_forecast_page(): st.write("Add more metrics") st.write("Add creation time / t0 forecast filter") st.write("speed up read, use async and more caching") - st.write("Improve GSP labels") 
st.write("Get page working with no observations data") st.write("Done: Change UK to use MW") st.write("Done: Add GSP id to name: done") st.write("Remove last MAE point") st.write("Done: Reduce to last 7 days") - st.write("Options to togle probablies in MAE: Done") - st.write("Change y/x to actula and forecast") + st.write("Done: Options to togle probablies in MAE") + st.write("Done: Change y/x to actual and forecast") st.write("Remove duplicate names in legend of daily metrics plot") st.write("Look into shading areas disappering") From 1c0a0b383695756ebb35ad4d13fa42e7ff78b048 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 15:47:13 +0000 Subject: [PATCH 40/60] remove duplicate in daily MAE plot --- src/dataplatform/forecast/main.py | 4 ++-- src/dataplatform/forecast/plot.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 0f8d5cb..56510e0 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -146,7 +146,7 @@ async def async_dp_forecast_page(): fig3 = plot_forecast_metric_per_day( merged_df=merged_df, - selected_forecasters=selected_forecasters, + forecaster_names=forecaster_names, scale_factor=scale_factor, units=units, selected_metric=selected_metric @@ -168,7 +168,7 @@ async def async_dp_forecast_page(): st.write("Done: Reduce to last 7 days") st.write("Done: Options to togle probablies in MAE") st.write("Done: Change y/x to actual and forecast") - st.write("Remove duplicate names in legend of daily metrics plot") + st.write("Done: Remove duplicate names in legend of daily metrics plot") st.write("Look into shading areas disappering") diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py index da00fac..e2cad8a 100644 --- a/src/dataplatform/forecast/plot.py +++ b/src/dataplatform/forecast/plot.py @@ -203,7 +203,7 @@ def plot_forecast_metric_vs_horizon_minutes( def 
plot_forecast_metric_per_day( - merged_df, selected_forecasters, selected_metric, scale_factor, units + merged_df, forecaster_names, selected_metric, scale_factor, units ): daily_plots_df = merged_df daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date @@ -222,15 +222,15 @@ def plot_forecast_metric_per_day( ) fig3 = go.Figure() - for i, forecaster in enumerate(selected_forecasters): - name_and_version = f"{forecaster.forecaster_name}" + for i, forecaster_name in enumerate(forecaster_names): + name_and_version = f"{forecaster_name}" forecaster_df = daily_metrics_df[daily_metrics_df["forecaster_name"] == name_and_version] fig3.add_trace( go.Scatter( x=forecaster_df["date_utc"], y=forecaster_df[selected_metric] / scale_factor, # mode="lines+markers", - name=forecaster.forecaster_name, + name=forecaster_name, line=dict(color=colours[i % len(colours)]), ), ) From c47f8b1ee3a37c8ce1aad6b39f491f64639e6c10 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 16:57:08 +0000 Subject: [PATCH 41/60] minus 1 sec, so we dont get obsevervations on the next day --- src/dataplatform/forecast/main.py | 12 +++--------- src/dataplatform/forecast/setup.py | 2 +- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 56510e0..592e5d6 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -162,15 +162,9 @@ async def async_dp_forecast_page(): st.write("Add creation time / t0 forecast filter") st.write("speed up read, use async and more caching") st.write("Get page working with no observations data") - st.write("Done: Change UK to use MW") - st.write("Done: Add GSP id to name: done") - st.write("Remove last MAE point") - st.write("Done: Reduce to last 7 days") - st.write("Done: Options to togle probablies in MAE") - st.write("Done: Change y/x to actual and forecast") - st.write("Done: Remove duplicate names in legend of daily metrics plot") 
- st.write("Look into shading areas disappering") - + st.write("Check works for diffrent version of observers") + st.write("Remove last MAE point: done in solar consumer") + def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units): # Reduce my horizon mins diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py index cb33681..36d3649 100644 --- a/src/dataplatform/forecast/setup.py +++ b/src/dataplatform/forecast/setup.py @@ -57,7 +57,7 @@ async def setup_page(client) -> dict: start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=7)) end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3)) start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=UTC) - end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC) + end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC) - timedelta(seconds=1) # select forecast type selected_forecast_type = st.sidebar.selectbox( From 6ceaad3ce6e00a0c7b83111ad6582827d564acd1 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 16:58:10 +0000 Subject: [PATCH 42/60] tidy --- src/dataplatform/forecast/data.py | 3 --- src/dataplatform/forecast/main.py | 2 -- 2 files changed, 5 deletions(-) diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index e0acb24..6a69070 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -7,9 +7,6 @@ from aiocache import Cache, cached from dp_sdk.ocf import dp -data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost") -data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051")) - # TODO make this dynamic observer_names = ["pvlive_in_day", "pvlive_day_after"] diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 592e5d6..6b8efb3 100644 --- a/src/dataplatform/forecast/main.py +++ 
b/src/dataplatform/forecast/main.py @@ -162,8 +162,6 @@ async def async_dp_forecast_page(): st.write("Add creation time / t0 forecast filter") st.write("speed up read, use async and more caching") st.write("Get page working with no observations data") - st.write("Check works for diffrent version of observers") - st.write("Remove last MAE point: done in solar consumer") def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units): From 91f60aabee8c87e66bb452e80c007645b5569d04 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 21:30:51 +0000 Subject: [PATCH 43/60] option for aligning t0s --- src/dataplatform/forecast/data.py | 23 +++++++++++++++++++++++ src/dataplatform/forecast/main.py | 15 ++++++++++----- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index 6a69070..455071a 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -221,3 +221,26 @@ async def get_all_data(client, selected_location, start_date, end_date, selected "forecast_seconds": forecast_seconds, "observation_seconds": observation_seconds, } + + +def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame: + """ Align t0 forecasts for different forecasters """ + + # get all forecaster names + forecaster_names = merged_df["forecaster_name"].unique() + + # align t0 for each forecaster + t0s_per_forecaster = {} + for forecaster_name in forecaster_names: + forecaster_df = merged_df[merged_df["forecaster_name"] == forecaster_name] + + t0s = forecaster_df["init_timestamp"].unique() + t0s_per_forecaster[forecaster_name] = set(t0s) + + # find common t0s + common_t0s = set.intersection(*t0s_per_forecaster.values()) + + # align common t0s in merged_df + merged_df = merged_df[merged_df["init_timestamp"].isin(common_t0s)] + + return merged_df \ No newline at end of file diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 
6b8efb3..9aa7f0d 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -6,7 +6,7 @@ from grpclib.client import Channel from dataplatform.forecast.constanst import metrics -from dataplatform.forecast.data import get_all_data +from dataplatform.forecast.data import get_all_data, align_t0 from dataplatform.forecast.plot import ( plot_forecast_metric_per_day, plot_forecast_metric_vs_horizon_minutes, @@ -90,7 +90,8 @@ async def async_dp_forecast_page(): st.plotly_chart(fig) ### 3. Summary Accuracy Graph. ### - st.header("Summary Accuracy Graph") + st.header("Summary Accuracy") + st.write(metrics) if selected_metric == 'MAE': @@ -98,6 +99,11 @@ async def async_dp_forecast_page(): else: show_sem = False + align_t0s = st.checkbox("Align t0s", value=True) + if align_t0s: + merged_df = align_t0(merged_df) + + st.subheader("Summary Accuracy Graph") fig2, summary_df = plot_forecast_metric_vs_horizon_minutes( merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem @@ -114,7 +120,7 @@ async def async_dp_forecast_page(): ) ### 4. Summary Accuracy Table, with slider to select min and max horizon mins. ### - st.header("Summary Accuracy Table") + st.subheader("Summary Accuracy Table") # add slider to select min and max horizon mins min_horizon, max_horizon = st.slider( @@ -139,7 +145,7 @@ async def async_dp_forecast_page(): st.dataframe(summary_table_df) ### 4. Daily metric plots. ### - st.header("Daily Metrics Plots") + st.subheader("Daily Metrics Plots") st.write( "Plotted below are the daily MAE for each forecaster. 
This is for all forecast horizons.", ) @@ -157,7 +163,6 @@ async def async_dp_forecast_page(): st.header("TODO") st.write("Bug: cache not releasing") - st.write("Align forecasts on t0") st.write("Add more metrics") st.write("Add creation time / t0 forecast filter") st.write("speed up read, use async and more caching") From 4b30bdb02c32c3a3589ddd731cab9a03899bcff1 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 24 Nov 2025 21:33:30 +0000 Subject: [PATCH 44/60] MAE plot link to 0 --- src/dataplatform/forecast/main.py | 13 +++++++------ src/dataplatform/forecast/plot.py | 3 +++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 9aa7f0d..d0c52bf 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -94,17 +94,18 @@ async def async_dp_forecast_page(): st.write(metrics) - if selected_metric == 'MAE': - show_sem = st.checkbox("Show SEM", value=True) - else: - show_sem = False - - align_t0s = st.checkbox("Align t0s", value=True) + + align_t0s = st.checkbox("Align t0s (Only common t0s across all forecaster are used)", value=True) if align_t0s: merged_df = align_t0(merged_df) st.subheader("Summary Accuracy Graph") + if selected_metric == 'MAE': + show_sem = st.checkbox("Show SEM", value=True) + else: + show_sem = False + fig2, summary_df = plot_forecast_metric_vs_horizon_minutes( merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem ) diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py index e2cad8a..25ba7e1 100644 --- a/src/dataplatform/forecast/plot.py +++ b/src/dataplatform/forecast/plot.py @@ -199,6 +199,9 @@ def plot_forecast_metric_vs_horizon_minutes( legend_title="Forecaster", ) + if selected_metric == "MAE": + fig2.update_yaxes(range=[0, None]) + return fig2, summary_df From 9603b9cda2749de9703f092efa2c45302f1fd484 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Tue, 25 Nov 2025 
09:29:46 +0000 Subject: [PATCH 45/60] try to sort cache issue out --- src/dataplatform/forecast/data.py | 5 ++--- src/dataplatform/forecast/main.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index 455071a..6e7c3b5 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -15,9 +15,8 @@ def key_builder_remove_client(func, *args, **kwargs): """Custom key builder that ignores the client argument for caching purposes.""" key = f"{func.__name__}:" for arg in args: - if isinstance(arg, dp.DataPlatformDataServiceStub): - continue - key += f"{arg}-" + if not isinstance(arg, dp.DataPlatformDataServiceStub): + key += f"{arg}-" for k, v in kwargs.items(): key += f"{k}={v}-" diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index d0c52bf..3ddff18 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -161,9 +161,9 @@ async def async_dp_forecast_page(): st.plotly_chart(fig3) - st.header("TODO") + st.header("Known Issues and TODOs") - st.write("Bug: cache not releasing") + st.write("Bug: cache not releasing, the cache should stay for 5 minutes") st.write("Add more metrics") st.write("Add creation time / t0 forecast filter") st.write("speed up read, use async and more caching") From b861b7ddec3d74754fd2c04737810be3f4426e73 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Tue, 25 Nov 2025 10:05:59 +0000 Subject: [PATCH 46/60] add select t0s from forecast --- src/dataplatform/forecast/data.py | 1 + src/dataplatform/forecast/main.py | 10 +++- src/dataplatform/forecast/plot.py | 92 ++++++++++++++++++------------ src/dataplatform/forecast/setup.py | 16 +++++- 4 files changed, 79 insertions(+), 40 deletions(-) diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index 6e7c3b5..984bb89 100644 --- a/src/dataplatform/forecast/data.py +++ 
b/src/dataplatform/forecast/data.py @@ -191,6 +191,7 @@ async def get_all_data(client, selected_location, start_date, end_date, selected # make target_timestamp_utc + all_forecast_data_df["init_timestamp"] = pd.to_datetime(all_forecast_data_df["init_timestamp"]) all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime( all_forecast_data_df["init_timestamp"], ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m") diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 3ddff18..6b047f3 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -42,6 +42,7 @@ async def async_dp_forecast_page(): selected_forecast_type = setup_page_dict["selected_forecast_type"] scale_factor = setup_page_dict["scale_factor"] selected_forecast_horizon = setup_page_dict["selected_forecast_horizon"] + selected_t0s = setup_page_dict["selected_t0s"] units = setup_page_dict["units"] ### 1. Get all the data ### @@ -77,6 +78,8 @@ async def async_dp_forecast_page(): ### 2. Plot of raw forecast data. ### st.header("Time Series Plot") + show_probabilistic = st.checkbox("Show Probabilistic Forecasts", value=True) + fig = plot_forecast_time_series( all_forecast_data_df=all_forecast_data_df, all_observations_df=all_observations_df, @@ -86,11 +89,13 @@ async def async_dp_forecast_page(): units=units, selected_forecast_type=selected_forecast_type, selected_forecast_horizon=selected_forecast_horizon, + selected_t0s=selected_t0s, + show_probabilistic=show_probabilistic ) st.plotly_chart(fig) ### 3. Summary Accuracy Graph. 
### - st.header("Summary Accuracy") + st.header("Accuracy") st.write(metrics) @@ -99,7 +104,7 @@ async def async_dp_forecast_page(): if align_t0s: merged_df = align_t0(merged_df) - st.subheader("Summary Accuracy Graph") + st.subheader("Metric vs Forecast Horizon") if selected_metric == 'MAE': show_sem = st.checkbox("Show SEM", value=True) @@ -165,7 +170,6 @@ async def async_dp_forecast_page(): st.write("Bug: cache not releasing, the cache should stay for 5 minutes") st.write("Add more metrics") - st.write("Add creation time / t0 forecast filter") st.write("speed up read, use async and more caching") st.write("Get page working with no observations data") diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py index 25ba7e1..99c9d71 100644 --- a/src/dataplatform/forecast/plot.py +++ b/src/dataplatform/forecast/plot.py @@ -3,6 +3,44 @@ from dataplatform.forecast.constanst import colours +def make_time_series_trace(fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic=True): + + fig.add_trace( + go.Scatter( + x=forecaster_df["target_timestamp_utc"], + y=forecaster_df["p50_watts"] / scale_factor, + mode="lines", + name=forecaster_name, + line=dict(color=colours[i % len(colours)]), + legendgroup=forecaster_name, + ), + ) + if show_probabilistic and "p10_watts" in forecaster_df.columns and "p90_watts" in forecaster_df.columns: + fig.add_trace( + go.Scatter( + x=forecaster_df["target_timestamp_utc"], + y=forecaster_df["p10_watts"] / scale_factor, + mode="lines", + line=dict(color=colours[i % len(colours)], width=0), + legendgroup=forecaster_name, + showlegend=False, + ), + ) + + fig.add_trace( + go.Scatter( + x=forecaster_df["target_timestamp_utc"], + y=forecaster_df["p90_watts"] / scale_factor, + mode="lines", + line=dict(color=colours[i % len(colours)], width=0), + legendgroup=forecaster_name, + showlegend=False, + fill="tonexty", + ), + ) + + return fig + def plot_forecast_time_series( all_forecast_data_df, 
all_observations_df, @@ -12,6 +50,8 @@ def plot_forecast_time_series( units, selected_forecast_type, selected_forecast_horizon, + selected_t0s, + show_probabilistic=True, ): if selected_forecast_type == "Current": # Choose current forecast @@ -32,8 +72,10 @@ def plot_forecast_time_series( "horizon_mins" ].idxmin() ] - else: - pass + elif selected_forecast_type == "t0": + current_forecast_df = all_forecast_data_df[ + all_forecast_data_df["init_timestamp"].isin(selected_t0s) + ] # plot the results fig = go.Figure() @@ -58,43 +100,19 @@ def plot_forecast_time_series( ), ) + for i, forecaster_name in enumerate(forecaster_names): forecaster_df = current_forecast_df[ - current_forecast_df["forecaster_name"] == forecaster_name - ] - fig.add_trace( - go.Scatter( - x=forecaster_df["target_timestamp_utc"], - y=forecaster_df["p50_watts"] / scale_factor, - mode="lines", - name=forecaster_name, - line=dict(color=colours[i % len(colours)]), - legendgroup=forecaster_name, - ), - ) - if "p10_watts" in forecaster_df.columns and "p90_watts" in forecaster_df.columns: - fig.add_trace( - go.Scatter( - x=forecaster_df["target_timestamp_utc"], - y=forecaster_df["p10_watts"] / scale_factor, - mode="lines", - line=dict(color=colours[i % len(colours)], width=0), - legendgroup=forecaster_name, - showlegend=False, - ), - ) + current_forecast_df["forecaster_name"] == forecaster_name + ] + if selected_forecast_type in ["Current", "Horizon"]: - fig.add_trace( - go.Scatter( - x=forecaster_df["target_timestamp_utc"], - y=forecaster_df["p90_watts"] / scale_factor, - mode="lines", - line=dict(color=colours[i % len(colours)], width=0), - legendgroup=forecaster_name, - showlegend=False, - fill="tonexty", - ), - ) + fig = make_time_series_trace(fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic) + elif selected_forecast_type == "t0": + for _, t0 in enumerate(selected_t0s): + forecaster_with_t0_df = forecaster_df[forecaster_df["init_timestamp"] == t0] + forecaster_name_wth_t0 = 
f"{forecaster_name} | t0: {t0}" + fig = make_time_series_trace(fig, forecaster_with_t0_df, forecaster_name_wth_t0, scale_factor, i, show_probabilistic) fig.update_layout( title="Current Forecast", @@ -246,3 +264,5 @@ def plot_forecast_metric_per_day( ) return fig3 + + diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py index 36d3649..b821d85 100644 --- a/src/dataplatform/forecast/setup.py +++ b/src/dataplatform/forecast/setup.py @@ -1,5 +1,6 @@ from datetime import UTC, datetime, timedelta +import pandas as pd import streamlit as st from dp_sdk.ocf import dp @@ -67,12 +68,24 @@ async def setup_page(client) -> dict: ) selected_forecast_horizon = None + selected_t0s = None if selected_forecast_type == "Horizon": selected_forecast_horizon = st.sidebar.selectbox( "Select a Forecast Horizon", - list(range(0, 2400, 30)), + list(range(0, 24*60, 30)), index=3, ) + if selected_forecast_type == "t0": + + # make datetimes every 30 minutes from start_date to end_date + all_t0s = pd.date_range(start=start_date, end=end_date, freq='30T').to_pydatetime().tolist() + + + selected_t0s = st.sidebar.multiselect( + "Select t0s", + all_t0s, + default=all_t0s[:5], + ) # select units default_unit_index = 2 # MW @@ -92,5 +105,6 @@ async def setup_page(client) -> dict: "selected_metric": selected_metric, "forecaster_names": forecaster_names, "selected_forecast_horizon": selected_forecast_horizon, + "selected_t0s": selected_t0s, "units": units, } From 839cd21e84538ff3fa8af69ab1b13f1f6ae54f28 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Tue, 25 Nov 2025 10:22:40 +0000 Subject: [PATCH 47/60] tidy --- src/dataplatform/forecast/main.py | 1 + src/dataplatform/forecast/setup.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 6b047f3..67eddbf 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -172,6 +172,7 @@ async def 
async_dp_forecast_page(): st.write("Add more metrics") st.write("speed up read, use async and more caching") st.write("Get page working with no observations data") + st.write("MAE vs horizon plot should start at 0") def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units): diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py index b821d85..c40697c 100644 --- a/src/dataplatform/forecast/setup.py +++ b/src/dataplatform/forecast/setup.py @@ -78,13 +78,12 @@ async def setup_page(client) -> dict: if selected_forecast_type == "t0": # make datetimes every 30 minutes from start_date to end_date - all_t0s = pd.date_range(start=start_date, end=end_date, freq='30T').to_pydatetime().tolist() + all_t0s = pd.date_range(start=start_date, end=end_date, freq='30min').to_pydatetime().tolist() - selected_t0s = st.sidebar.multiselect( "Select t0s", all_t0s, - default=all_t0s[:5], + default=all_t0s[:min(5, len(all_t0s))], ) # select units From 86c6f8c1ee192e9e21934cef1258f392bd1f4ab4 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Wed, 26 Nov 2025 08:37:42 +0000 Subject: [PATCH 48/60] add todo --- src/dataplatform/forecast/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 67eddbf..acbada9 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -164,12 +164,14 @@ async def async_dp_forecast_page(): selected_metric=selected_metric ) + st.plotly_chart(fig3) st.header("Known Issues and TODOs") st.write("Bug: cache not releasing, the cache should stay for 5 minutes") st.write("Add more metrics") + st.write("Group adjust and non-adjust") st.write("speed up read, use async and more caching") st.write("Get page working with no observations data") st.write("MAE vs horizon plot should start at 0") From 44b08bac260dcd7f737305993123365dfe9dc7de Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Wed, 26 Nov 2025 13:32:05 
+0000 Subject: [PATCH 49/60] cache more functions --- src/dataplatform/forecast/cache.py | 16 ++++++++ src/dataplatform/forecast/data.py | 26 +++--------- src/dataplatform/forecast/main.py | 25 ++++++----- src/dataplatform/forecast/plot.py | 62 ++++++++++++++++++---------- src/dataplatform/forecast/setup.py | 66 ++++++++++++++++++++---------- 5 files changed, 123 insertions(+), 72 deletions(-) create mode 100644 src/dataplatform/forecast/cache.py diff --git a/src/dataplatform/forecast/cache.py b/src/dataplatform/forecast/cache.py new file mode 100644 index 0000000..5f804ec --- /dev/null +++ b/src/dataplatform/forecast/cache.py @@ -0,0 +1,16 @@ +from dp_sdk.ocf import dp + + +def key_builder_remove_client(func, *args, **kwargs): + """Custom key builder that ignores the client argument for caching purposes.""" + key = f"{func.__name__}:" + for arg in args: + if not isinstance(arg, dp.DataPlatformDataServiceStub): + key += f"{arg}-" + + for k, v in kwargs.items(): + key += f"{k}={v}-" + + print(key) + + return key diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index 984bb89..e214d30 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -1,4 +1,3 @@ -import os import time from datetime import timedelta @@ -7,23 +6,12 @@ from aiocache import Cache, cached from dp_sdk.ocf import dp +from dataplatform.forecast.cache import key_builder_remove_client + # TODO make this dynamic observer_names = ["pvlive_in_day", "pvlive_day_after"] -def key_builder_remove_client(func, *args, **kwargs): - """Custom key builder that ignores the client argument for caching purposes.""" - key = f"{func.__name__}:" - for arg in args: - if not isinstance(arg, dp.DataPlatformDataServiceStub): - key += f"{arg}-" - - for k, v in kwargs.items(): - key += f"{k}={v}-" - - return key - - async def get_forecast_data( client, location, @@ -156,7 +144,7 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat 
float, ) * all_observations_df["effective_capacity_watts"].astype(float) all_observations_df["timestamp_utc"] = pd.to_datetime(all_observations_df["timestamp_utc"]) - + return all_observations_df @@ -188,7 +176,6 @@ async def get_all_data(client, selected_location, start_date, end_date, selected one_observations_df = all_observations_df[ all_observations_df["observer_name"] == "pvlive_day_after" ] - # make target_timestamp_utc all_forecast_data_df["init_timestamp"] = pd.to_datetime(all_forecast_data_df["init_timestamp"]) @@ -224,8 +211,7 @@ async def get_all_data(client, selected_location, start_date, end_date, selected def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame: - """ Align t0 forecasts for different forecasters """ - + """Align t0 forecasts for different forecasters""" # get all forecaster names forecaster_names = merged_df["forecaster_name"].unique() @@ -233,7 +219,7 @@ def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame: t0s_per_forecaster = {} for forecaster_name in forecaster_names: forecaster_df = merged_df[merged_df["forecaster_name"] == forecaster_name] - + t0s = forecaster_df["init_timestamp"].unique() t0s_per_forecaster[forecaster_name] = set(t0s) @@ -243,4 +229,4 @@ def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame: # align common t0s in merged_df merged_df = merged_df[merged_df["init_timestamp"].isin(common_t0s)] - return merged_df \ No newline at end of file + return merged_df diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index acbada9..fde4056 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -6,7 +6,7 @@ from grpclib.client import Channel from dataplatform.forecast.constanst import metrics -from dataplatform.forecast.data import get_all_data, align_t0 +from dataplatform.forecast.data import align_t0, get_all_data from dataplatform.forecast.plot import ( plot_forecast_metric_per_day, plot_forecast_metric_vs_horizon_minutes, @@ -90,29 +90,35 @@ async def 
async_dp_forecast_page(): selected_forecast_type=selected_forecast_type, selected_forecast_horizon=selected_forecast_horizon, selected_t0s=selected_t0s, - show_probabilistic=show_probabilistic + show_probabilistic=show_probabilistic, ) st.plotly_chart(fig) ### 3. Summary Accuracy Graph. ### st.header("Accuracy") - st.write(metrics) - - align_t0s = st.checkbox("Align t0s (Only common t0s across all forecaster are used)", value=True) + + align_t0s = st.checkbox( + "Align t0s (Only common t0s across all forecaster are used)", value=True, + ) if align_t0s: merged_df = align_t0(merged_df) st.subheader("Metric vs Forecast Horizon") - if selected_metric == 'MAE': + if selected_metric == "MAE": show_sem = st.checkbox("Show SEM", value=True) else: show_sem = False fig2, summary_df = plot_forecast_metric_vs_horizon_minutes( - merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem + merged_df, + forecaster_names, + selected_metric, + scale_factor, + units, + show_sem, ) st.plotly_chart(fig2) @@ -161,10 +167,9 @@ async def async_dp_forecast_page(): forecaster_names=forecaster_names, scale_factor=scale_factor, units=units, - selected_metric=selected_metric + selected_metric=selected_metric, ) - st.plotly_chart(fig3) st.header("Known Issues and TODOs") @@ -175,7 +180,7 @@ async def async_dp_forecast_page(): st.write("speed up read, use async and more caching") st.write("Get page working with no observations data") st.write("MAE vs horizon plot should start at 0") - + def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units): # Reduce my horizon mins diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py index 99c9d71..dc16685 100644 --- a/src/dataplatform/forecast/plot.py +++ b/src/dataplatform/forecast/plot.py @@ -3,19 +3,24 @@ from dataplatform.forecast.constanst import colours -def make_time_series_trace(fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic=True): - +def 
make_time_series_trace( + fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic=True, +): fig.add_trace( - go.Scatter( - x=forecaster_df["target_timestamp_utc"], - y=forecaster_df["p50_watts"] / scale_factor, - mode="lines", - name=forecaster_name, - line=dict(color=colours[i % len(colours)]), - legendgroup=forecaster_name, - ), + go.Scatter( + x=forecaster_df["target_timestamp_utc"], + y=forecaster_df["p50_watts"] / scale_factor, + mode="lines", + name=forecaster_name, + line=dict(color=colours[i % len(colours)]), + legendgroup=forecaster_name, + ), ) - if show_probabilistic and "p10_watts" in forecaster_df.columns and "p90_watts" in forecaster_df.columns: + if ( + show_probabilistic + and "p10_watts" in forecaster_df.columns + and "p90_watts" in forecaster_df.columns + ): fig.add_trace( go.Scatter( x=forecaster_df["target_timestamp_utc"], @@ -41,6 +46,7 @@ def make_time_series_trace(fig, forecaster_df, forecaster_name, scale_factor, i, return fig + def plot_forecast_time_series( all_forecast_data_df, all_observations_df, @@ -100,19 +106,26 @@ def plot_forecast_time_series( ), ) - for i, forecaster_name in enumerate(forecaster_names): forecaster_df = current_forecast_df[ - current_forecast_df["forecaster_name"] == forecaster_name - ] + current_forecast_df["forecaster_name"] == forecaster_name + ] if selected_forecast_type in ["Current", "Horizon"]: - - fig = make_time_series_trace(fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic) + fig = make_time_series_trace( + fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic, + ) elif selected_forecast_type == "t0": for _, t0 in enumerate(selected_t0s): forecaster_with_t0_df = forecaster_df[forecaster_df["init_timestamp"] == t0] forecaster_name_wth_t0 = f"{forecaster_name} | t0: {t0}" - fig = make_time_series_trace(fig, forecaster_with_t0_df, forecaster_name_wth_t0, scale_factor, i, show_probabilistic) + fig = make_time_series_trace( + fig, + 
forecaster_with_t0_df, + forecaster_name_wth_t0, + scale_factor, + i, + show_probabilistic, + ) fig.update_layout( title="Current Forecast", @@ -125,7 +138,12 @@ def plot_forecast_time_series( def plot_forecast_metric_vs_horizon_minutes( - merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem + merged_df, + forecaster_names, + selected_metric, + scale_factor, + units, + show_sem, ): # Get the mean observed generation mean_observed_generation = merged_df["value_watts"].mean() @@ -224,7 +242,11 @@ def plot_forecast_metric_vs_horizon_minutes( def plot_forecast_metric_per_day( - merged_df, forecaster_names, selected_metric, scale_factor, units + merged_df, + forecaster_names, + selected_metric, + scale_factor, + units, ): daily_plots_df = merged_df daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date @@ -264,5 +286,3 @@ def plot_forecast_metric_per_day( ) return fig3 - - diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py index c40697c..fbf5c51 100644 --- a/src/dataplatform/forecast/setup.py +++ b/src/dataplatform/forecast/setup.py @@ -2,11 +2,42 @@ import pandas as pd import streamlit as st +from aiocache import Cache, cached from dp_sdk.ocf import dp +from dataplatform.forecast.cache import key_builder_remove_client from dataplatform.forecast.constanst import metrics +@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) +async def get_location_names(client, location_type) -> dict: + # List Location + list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type) + list_locations_response = await client.list_locations(list_locations_request) + all_locations = list_locations_response.locations + + location_names = {loc.location_name: loc for loc in all_locations} + if location_type == dp.LocationType.GSP: + location_names = { + f"{int(loc.metadata.fields['gsp_id'].number_value)}:{loc.location_name}": loc + for loc in all_locations + } + # sort by gsp id + 
location_names = dict( + sorted(location_names.items(), key=lambda item: int(item[0].split(":")[0])), + ) + + return location_names + + +@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) +async def get_forecasters(client): + get_forecasters_request = dp.ListForecastersRequest() + get_forecasters_response = await client.list_forecasters(get_forecasters_request) + forecasters = get_forecasters_response.forecasters + return forecasters + + async def setup_page(client) -> dict: # Select Country country = st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0) @@ -19,25 +50,15 @@ async def setup_page(client) -> dict: ] location_type = st.sidebar.selectbox("Select a Location Type", location_types, index=0) - # List Location - list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type) - list_locations_response = await client.list_locations(list_locations_request) - all_locations = list_locations_response.locations - - location_names = {loc.location_name:loc for loc in all_locations} - if location_type == dp.LocationType.GSP: - location_names = {f'{int(loc.metadata.fields['gsp_id'].number_value)}:{loc.location_name}': loc for loc in all_locations} - # sort by gsp id - location_names = dict(sorted(location_names.items(), key=lambda item: int(item[0].split(":")[0]))) - - # slect locations - selected_location_name = st.sidebar.selectbox("Select a Location", location_names.keys(), index=0) + # select locations + location_names = await get_location_names(client, location_type) + selected_location_name = st.sidebar.selectbox( + "Select a Location", location_names.keys(), index=0, + ) selected_location = location_names[selected_location_name] # get models - get_forecasters_request = dp.ListForecastersRequest() - get_forecasters_response = await client.list_forecasters(get_forecasters_request) - forecasters = get_forecasters_response.forecasters + forecasters = await get_forecasters(client) forecaster_names = 
sorted(list(set([forecaster.forecaster_name for forecaster in forecasters]))) if "pvnet_v2" in forecaster_names: default_index = forecaster_names.index("pvnet_v2") @@ -58,7 +79,9 @@ async def setup_page(client) -> dict: start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=7)) end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3)) start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=UTC) - end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC) - timedelta(seconds=1) + end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC) - timedelta( + seconds=1, + ) # select forecast type selected_forecast_type = st.sidebar.selectbox( @@ -72,18 +95,19 @@ async def setup_page(client) -> dict: if selected_forecast_type == "Horizon": selected_forecast_horizon = st.sidebar.selectbox( "Select a Forecast Horizon", - list(range(0, 24*60, 30)), + list(range(0, 24 * 60, 30)), index=3, ) if selected_forecast_type == "t0": - # make datetimes every 30 minutes from start_date to end_date - all_t0s = pd.date_range(start=start_date, end=end_date, freq='30min').to_pydatetime().tolist() + all_t0s = ( + pd.date_range(start=start_date, end=end_date, freq="30min").to_pydatetime().tolist() + ) selected_t0s = st.sidebar.multiselect( "Select t0s", all_t0s, - default=all_t0s[:min(5, len(all_t0s))], + default=all_t0s[: min(5, len(all_t0s))], ) # select units From ab92c25f92670b8a4776fba51986ebc6991a0262 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Wed, 26 Nov 2025 13:49:52 +0000 Subject: [PATCH 50/60] ruff --- src/dataplatform/forecast/cache.py | 6 +- .../forecast/{constanst.py => constant.py} | 5 +- src/dataplatform/forecast/data.py | 48 ++++++--- src/dataplatform/forecast/main.py | 39 ++++--- src/dataplatform/forecast/plot.py | 101 +++++++++++------- src/dataplatform/forecast/setup.py | 36 ++++--- 6 files changed, 150 insertions(+), 85 deletions(-) rename 
src/dataplatform/forecast/{constanst.py => constant.py} (52%) diff --git a/src/dataplatform/forecast/cache.py b/src/dataplatform/forecast/cache.py index 5f804ec..71ee679 100644 --- a/src/dataplatform/forecast/cache.py +++ b/src/dataplatform/forecast/cache.py @@ -1,7 +1,9 @@ +"""Cache utilities for the forecast module.""" + from dp_sdk.ocf import dp -def key_builder_remove_client(func, *args, **kwargs): +def key_builder_remove_client(func: callable, *args: list, **kwargs: dict) -> str: """Custom key builder that ignores the client argument for caching purposes.""" key = f"{func.__name__}:" for arg in args: @@ -11,6 +13,4 @@ def key_builder_remove_client(func, *args, **kwargs): for k, v in kwargs.items(): key += f"{k}={v}-" - print(key) - return key diff --git a/src/dataplatform/forecast/constanst.py b/src/dataplatform/forecast/constant.py similarity index 52% rename from src/dataplatform/forecast/constanst.py rename to src/dataplatform/forecast/constant.py index 9fd0aa2..bdf7e21 100644 --- a/src/dataplatform/forecast/constanst.py +++ b/src/dataplatform/forecast/constant.py @@ -1,3 +1,5 @@ +"""Constants for the forecast module.""" + colours = [ "#FFD480", "#FF8F73", @@ -14,7 +16,4 @@ metrics = { "MAE": "MAE is absolute mean error, average(abs(forecast-actual))", "ME": "ME is mean (bias) error, average((forecast-actual))", - # "TODO NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)", - # "TODO NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)", - # "NMAE (by observed generation)":" NAME (by observed generation)" } diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index e214d30..335a65a 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -1,5 +1,7 @@ +"""Functions to get forecast and observation data from Data Platform.""" + import time -from datetime import timedelta +from datetime import datetime, timedelta import 
betterproto import pandas as pd @@ -13,12 +15,13 @@ async def get_forecast_data( - client, - location, - start_date, - end_date, - selected_forecasters, + client: dp.DataPlatformDataServiceStub, + location: dp.ListLocationsResponseLocationSummary, + start_date: datetime, + end_date: datetime, + selected_forecasters: list[dp.Forecaster], ) -> pd.DataFrame: + """Get forecast data for the given location and time window.""" all_data_df = [] for forecaster in selected_forecasters: @@ -49,12 +52,13 @@ async def get_forecast_data( @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) async def get_forecast_data_one_forecaster( - client, - location, - start_date, - end_date, - selected_forecaster, + client: dp, + location: dp.ListLocationsResponseLocationSummary, + start_date: datetime, + end_date: datetime, + selected_forecaster: dp.Forecaster, ) -> pd.DataFrame: + """Get forecast data for one forecaster for the given location and time window.""" all_data_df = [] # loop over 30 days of data @@ -100,7 +104,13 @@ async def get_forecast_data_one_forecaster( @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) -async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame: +async def get_all_observations( + client: dp.DataPlatformDataServiceStub, + location: dp.ListLocationsResponseLocationSummary, + start_date: datetime, + end_date: datetime, +) -> pd.DataFrame: + """Get all observations for the given location and time window.""" all_observations_df = [] for observer_name in observer_names: @@ -148,7 +158,14 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat return all_observations_df -async def get_all_data(client, selected_location, start_date, end_date, selected_forecasters): +async def get_all_data( + client: dp.DataPlatformDataServiceStub, + selected_location: dp.ListLocationsResponseLocationSummary, + start_date: datetime, + end_date: datetime, + selected_forecasters: 
list[dp.Forecaster], +) -> dict: + """Get all forecast and observation data, and merge them.""" # get generation data time_start = time.time() all_observations_df = await get_all_observations( @@ -170,7 +187,8 @@ async def get_all_data(client, selected_location, start_date, end_date, selected ) forecast_seconds = time.time() - time_start - # If the observation data includes pvlive_day_after and pvlive_in_day, then lets just take pvlive_day_after + # If the observation data includes pvlive_day_after and pvlive_in_day, + # then lets just take pvlive_day_after one_observations_df = all_observations_df.copy() if "pvlive_day_after" in all_observations_df["observer_name"].values: one_observations_df = all_observations_df[ @@ -211,7 +229,7 @@ async def get_all_data(client, selected_location, start_date, end_date, selected def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame: - """Align t0 forecasts for different forecasters""" + """Align t0 forecasts for different forecasters.""" # get all forecaster names forecaster_names = merged_df["forecaster_name"].unique() diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index fde4056..09e6c64 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -1,11 +1,14 @@ +"""Data Platform Forecast Streamlit Page Main Code.""" + import asyncio import os +import pandas as pd import streamlit as st from dp_sdk.ocf import dp from grpclib.client import Channel -from dataplatform.forecast.constanst import metrics +from dataplatform.forecast.constant import metrics from dataplatform.forecast.data import align_t0, get_all_data from dataplatform.forecast.plot import ( plot_forecast_metric_per_day, @@ -21,11 +24,13 @@ observer_names = ["pvlive_in_day", "pvlive_day_after"] -def dp_forecast_page(): +def dp_forecast_page() -> None: + """Wrapper function that is not async to call the main async function.""" asyncio.run(async_dp_forecast_page()) -async def async_dp_forecast_page(): 
+async def async_dp_forecast_page() -> None: + """Async Main function for the Data Platform Forecast Streamlit page.""" st.title("Data Platform Forecast Page") st.write("This is the forecast page from the Data Platform module. This is very much a WIP") @@ -61,8 +66,10 @@ async def async_dp_forecast_page(): st.write(f"Selected Location uuid: `{selected_location.location_uuid}`.") st.write( - f"Fetched `{len(all_forecast_data_df)}` rows of forecast data in `{forecast_seconds:.2f}` seconds. \ - Fetched `{len(all_observations_df)}` rows of observation data in `{observation_seconds:.2f}` seconds. \ + f"Fetched `{len(all_forecast_data_df)}` rows of forecast data \ + in `{forecast_seconds:.2f}` seconds. \ + Fetched `{len(all_observations_df)}` rows of observation data \ + in `{observation_seconds:.2f}` seconds. \ We cache data for 5 minutses to speed up repeated requests.", ) @@ -100,17 +107,15 @@ async def async_dp_forecast_page(): st.write(metrics) align_t0s = st.checkbox( - "Align t0s (Only common t0s across all forecaster are used)", value=True, + "Align t0s (Only common t0s across all forecaster are used)", + value=True, ) if align_t0s: merged_df = align_t0(merged_df) st.subheader("Metric vs Forecast Horizon") - if selected_metric == "MAE": - show_sem = st.checkbox("Show SEM", value=True) - else: - show_sem = False + show_sem = st.checkbox("Show SEM", value=True) if selected_metric == "MAE" else False fig2, summary_df = plot_forecast_metric_vs_horizon_minutes( merged_df, @@ -159,7 +164,8 @@ async def async_dp_forecast_page(): ### 4. Daily metric plots. ### st.subheader("Daily Metrics Plots") st.write( - "Plotted below are the daily MAE for each forecaster. This is for all forecast horizons.", + "Plotted below are the daily MAE for each forecaster. 
" + "This is for all forecast horizons.", ) fig3 = plot_forecast_metric_per_day( @@ -182,7 +188,14 @@ async def async_dp_forecast_page(): st.write("MAE vs horizon plot should start at 0") -def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units): +def make_summary_data( + merged_df: pd.DataFrame, + min_horizon: int, + max_horizon: int, + scale_factor: float, + units: str, +) -> pd.DataFrame: + """Make summary data table for given min and max horizon mins.""" # Reduce my horizon mins summary_table_df = merged_df[ (merged_df["horizon_mins"] >= min_horizon) & (merged_df["horizon_mins"] <= max_horizon) @@ -203,7 +216,7 @@ def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units): "Capacity_watts", ] - summary_table_df = summary_table_df[["forecaster_name"] + value_columns] + summary_table_df = summary_table_df[["forecaster_name", *value_columns]] summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype(float) diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py index dc16685..92f3d9b 100644 --- a/src/dataplatform/forecast/plot.py +++ b/src/dataplatform/forecast/plot.py @@ -1,18 +1,32 @@ +"""Plotting functions for forecast analysis.""" + +from datetime import datetime + +import pandas as pd import plotly.graph_objects as go -from dataplatform.forecast.constanst import colours +from dataplatform.forecast.constant import colours def make_time_series_trace( - fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic=True, -): + fig: go.Figure, + forecaster_df: pd.DataFrame, + forecaster_name: str, + scale_factor: float, + i: int, + show_probabilistic: bool = True, +) -> go.Figure: + """Make time series trace for a forecaster. + + Include p10 and p90 shading if show_probabilistic is True. 
+ """ fig.add_trace( go.Scatter( x=forecaster_df["target_timestamp_utc"], y=forecaster_df["p50_watts"] / scale_factor, mode="lines", name=forecaster_name, - line=dict(color=colours[i % len(colours)]), + line={"color": colours[i % len(colours)]}, legendgroup=forecaster_name, ), ) @@ -26,7 +40,7 @@ def make_time_series_trace( x=forecaster_df["target_timestamp_utc"], y=forecaster_df["p10_watts"] / scale_factor, mode="lines", - line=dict(color=colours[i % len(colours)], width=0), + line={"color": colours[i % len(colours)], "width": 0}, legendgroup=forecaster_name, showlegend=False, ), @@ -37,7 +51,7 @@ def make_time_series_trace( x=forecaster_df["target_timestamp_utc"], y=forecaster_df["p90_watts"] / scale_factor, mode="lines", - line=dict(color=colours[i % len(colours)], width=0), + line={"color": colours[i % len(colours)], "width": 0}, legendgroup=forecaster_name, showlegend=False, fill="tonexty", @@ -48,17 +62,21 @@ def make_time_series_trace( def plot_forecast_time_series( - all_forecast_data_df, - all_observations_df, - forecaster_names, - observer_names, - scale_factor, - units, - selected_forecast_type, - selected_forecast_horizon, - selected_t0s, - show_probabilistic=True, -): + all_forecast_data_df: pd.DataFrame, + all_observations_df: pd.DataFrame, + forecaster_names: list[str], + observer_names: list[str], + scale_factor: float, + units: str, + selected_forecast_type: str, + selected_forecast_horizon: int, + selected_t0s: list[datetime], + show_probabilistic: bool = True, +) -> go.Figure: + """Plot forecast time series. + + This make a plot of the raw forecasts and observations, for mulitple forecast. 
+ """ if selected_forecast_type == "Current": # Choose current forecast # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins @@ -90,11 +108,11 @@ def plot_forecast_time_series( if observer_name == "pvlive_in_day": # dashed white line - line = dict(color="white", dash="dash") + line = {"color": "white", "dash": "dash"} elif observer_name == "pvlive_day_after": - line = dict(color="white") + line = {"color": "white"} else: - line = dict() + line = {} fig.add_trace( go.Scatter( @@ -112,7 +130,12 @@ def plot_forecast_time_series( ] if selected_forecast_type in ["Current", "Horizon"]: fig = make_time_series_trace( - fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic, + fig, + forecaster_df, + forecaster_name, + scale_factor, + i, + show_probabilistic, ) elif selected_forecast_type == "t0": for _, t0 in enumerate(selected_t0s): @@ -138,13 +161,14 @@ def plot_forecast_time_series( def plot_forecast_metric_vs_horizon_minutes( - merged_df, - forecaster_names, - selected_metric, - scale_factor, - units, - show_sem, -): + merged_df: pd.DataFrame, + forecaster_names: list[str], + selected_metric: str, + scale_factor: float, + units: str, + show_sem: bool, +) -> go.Figure: + """Plot forecast metric vs horizon minutes.""" # Get the mean observed generation mean_observed_generation = merged_df["value_watts"].mean() @@ -199,7 +223,7 @@ def plot_forecast_metric_vs_horizon_minutes( y=forecaster_df[selected_metric] / scale_factor, mode="lines+markers", name=forecaster_name, - line=dict(color=colours[i % len(colours)]), + line={"color": colours[i % len(colours)]}, legendgroup=forecaster_name, ), ) @@ -210,7 +234,7 @@ def plot_forecast_metric_vs_horizon_minutes( x=forecaster_df["horizon_mins"], y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor, mode="lines", - line=dict(color=colours[i % len(colours)], width=0), + line={"color": colours[i % len(colours)], "width": 0}, legendgroup=forecaster_name, 
showlegend=False, ), @@ -221,7 +245,7 @@ def plot_forecast_metric_vs_horizon_minutes( x=forecaster_df["horizon_mins"], y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor, mode="lines", - line=dict(color=colours[i % len(colours)], width=0), + line={"color": colours[i % len(colours)], "width": 0}, legendgroup=forecaster_name, showlegend=False, fill="tonexty", @@ -242,12 +266,13 @@ def plot_forecast_metric_vs_horizon_minutes( def plot_forecast_metric_per_day( - merged_df, - forecaster_names, - selected_metric, - scale_factor, - units, -): + merged_df: pd.DataFrame, + forecaster_names: list, + selected_metric: str, + scale_factor: float, + units: str, +) -> go.Figure: + """Plot forecast metric per day.""" daily_plots_df = merged_df daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date @@ -274,7 +299,7 @@ def plot_forecast_metric_per_day( y=forecaster_df[selected_metric] / scale_factor, # mode="lines+markers", name=forecaster_name, - line=dict(color=colours[i % len(colours)]), + line={"color": colours[i % len(colours)]}, ), ) diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py index fbf5c51..a5094f1 100644 --- a/src/dataplatform/forecast/setup.py +++ b/src/dataplatform/forecast/setup.py @@ -1,3 +1,5 @@ +"""Setup Forecast Streamlit Page.""" + from datetime import UTC, datetime, timedelta import pandas as pd @@ -6,11 +8,15 @@ from dp_sdk.ocf import dp from dataplatform.forecast.cache import key_builder_remove_client -from dataplatform.forecast.constanst import metrics +from dataplatform.forecast.constant import metrics @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) -async def get_location_names(client, location_type) -> dict: +async def get_location_names( + client: dp.DataPlatformDataServiceStub, + location_type: dp.LocationType, +) -> dict: + """Get location names for a given location type.""" # List Location list_locations_request = 
dp.ListLocationsRequest(location_type_filter=location_type) list_locations_response = await client.list_locations(list_locations_request) @@ -31,16 +37,18 @@ async def get_location_names(client, location_type) -> dict: @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) -async def get_forecasters(client): +async def get_forecasters(client: dp.DataPlatformDataServiceStub) -> list[dp.Forecaster]: + """Get all forecasters.""" get_forecasters_request = dp.ListForecastersRequest() get_forecasters_response = await client.list_forecasters(get_forecasters_request) forecasters = get_forecasters_response.forecasters return forecasters -async def setup_page(client) -> dict: +async def setup_page(client: dp.DataPlatformDataServiceStub) -> dict: + """Setup the Streamlit page with sidebar options.""" # Select Country - country = st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0) + st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0) # Select Location Type location_types = [ @@ -53,17 +61,16 @@ async def setup_page(client) -> dict: # select locations location_names = await get_location_names(client, location_type) selected_location_name = st.sidebar.selectbox( - "Select a Location", location_names.keys(), index=0, + "Select a Location", + location_names.keys(), + index=0, ) selected_location = location_names[selected_location_name] # get models forecasters = await get_forecasters(client) - forecaster_names = sorted(list(set([forecaster.forecaster_name for forecaster in forecasters]))) - if "pvnet_v2" in forecaster_names: - default_index = forecaster_names.index("pvnet_v2") - else: - default_index = 0 + forecaster_names = sorted({forecaster.forecaster_name for forecaster in forecasters}) + default_index = forecaster_names.index("pvnet_v2") if "pvnet_v2" in forecaster_names else 0 selected_forecaster_name = st.sidebar.multiselect( "Select a Forecaster", forecaster_names, @@ -76,8 +83,11 @@ async def setup_page(client) -> 
dict: ] # select start and end date - start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=7)) - end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3)) + start_date = st.sidebar.date_input( + "Start date:", + datetime.now(tz=UTC).date() - timedelta(days=7), + ) + end_date = st.sidebar.date_input("End date:", datetime.now(tz=UTC).date() + timedelta(days=3)) start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=UTC) end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC) - timedelta( seconds=1, From 0f4058e3bdd6562e05fe0f3b86cb213730160489 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Wed, 26 Nov 2025 15:31:06 +0000 Subject: [PATCH 51/60] Feedback, add details --- src/dataplatform/forecast/main.py | 17 ++++++++++++----- src/dataplatform/forecast/plot.py | 3 +++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 09e6c64..aa3776d 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -74,12 +74,13 @@ async def async_dp_forecast_page() -> None: ) # add download button - csv = all_forecast_data_df.to_csv().encode("utf-8") + csv = merged_df.to_csv().encode("utf-8") st.download_button( - label="⬇️", + label="⬇️ Download data", data=csv, file_name=f"site_forecast_{selected_location.location_uuid}_{start_date}_{end_date}.csv", mime="text/csv", + help='Download the forecast and generation data as a CSV file.' ) ### 2. Plot of raw forecast data. ### @@ -115,7 +116,13 @@ async def async_dp_forecast_page() -> None: st.subheader("Metric vs Forecast Horizon") - show_sem = st.checkbox("Show SEM", value=True) if selected_metric == "MAE" else False + if selected_metric == "MAE": + show_sem = st.checkbox("Show Uncertainty", + value=True, + help='On the plot below show the uncertainty bands associated with the MAE. 
' \ + 'This is done by looking at Standard Error of the Mean (SEM) of the absolute errors.') + else: + show_sem = False fig2, summary_df = plot_forecast_metric_vs_horizon_minutes( merged_df, @@ -130,10 +137,11 @@ async def async_dp_forecast_page() -> None: csv = summary_df.to_csv().encode("utf-8") st.download_button( - label="⬇️", + label="⬇️ Download summary", data=csv, file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv", mime="text/csv", + help='Download the summary accuracy data as a CSV file.' ) ### 4. Summary Accuracy Table, with slider to select min and max horizon mins. ### @@ -185,7 +193,6 @@ async def async_dp_forecast_page() -> None: st.write("Group adjust and non-adjust") st.write("speed up read, use async and more caching") st.write("Get page working with no observations data") - st.write("MAE vs horizon plot should start at 0") def make_summary_data( diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py index 92f3d9b..42d7e5f 100644 --- a/src/dataplatform/forecast/plot.py +++ b/src/dataplatform/forecast/plot.py @@ -310,4 +310,7 @@ def plot_forecast_metric_per_day( legend_title="Forecaster", ) + if selected_metric == "MAE": + fig3.update_yaxes(range=[0, None]) + return fig3 From 3af7531d95f16727a0b3e523f4ea8d961fac862c Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Thu, 27 Nov 2025 09:54:25 +0000 Subject: [PATCH 52/60] robustness against no forecast data --- src/dataplatform/forecast/data.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index 335a65a..6b9168c 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -32,7 +32,8 @@ async def get_forecast_data( end_date, forecaster, ) - all_data_df.append(forecaster_data_df) + if forecaster_data_df is not None: + all_data_df.append(forecaster_data_df) all_data_df = pd.concat(all_data_df, 
ignore_index=True) @@ -57,7 +58,7 @@ async def get_forecast_data_one_forecaster( start_date: datetime, end_date: datetime, selected_forecaster: dp.Forecaster, -) -> pd.DataFrame: +) -> pd.DataFrame | None: """Get forecast data for one forecaster for the given location and time window.""" all_data_df = [] @@ -93,6 +94,9 @@ async def get_forecast_data_one_forecaster( temp_start_date = temp_start_date + timedelta(days=30) + if len(all_data_df) == 0: + return None + all_data_df = pd.concat(all_data_df, ignore_index=True) # create column forecaster_name, its forecaster_fullname with version removed From 4a0ff3372d906cb9ecc3e611e94f59e3e09453b1 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Fri, 28 Nov 2025 15:30:38 +0000 Subject: [PATCH 53/60] release cache data every 5 mins --- src/dataplatform/forecast/cache.py | 11 +++++++++++ src/dataplatform/forecast/constant.py | 2 ++ src/dataplatform/forecast/data.py | 7 ++++--- src/dataplatform/forecast/main.py | 16 +++++++++------- src/dataplatform/forecast/setup.py | 6 +++--- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/dataplatform/forecast/cache.py b/src/dataplatform/forecast/cache.py index 71ee679..a14402b 100644 --- a/src/dataplatform/forecast/cache.py +++ b/src/dataplatform/forecast/cache.py @@ -1,7 +1,11 @@ """Cache utilities for the forecast module.""" +from datetime import UTC, datetime, timedelta + from dp_sdk.ocf import dp +from dataplatform.forecast.constant import cache_seconds + def key_builder_remove_client(func: callable, *args: list, **kwargs: dict) -> str: """Custom key builder that ignores the client argument for caching purposes.""" @@ -13,4 +17,11 @@ def key_builder_remove_client(func: callable, *args: list, **kwargs: dict) -> st for k, v in kwargs.items(): key += f"{k}={v}-" + # get the time now to the closest 5 minutes, this forces a new cache every 5 minutes + current_time = datetime.now(UTC).replace(second=0, microsecond=0) + current_time = current_time - timedelta( + 
minutes=current_time.minute % (int(cache_seconds / 60)), + ) + key += f"time={current_time}-" + return key diff --git a/src/dataplatform/forecast/constant.py b/src/dataplatform/forecast/constant.py index bdf7e21..0de2e21 100644 --- a/src/dataplatform/forecast/constant.py +++ b/src/dataplatform/forecast/constant.py @@ -17,3 +17,5 @@ "MAE": "MAE is absolute mean error, average(abs(forecast-actual))", "ME": "ME is mean (bias) error, average((forecast-actual))", } + +cache_seconds = 300 # 5 minutes diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index 6b9168c..77ba125 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -9,6 +9,7 @@ from dp_sdk.ocf import dp from dataplatform.forecast.cache import key_builder_remove_client +from dataplatform.forecast.constant import cache_seconds # TODO make this dynamic observer_names = ["pvlive_in_day", "pvlive_day_after"] @@ -51,7 +52,7 @@ async def get_forecast_data( return all_data_df -@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) +@cached(ttl=cache_seconds, cache=Cache.MEMORY, key_builder=key_builder_remove_client) async def get_forecast_data_one_forecaster( client: dp, location: dp.ListLocationsResponseLocationSummary, @@ -96,7 +97,7 @@ async def get_forecast_data_one_forecaster( if len(all_data_df) == 0: return None - + all_data_df = pd.concat(all_data_df, ignore_index=True) # create column forecaster_name, its forecaster_fullname with version removed @@ -107,7 +108,7 @@ async def get_forecast_data_one_forecaster( return all_data_df -@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) +@cached(ttl=cache_seconds, cache=Cache.MEMORY, key_builder=key_builder_remove_client) async def get_all_observations( client: dp.DataPlatformDataServiceStub, location: dp.ListLocationsResponseLocationSummary, diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index aa3776d..1700026 100644 
--- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -80,7 +80,7 @@ async def async_dp_forecast_page() -> None: data=csv, file_name=f"site_forecast_{selected_location.location_uuid}_{start_date}_{end_date}.csv", mime="text/csv", - help='Download the forecast and generation data as a CSV file.' + help="Download the forecast and generation data as a CSV file.", ) ### 2. Plot of raw forecast data. ### @@ -117,10 +117,13 @@ async def async_dp_forecast_page() -> None: st.subheader("Metric vs Forecast Horizon") if selected_metric == "MAE": - show_sem = st.checkbox("Show Uncertainty", - value=True, - help='On the plot below show the uncertainty bands associated with the MAE. ' \ - 'This is done by looking at Standard Error of the Mean (SEM) of the absolute errors.') + show_sem = st.checkbox( + "Show Uncertainty", + value=True, + help="On the plot below show the uncertainty bands associated with the MAE. " + "This is done by looking at " \ + "Standard Error of the Mean (SEM) of the absolute errors.", + ) else: show_sem = False @@ -141,7 +144,7 @@ async def async_dp_forecast_page() -> None: data=csv, file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv", mime="text/csv", - help='Download the summary accuracy data as a CSV file.' + help="Download the summary accuracy data as a CSV file.", ) ### 4. Summary Accuracy Table, with slider to select min and max horizon mins. 
### @@ -188,7 +191,6 @@ async def async_dp_forecast_page() -> None: st.header("Known Issues and TODOs") - st.write("Bug: cache not releasing, the cache should stay for 5 minutes") st.write("Add more metrics") st.write("Group adjust and non-adjust") st.write("speed up read, use async and more caching") diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py index a5094f1..b82c8a5 100644 --- a/src/dataplatform/forecast/setup.py +++ b/src/dataplatform/forecast/setup.py @@ -8,10 +8,10 @@ from dp_sdk.ocf import dp from dataplatform.forecast.cache import key_builder_remove_client -from dataplatform.forecast.constant import metrics +from dataplatform.forecast.constant import cache_seconds, metrics -@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) +@cached(ttl=cache_seconds, cache=Cache.MEMORY, key_builder=key_builder_remove_client) async def get_location_names( client: dp.DataPlatformDataServiceStub, location_type: dp.LocationType, @@ -36,7 +36,7 @@ async def get_location_names( return location_names -@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client) +@cached(ttl=cache_seconds, cache=Cache.MEMORY, key_builder=key_builder_remove_client) async def get_forecasters(client: dp.DataPlatformDataServiceStub) -> list[dp.Forecaster]: """Get all forecasters.""" get_forecasters_request = dp.ListForecastersRequest() From 697893511694fe720c756ccd1bffad89ba8e52a1 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 1 Dec 2025 17:07:02 +0000 Subject: [PATCH 54/60] PR comments --- src/dataplatform/forecast/constant.py | 3 + src/dataplatform/forecast/data.py | 84 ++++++++----------- src/dataplatform/forecast/main.py | 112 ++++++++++++++++++-------- src/dataplatform/forecast/plot.py | 49 +---------- src/dataplatform/forecast/setup.py | 2 +- 5 files changed, 117 insertions(+), 133 deletions(-) diff --git a/src/dataplatform/forecast/constant.py b/src/dataplatform/forecast/constant.py index 0de2e21..5c51245 
100644 --- a/src/dataplatform/forecast/constant.py +++ b/src/dataplatform/forecast/constant.py @@ -19,3 +19,6 @@ } cache_seconds = 300 # 5 minutes + +# This is used for a specific case for the UK National and GSP +observer_names = ["pvlive_in_day", "pvlive_day_after"] diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index 77ba125..33f653c 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -9,10 +9,7 @@ from dp_sdk.ocf import dp from dataplatform.forecast.cache import key_builder_remove_client -from dataplatform.forecast.constant import cache_seconds - -# TODO make this dynamic -observer_names = ["pvlive_in_day", "pvlive_day_after"] +from dataplatform.forecast.constant import cache_seconds, observer_names async def get_forecast_data( @@ -38,16 +35,14 @@ async def get_forecast_data( all_data_df = pd.concat(all_data_df, ignore_index=True) + all_data_df["effective_capacity_watts"] = all_data_df["effective_capacity_watts"].astype(float) + # get watt value - all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[ - "effective_capacity_watts" - ].astype(float) + all_data_df["p50_watts"] = all_data_df["p50_fraction"] * all_data_df["effective_capacity_watts"] for col in ["p10", "p25", "p75", "p90"]: if col in all_data_df.columns: - all_data_df[f"{col}_watts"] = all_data_df[col].astype(float) * all_data_df[ - "effective_capacity_watts" - ].astype(float) + all_data_df[f"{col}_watts"] = all_data_df[col] * all_data_df["effective_capacity_watts"] return all_data_df @@ -61,14 +56,12 @@ async def get_forecast_data_one_forecaster( selected_forecaster: dp.Forecaster, ) -> pd.DataFrame | None: """Get forecast data for one forecaster for the given location and time window.""" - all_data_df = [] + all_data_list_dict = [] - # loop over 30 days of data + # Grab all the data, in chunks of 30 days to avoid too large requests temp_start_date = start_date while temp_start_date <= end_date: - 
temp_end_date = temp_start_date + timedelta(days=30) - if temp_end_date > end_date: - temp_end_date = end_date + temp_end_date = min(temp_start_date + timedelta(days=30), end_date) # fetch data stream_forecast_data_request = dp.StreamForecastDataRequest( @@ -87,18 +80,18 @@ async def get_forecast_data_one_forecaster( ) if len(forecasts) > 0: - all_data_df.append( - pd.DataFrame.from_dict(forecasts) - .pipe(lambda df: df.join(pd.json_normalize(df["other_statistics_fractions"]))) - .drop("other_statistics_fractions", axis=1), - ) + all_data_list_dict.extend(forecasts) temp_start_date = temp_start_date + timedelta(days=30) + all_data_df = pd.DataFrame.from_dict(all_data_list_dict) if len(all_data_df) == 0: return None - all_data_df = pd.concat(all_data_df, ignore_index=True) + # get plevels into columns + all_data_df = all_data_df.pipe( + lambda df: df.join(pd.json_normalize(df["other_statistics_fractions"])), + ).drop("other_statistics_fractions", axis=1) # create column forecaster_name, its forecaster_fullname with version removed all_data_df["forecaster_name"] = all_data_df["forecaster_fullname"].apply( @@ -119,13 +112,11 @@ async def get_all_observations( all_observations_df = [] for observer_name in observer_names: - # loop over 7 days of data + # Get all the observations for this observer_name, in chunks of 7 days observation_one_df = [] temp_start_date = start_date while temp_start_date <= end_date: - temp_end_date = temp_start_date + timedelta(days=7) - if temp_end_date > end_date: - temp_end_date = end_date + temp_end_date = min(temp_start_date + timedelta(days=7), end_date) get_observations_request = dp.GetObservationsAsTimeseriesRequest( observer_name=observer_name, @@ -155,9 +146,13 @@ async def get_all_observations( all_observations_df = pd.concat(all_observations_df, ignore_index=True) - all_observations_df["value_watts"] = all_observations_df["value_fraction"].astype( - float, - ) * all_observations_df["effective_capacity_watts"].astype(float) + 
all_observations_df["effective_capacity_watts"] = all_observations_df[ + "effective_capacity_watts" + ].astype(float) + + all_observations_df["value_watts"] = ( + all_observations_df["value_fraction"] * all_observations_df["effective_capacity_watts"] + ) all_observations_df["timestamp_utc"] = pd.to_datetime(all_observations_df["timestamp_utc"]) return all_observations_df @@ -202,9 +197,9 @@ async def get_all_data( # make target_timestamp_utc all_forecast_data_df["init_timestamp"] = pd.to_datetime(all_forecast_data_df["init_timestamp"]) - all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime( - all_forecast_data_df["init_timestamp"], - ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m") + all_forecast_data_df["target_timestamp_utc"] = all_forecast_data_df[ + "init_timestamp" + ] + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m") # take the foecast data, and group by horizonMins, forecasterFullName # calculate mean absolute error between p50Fraction and observations valueFraction @@ -218,7 +213,7 @@ async def get_all_data( ) merged_df["effective_capacity_watts_observation"] = merged_df[ "effective_capacity_watts_observation" - ].astype(float) + ] # error and absolute error merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"] @@ -235,21 +230,10 @@ async def get_all_data( def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame: """Align t0 forecasts for different forecasters.""" - # get all forecaster names - forecaster_names = merged_df["forecaster_name"].unique() - - # align t0 for each forecaster - t0s_per_forecaster = {} - for forecaster_name in forecaster_names: - forecaster_df = merged_df[merged_df["forecaster_name"] == forecaster_name] - - t0s = forecaster_df["init_timestamp"].unique() - t0s_per_forecaster[forecaster_name] = set(t0s) - - # find common t0s - common_t0s = set.intersection(*t0s_per_forecaster.values()) - - # align common t0s in merged_df - merged_df = 
merged_df[merged_df["init_timestamp"].isin(common_t0s)] - - return merged_df + # number of unique forecasters + num_forecasters = merged_df["forecaster_name"].nunique() + # Count number of forecasters that have each t0 time + counts = merged_df.groupby("init_timestamp")["forecaster_name"].nunique() + # Filter to just those t0s that all forecasters have + common_t0s = counts[counts == num_forecasters].index + return merged_df[merged_df["init_timestamp"].isin(common_t0s)] diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 1700026..27ee535 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -8,7 +8,7 @@ from dp_sdk.ocf import dp from grpclib.client import Channel -from dataplatform.forecast.constant import metrics +from dataplatform.forecast.constant import metrics, observer_names from dataplatform.forecast.data import align_t0, get_all_data from dataplatform.forecast.plot import ( plot_forecast_metric_per_day, @@ -20,9 +20,6 @@ data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost") data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051")) -# TODO make this dynamic -observer_names = ["pvlive_in_day", "pvlive_day_after"] - def dp_forecast_page() -> None: """Wrapper function that is not async to call the main async function.""" @@ -121,14 +118,17 @@ async def async_dp_forecast_page() -> None: "Show Uncertainty", value=True, help="On the plot below show the uncertainty bands associated with the MAE. " - "This is done by looking at " \ - "Standard Error of the Mean (SEM) of the absolute errors.", + "This is done by looking at the " + "Standard Error of the Mean (SEM) of the absolute errors. 
" + "We plot the 5 to 95 percentile range around the MAE.", ) else: show_sem = False - fig2, summary_df = plot_forecast_metric_vs_horizon_minutes( - merged_df, + summary_df = make_summary_data_metric_vs_horizon_minutes(merged_df) + + fig2 = plot_forecast_metric_vs_horizon_minutes( + summary_df, forecaster_names, selected_metric, scale_factor, @@ -151,13 +151,15 @@ async def async_dp_forecast_page() -> None: st.subheader("Summary Accuracy Table") # add slider to select min and max horizon mins + default_min_horizon = int(summary_df["horizon_mins"].min()) + default_max_horizon = int(summary_df["horizon_mins"].max()) min_horizon, max_horizon = st.slider( "Select Horizon Mins Range", - int(summary_df["horizon_mins"].min()), - int(summary_df["horizon_mins"].max()), + default_min_horizon, + default_max_horizon, ( - int(summary_df["horizon_mins"].min()), - int(summary_df["horizon_mins"].max()), + default_min_horizon, + default_max_horizon, ), step=30, ) @@ -210,39 +212,19 @@ def make_summary_data( (merged_df["horizon_mins"] >= min_horizon) & (merged_df["horizon_mins"] <= max_horizon) ] - summary_table_df = summary_table_df.rename( - columns={ - "effective_capacity_watts_observation": "Capacity_watts", - "value_watts": "Mean_Observed_Generation_watts", - }, - ) + capacity_watts_col = "effective_capacity_watts_observation" value_columns = [ "error", "absolute_error", - # 'absolute_error_normalized_by_generation', - "Mean_Observed_Generation_watts", - "Capacity_watts", + "value_watts", + capacity_watts_col, ] - summary_table_df = summary_table_df[["forecaster_name", *value_columns]] - summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype(float) - # group by forecaster full name a summary_table_df = summary_table_df.groupby("forecaster_name").mean() - # rename - summary_table_df = summary_table_df.rename( - columns={ - "error": "ME", - "absolute_error": "MAE", - # 'absolute_error_normalized_by_generation': 'NMAE (by observed generation)', - 
"Capacity_watts": "Mean Capacity", - "Mean_Observed_Generation_watts": "Mean Observed Generation", - }, - ) - # scale by units summary_table_df = summary_table_df / scale_factor summary_table_df = summary_table_df.rename( @@ -256,4 +238,64 @@ def make_summary_data( values=summary_table_df.columns.tolist(), ) + # rename + summary_table_df = summary_table_df.rename( + columns={ + "error": "ME", + "absolute_error": "MAE", + capacity_watts_col: "Mean Capacity", + "value_watts": "Mean Observed Generation", + }, + ) + return summary_table_df + + +def make_summary_data_metric_vs_horizon_minutes( + merged_df: pd.DataFrame, +) -> pd.DataFrame: + """Make summary data for forecast metric vs horizon minutes.""" + # Get the mean observed generation + mean_observed_generation = merged_df["value_watts"].mean() + + # mean absolute error by horizonMins and forecasterFullName + summary_df = ( + merged_df.groupby(["horizon_mins", "forecaster_name"]) + .agg({"absolute_error": "mean"}) + .reset_index() + ) + summary_df["std"] = ( + merged_df.groupby(["horizon_mins", "forecaster_name"]) + .agg({"absolute_error": "std"}) + .reset_index()["absolute_error"] + ) + summary_df["count"] = ( + merged_df.groupby(["horizon_mins", "forecaster_name"]) + .agg({"absolute_error": "count"}) + .reset_index()["absolute_error"] + ) + summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5) + + # ME + summary_df["ME"] = ( + merged_df.groupby(["horizon_mins", "forecaster_name"]) + .agg({"error": "mean"}) + .reset_index()["error"] + ) + + # TODO more metrics + + summary_df["effective_capacity_watts_observation"] = ( + merged_df.groupby(["horizon_mins", "forecaster_name"]) + .agg({"effective_capacity_watts_observation": "mean"}) + .reset_index()["effective_capacity_watts_observation"] + ) + + # rename absolute_error to MAE + summary_df = summary_df.rename(columns={"absolute_error": "MAE"}) + summary_df["NMAE (by capacity)"] = ( + summary_df["MAE"] / 
summary_df["effective_capacity_watts_observation"] + ) + summary_df["NMAE (by mean observed generation)"] = summary_df["MAE"] / mean_observed_generation + + return summary_df diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py index 42d7e5f..bd89138 100644 --- a/src/dataplatform/forecast/plot.py +++ b/src/dataplatform/forecast/plot.py @@ -161,7 +161,7 @@ def plot_forecast_time_series( def plot_forecast_metric_vs_horizon_minutes( - merged_df: pd.DataFrame, + summary_df: pd.DataFrame, forecaster_names: list[str], selected_metric: str, scale_factor: float, @@ -169,50 +169,6 @@ def plot_forecast_metric_vs_horizon_minutes( show_sem: bool, ) -> go.Figure: """Plot forecast metric vs horizon minutes.""" - # Get the mean observed generation - mean_observed_generation = merged_df["value_watts"].mean() - - # mean absolute error by horizonMins and forecasterFullName - summary_df = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"absolute_error": "mean"}) - .reset_index() - ) - summary_df["std"] = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"absolute_error": "std"}) - .reset_index()["absolute_error"] - ) - summary_df["count"] = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"absolute_error": "count"}) - .reset_index()["absolute_error"] - ) - summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5) - - # ME - summary_df["ME"] = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"error": "mean"}) - .reset_index()["error"] - ) - - # TODO more metrics - - summary_df["effective_capacity_watts_observation"] = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"effective_capacity_watts_observation": "mean"}) - .reset_index()["effective_capacity_watts_observation"] - ) - - # rename absolute_error to MAE - summary_df = summary_df.rename(columns={"absolute_error": "MAE"}) - summary_df["NMAE (by capacity)"] = ( - summary_df["MAE"] / 
summary_df["effective_capacity_watts_observation"] - ) - summary_df["NMAE (by mean observed generation)"] = summary_df["MAE"] / mean_observed_generation - # summary_df["NMAE (by observed generation)"] = summary_df["absolute_error_divided_by_observed"] - fig2 = go.Figure() for i, forecaster_name in enumerate(forecaster_names): @@ -262,7 +218,7 @@ def plot_forecast_metric_vs_horizon_minutes( if selected_metric == "MAE": fig2.update_yaxes(range=[0, None]) - return fig2, summary_df + return fig2 def plot_forecast_metric_per_day( @@ -297,7 +253,6 @@ def plot_forecast_metric_per_day( go.Scatter( x=forecaster_df["date_utc"], y=forecaster_df[selected_metric] / scale_factor, - # mode="lines+markers", name=forecaster_name, line={"color": colours[i % len(colours)]}, ), diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py index b82c8a5..e6b8e8a 100644 --- a/src/dataplatform/forecast/setup.py +++ b/src/dataplatform/forecast/setup.py @@ -105,7 +105,7 @@ async def setup_page(client: dp.DataPlatformDataServiceStub) -> dict: if selected_forecast_type == "Horizon": selected_forecast_horizon = st.sidebar.selectbox( "Select a Forecast Horizon", - list(range(0, 24 * 60, 30)), + list(range(0, 36 * 60, 30)), index=3, ) if selected_forecast_type == "t0": From 2b2da5bb00d813e786271dcb3661332f0567dfef Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 1 Dec 2025 17:25:23 +0000 Subject: [PATCH 55/60] add option for strict forecast filtering --- src/dataplatform/forecast/main.py | 2 ++ src/dataplatform/forecast/plot.py | 12 +++++++++--- src/dataplatform/forecast/setup.py | 8 ++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 27ee535..0a6125f 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -46,6 +46,7 @@ async def async_dp_forecast_page() -> None: selected_forecast_horizon = setup_page_dict["selected_forecast_horizon"] 
selected_t0s = setup_page_dict["selected_t0s"] units = setup_page_dict["units"] + strict_horizon_filtering = setup_page_dict["strict_horizon_filtering"] ### 1. Get all the data ### all_data_dict = await get_all_data( @@ -96,6 +97,7 @@ async def async_dp_forecast_page() -> None: selected_forecast_horizon=selected_forecast_horizon, selected_t0s=selected_t0s, show_probabilistic=show_probabilistic, + strict_horizon_filtering=strict_horizon_filtering, ) st.plotly_chart(fig) diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py index bd89138..ebaa66c 100644 --- a/src/dataplatform/forecast/plot.py +++ b/src/dataplatform/forecast/plot.py @@ -72,6 +72,7 @@ def plot_forecast_time_series( selected_forecast_horizon: int, selected_t0s: list[datetime], show_probabilistic: bool = True, + strict_horizon_filtering: bool = False, ) -> go.Figure: """Plot forecast time series. @@ -88,9 +89,14 @@ def plot_forecast_time_series( ] elif selected_forecast_type == "Horizon": # Choose horizon forecast - current_forecast_df = all_forecast_data_df[ - all_forecast_data_df["horizon_mins"] >= selected_forecast_horizon - ] + if strict_horizon_filtering: + current_forecast_df = all_forecast_data_df[ + all_forecast_data_df["horizon_mins"] == selected_forecast_horizon + ] + else: + current_forecast_df = all_forecast_data_df[ + all_forecast_data_df["horizon_mins"] >= selected_forecast_horizon + ] current_forecast_df = current_forecast_df.loc[ current_forecast_df.groupby(["target_timestamp_utc", "forecaster_name"])[ "horizon_mins" diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py index e6b8e8a..a46db86 100644 --- a/src/dataplatform/forecast/setup.py +++ b/src/dataplatform/forecast/setup.py @@ -101,6 +101,7 @@ async def setup_page(client: dp.DataPlatformDataServiceStub) -> dict: ) selected_forecast_horizon = None + strict_horizon_filtering = False selected_t0s = None if selected_forecast_type == "Horizon": selected_forecast_horizon = 
st.sidebar.selectbox( @@ -108,6 +109,12 @@ async def setup_page(client: dp.DataPlatformDataServiceStub) -> dict: list(range(0, 36 * 60, 30)), index=3, ) + strict_horizon_filtering = st.sidebar.checkbox( + "Strict Horizon Filtering", + value=False, + help="Only show forecasts that exactly match the selected horizon, " + "if not, we use any forecast horizon greater or equal than", + ) if selected_forecast_type == "t0": # make datetimes every 30 minutes from start_date to end_date all_t0s = ( @@ -140,4 +147,5 @@ async def setup_page(client: dp.DataPlatformDataServiceStub) -> dict: "selected_forecast_horizon": selected_forecast_horizon, "selected_t0s": selected_t0s, "units": units, + "strict_horizon_filtering": strict_horizon_filtering, } From b1bee0b78ba86cd409fce85546df04fba5883a25 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 1 Dec 2025 17:26:30 +0000 Subject: [PATCH 56/60] tidy --- src/dataplatform/forecast/data.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index 33f653c..34c3df5 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -211,9 +211,6 @@ async def get_all_data( how="inner", suffixes=("_forecast", "_observation"), ) - merged_df["effective_capacity_watts_observation"] = merged_df[ - "effective_capacity_watts_observation" - ] # error and absolute error merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"] From 50e0ae225bf8a3c0bfa556d914382e20e3d67660 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 1 Dec 2025 17:47:11 +0000 Subject: [PATCH 57/60] PR comments, use agg better --- src/dataplatform/forecast/main.py | 31 +++++++++++++------------------ src/dataplatform/forecast/plot.py | 10 +++------- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 0a6125f..2f642a0 100644 --- a/src/dataplatform/forecast/main.py +++ 
b/src/dataplatform/forecast/main.py @@ -263,26 +263,21 @@ def make_summary_data_metric_vs_horizon_minutes( # mean absolute error by horizonMins and forecasterFullName summary_df = ( merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"absolute_error": "mean"}) + .agg( + { + "absolute_error": ["mean", "std", "count"], + "error": "mean", + }, + ) .reset_index() ) - summary_df["std"] = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"absolute_error": "std"}) - .reset_index()["absolute_error"] - ) - summary_df["count"] = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"absolute_error": "count"}) - .reset_index()["absolute_error"] - ) - summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5) - # ME - summary_df["ME"] = ( - merged_df.groupby(["horizon_mins", "forecaster_name"]) - .agg({"error": "mean"}) - .reset_index()["error"] + summary_df.columns = ["_".join(col).strip() for col in summary_df.columns.values] + summary_df.columns = [col[:-1] if col.endswith("_") else col for col in summary_df.columns] + + # calculate sem of MAE + summary_df["sem"] = summary_df["absolute_error_std"] / ( + summary_df["absolute_error_count"] ** 0.5 ) # TODO more metrics @@ -294,7 +289,7 @@ def make_summary_data_metric_vs_horizon_minutes( ) # rename absolute_error to MAE - summary_df = summary_df.rename(columns={"absolute_error": "MAE"}) + summary_df = summary_df.rename(columns={"absolute_error_mean": "MAE", "error_mean": "ME"}) summary_df["NMAE (by capacity)"] = ( summary_df["MAE"] / summary_df["effective_capacity_watts_observation"] ) diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py index ebaa66c..ff53bc4 100644 --- a/src/dataplatform/forecast/plot.py +++ b/src/dataplatform/forecast/plot.py @@ -241,16 +241,12 @@ def plot_forecast_metric_per_day( # group by forecaster name and date daily_metrics_df = ( daily_plots_df.groupby(["date_utc", "forecaster_name"]) - .agg({"absolute_error": "mean"}) + 
.agg({"absolute_error": "mean", "error": "mean"}) .reset_index() - ).rename(columns={"absolute_error": "MAE"}) - # ME - daily_metrics_df["ME"] = ( - daily_plots_df.groupby(["date_utc", "forecaster_name"]) - .agg({"error": "mean"}) - .reset_index()["error"] ) + daily_metrics_df = daily_metrics_df.rename(columns={"absolute_error": "MAE", "error": "ME"}) + fig3 = go.Figure() for i, forecaster_name in enumerate(forecaster_names): name_and_version = f"{forecaster_name}" From 5232456ca0f650d1b57d52d87a7465d2617e1980 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 1 Dec 2025 18:09:43 +0000 Subject: [PATCH 58/60] use p10_fraction, rather than p10 --- src/dataplatform/forecast/data.py | 5 +++-- src/dataplatform/forecast/main.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index 34c3df5..fa45943 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -41,8 +41,9 @@ async def get_forecast_data( all_data_df["p50_watts"] = all_data_df["p50_fraction"] * all_data_df["effective_capacity_watts"] for col in ["p10", "p25", "p75", "p90"]: - if col in all_data_df.columns: - all_data_df[f"{col}_watts"] = all_data_df[col] * all_data_df["effective_capacity_watts"] + col_fraction = f"{col}_fraction" + if col_fraction in all_data_df.columns: + all_data_df[f"{col}_watts"] = all_data_df[col_fraction] * all_data_df["effective_capacity_watts"] return all_data_df diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py index 2f642a0..0602268 100644 --- a/src/dataplatform/forecast/main.py +++ b/src/dataplatform/forecast/main.py @@ -68,7 +68,7 @@ async def async_dp_forecast_page() -> None: in `{forecast_seconds:.2f}` seconds. \ Fetched `{len(all_observations_df)}` rows of observation data \ in `{observation_seconds:.2f}` seconds. 
\ - We cache data for 5 minutses to speed up repeated requests.", + We cache data for 5 minutes to speed up repeated requests.", ) # add download button From 612ebbf0b915f728d5225580e99527eee811d045 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 1 Dec 2025 21:05:37 +0000 Subject: [PATCH 59/60] add _fraction to column from other_statistics_fractions column --- src/dataplatform/forecast/data.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index fa45943..bdf66b1 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -89,10 +89,14 @@ async def get_forecast_data_one_forecaster( if len(all_data_df) == 0: return None - # get plevels into columns + # get plevels into columns and rename them 'fraction + columns_before_expand = set(all_data_df.columns) all_data_df = all_data_df.pipe( lambda df: df.join(pd.json_normalize(df["other_statistics_fractions"])), ).drop("other_statistics_fractions", axis=1) + new_columns = set(all_data_df.columns) - columns_before_expand + if len(new_columns) > 0: + all_data_df = all_data_df.rename(columns={col: f"{col}_fraction" for col in new_columns}) # create column forecaster_name, its forecaster_fullname with version removed all_data_df["forecaster_name"] = all_data_df["forecaster_fullname"].apply( From 0726b1044dde215e57331c250ab0c5f44f1ed3f1 Mon Sep 17 00:00:00 2001 From: Peter Dudfield Date: Mon, 1 Dec 2025 21:06:07 +0000 Subject: [PATCH 60/60] lint --- src/dataplatform/forecast/data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py index bdf66b1..d4c3d46 100644 --- a/src/dataplatform/forecast/data.py +++ b/src/dataplatform/forecast/data.py @@ -43,7 +43,9 @@ async def get_forecast_data( for col in ["p10", "p25", "p75", "p90"]: col_fraction = f"{col}_fraction" if col_fraction in all_data_df.columns: - 
all_data_df[f"{col}_watts"] = all_data_df[col_fraction] * all_data_df["effective_capacity_watts"] + all_data_df[f"{col}_watts"] = ( + all_data_df[col_fraction] * all_data_df["effective_capacity_watts"] + ) return all_data_df