From 29249f5e12f8075915a475cfffa84d9e404a256d Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Fri, 14 Nov 2025 17:38:35 +0000
Subject: [PATCH 01/60] first bit of work

---
 pyproject.toml               |   4 +
 src/dataplatform/forecast.py | 269 +++++++++++++++++++++++++++++++++++
 src/main.py                  |   2 +
 3 files changed, 275 insertions(+)
 create mode 100644 src/dataplatform/forecast.py

diff --git a/pyproject.toml b/pyproject.toml
index 25ef385..a81d0e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,7 @@ dependencies = [
     "torch @ https://download.pytorch.org/whl/cpu/torch-2.3.1%2Bcpu-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64'",
     "torch @ https://download.pytorch.org/whl/cpu/torch-2.3.1-cp312-none-macosx_11_0_arm64.whl ; platform_system == 'Darwin' and platform_machine == 'arm64'",
     "matplotlib>=3.8,<4.0",
+    "dp-sdk",
 ]
 
 [project.optional-dependencies]
@@ -66,6 +67,9 @@ dev-dependencies = [
 index-url = "https://download.pytorch.org/whl/cpu"
 extra-index-url = ["https://pypi.org/simple"]
 
+[tool.uv.sources]
+dp-sdk = { url = "https://github.com/openclimatefix/data-platform/releases/download/v0.12.0/dp_sdk-0.12.0-py3-none-any.whl" }
+
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 python_files = ["test_*.py"]
diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
new file mode 100644
index 0000000..5b9f4f4
--- /dev/null
+++ b/src/dataplatform/forecast.py
@@ -0,0 +1,269 @@
+import streamlit as st
+from datetime import datetime, timedelta, timezone
+import os
+import asyncio
+from dp_sdk.ocf import dp
+import pandas as pd
+from grpclib.client import Channel
+import plotly.graph_objects as go
+
+data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost")
+data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051"))
+
+# TODO make this dynamic
+observer_names = ['pvlive_in_day','pvlive_day_after']
+
+
+async def get_forecast_data(_client, location,start_date,end_date,selected_forecasters) -> pd.DataFrame:
+
+    all_data_df = []
+
+    # loop over 7 days of data
+    temp_start_date = start_date
+    while temp_start_date <= end_date:
+        temp_end_date = temp_start_date + timedelta(days=7)
+        if temp_end_date > end_date:
+            temp_end_date = end_date
+
+        # fetch data
+        stream_forecast_data_request = dp.StreamForecastDataRequest(location_uuid=location.location_uuid,
+                                    energy_source=dp.EnergySource.SOLAR,
+                                    time_window=dp.TimeWindow(
+                                        start_timestamp_utc=datetime.combine(temp_start_date, datetime.min.time()).replace(tzinfo=timezone.utc),
+                                        end_timestamp_utc=datetime.combine(temp_end_date, datetime.min.time()).replace(tzinfo=timezone.utc)
+                                    ),
+                                    forecasters=selected_forecasters)
+        stream_forecast_data_response = _client.stream_forecast_data(stream_forecast_data_request)
+
+
+        i=0
+        async for forecast_data in stream_forecast_data_response:
+
+            forecast_data_dict = forecast_data.to_dict()
+            forecast_data_dict.pop('otherStatisticsFractions')
+            data_df = pd.DataFrame(forecast_data_dict, index=[i])
+            all_data_df.append(data_df)
+
+            i += 1
+
+        temp_start_date = temp_start_date + timedelta(days=7)
+
+    all_data_df = pd.concat(all_data_df, ignore_index=True)
+
+    return all_data_df
+
+
+async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame:
+
+    all_observations_df = []
+
+    for observer_name in observer_names:
+
+        # loop over 7 days of data
+        observation_one_df = []
+        temp_start_date = start_date
+        while temp_start_date <= end_date:
+            temp_end_date = temp_start_date + timedelta(days=7)
+            if temp_end_date > end_date:
+                temp_end_date = end_date
+
+
+            get_observations_request = dp.GetObservationsAsTimeseriesRequest(observer_name=observer_name,
+                                                location_uuid=location.location_uuid,
+                                                energy_source=dp.EnergySource.SOLAR,
+                                                time_window=dp.TimeWindow(
+                                                    start_timestamp_utc=datetime.combine(temp_start_date, datetime.min.time()).replace(tzinfo=timezone.utc),
+                                                    end_timestamp_utc=datetime.combine(temp_end_date, datetime.min.time()).replace(tzinfo=timezone.utc)
+                                                ),)
+            get_observations_response = await client.get_observations_as_timeseries(get_observations_request)
+            
+            i=0
+            for value in get_observations_response.values:
+                observations_df = pd.DataFrame(value.to_dict(), index=[i])
+                observation_one_df.append(observations_df)
+                i += 1
+
+            temp_start_date = temp_start_date + timedelta(days=7)
+        
+        observation_one_df = pd.concat(observation_one_df, ignore_index=True)
+        observation_one_df = observation_one_df.sort_values(by='timestampUtc')
+        observation_one_df['observer_name'] = observer_name
+
+        all_observations_df.append(observation_one_df)
+    
+    all_observations_df = pd.concat(all_observations_df, ignore_index=True)
+
+    return all_observations_df
+
+
+def dp_forecast_page():
+    asyncio.run(async_dp_forecast_page())
+
+
+async def async_dp_forecast_page():    
+    st.title("Data Platform Forecast Page")
+    st.write("This is the forecast page from the Data Platform module. This is very much a WIP")
+
+    async with Channel(host=data_platform_host, port=data_platform_port) as channel:
+        client = dp.DataPlatformDataServiceStub(channel)
+
+        # Select Country
+        country = st.sidebar.selectbox("TODO Select a Country", ['UK', 'NL'], index=0)
+
+        # Select Location Type
+        location_types = [dp.LocationType.NATION, dp.LocationType.GSP, dp.LocationType.SITE]
+        location_type = st.sidebar.selectbox("Select a Location Type", location_types, index=0)
+        
+        # List Location
+        list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type)
+        list_locations_response = await client.list_locations(list_locations_request)
+        locations = list_locations_response.locations
+        location_names = [loc.location_name for loc in locations]
+        
+        # slect locations
+        selected_location_name = st.sidebar.selectbox("Select a Location", location_names, index=0)
+        selected_location = next(loc for loc in locations if loc.location_name == selected_location_name)
+
+        # get models
+        get_forecasters_request = dp.ListForecastersRequest(latest_versions_only=True)
+        get_forecasters_response = await client.list_forecasters(get_forecasters_request)
+        forecasters = get_forecasters_response.forecasters
+        forecaster_names = [forecaster.forecaster_name for forecaster in forecasters]
+        selected_forecaster_name = st.sidebar.multiselect("Select a Forecaster", forecaster_names, default=forecaster_names[0])
+        selected_forecasters = [forecaster for forecaster in forecasters if forecaster.forecaster_name in selected_forecaster_name]
+
+        # select start and end date
+        start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=30))
+        end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3))
+
+        # select forecast type
+        st.sidebar.write("TODO Select Forecast Type:")
+
+        # setup page
+        st.header("Time Series Plot")
+        
+        # get generation data
+        all_observations_df = await get_all_observations(client, selected_location, start_date, end_date)
+
+        # get forcast all data
+        all_forecast_data_df = await get_forecast_data(client, selected_location, start_date, end_date, selected_forecasters)
+        st.write(f"Selected Location uuid: {selected_location.location_uuid}. \
+                 Fetched {len(all_forecast_data_df)} rows of forecast data")
+
+        # add download button
+        csv = all_forecast_data_df.to_csv().encode("utf-8")
+        st.download_button(
+            label="⬇️",
+            data=csv,
+            file_name=f"site_forecast_{selected_location.location_uuid}_{start_date}_{end_date}.csv",
+            mime="text/csv",
+        )
+
+
+        all_forecast_data_df['target_timestamp_utc'] = pd.to_datetime(all_forecast_data_df['initTimestamp']) + pd.to_timedelta(all_forecast_data_df['horizonMins'], unit='m')
+        
+        # Choose current forecast
+        # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins
+        # it should also be unique for each forecasterFullName
+        current_forecast_df = all_forecast_data_df.loc[all_forecast_data_df.groupby(['target_timestamp_utc', 'forecasterFullname'])['horizonMins'].idxmin()]
+
+        # plot the results
+        fig = go.Figure()
+        for forecaster in selected_forecasters:
+            name_and_version = f'{forecaster.forecaster_name}:{forecaster.forecaster_version}'
+            forecaster_df = current_forecast_df[current_forecast_df['forecasterFullname'] == name_and_version]
+            fig.add_trace(go.Scatter(
+                x=forecaster_df['target_timestamp_utc'],
+                y=forecaster_df['p50Fraction'],
+                mode='lines',
+                name=forecaster.forecaster_name
+            ))
+
+        for observer_name in observer_names:
+            obs_df = all_observations_df[all_observations_df['observer_name'] == observer_name]
+            fig.add_trace(go.Scatter(
+                x=obs_df['timestampUtc'],
+                y=obs_df['valueFraction'],
+                mode='lines',
+                name=observer_name
+            ))
+
+        fig.update_layout(
+            title='Current Forecast',
+            xaxis_title='Time',
+            yaxis_title='Generation [%]',
+            legend_title='Forecaster'
+        )
+
+        st.plotly_chart(fig)
+
+
+
+        st.header("Summary Accuracy Graph")
+
+        # take the foecast data, and group by horizonMins, forecasterFullName
+        # calculate mean absolute error between p50Fraction and observations valueFraction
+        all_observations_df['timestampUtc'] = pd.to_datetime(all_observations_df['timestampUtc'])
+        merged_df = pd.merge(all_forecast_data_df, all_observations_df, left_on=['target_timestamp_utc'], right_on=['timestampUtc'], how='inner', suffixes=('_forecast', '_observation'))
+        merged_df['absolute_error'] = (merged_df['p50Fraction'] - merged_df['valueFraction']).abs()
+
+        summary_df = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'mean'}).reset_index()
+        summary_df['std'] = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'std'}).reset_index()['absolute_error']
+        summary_df['count'] = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'count'}).reset_index()['absolute_error']
+        summary_df['sem'] = summary_df['std'] / (summary_df['count']**0.5)
+
+        fig2 = go.Figure()
+        
+        for forecaster in selected_forecasters:
+            name_and_version = f'{forecaster.forecaster_name}:{forecaster.forecaster_version}'
+            forecaster_df = summary_df[summary_df['forecasterFullname'] == name_and_version]
+            fig2.add_trace(go.Scatter(
+                x=forecaster_df['horizonMins'],
+                y=forecaster_df['absolute_error'],
+                mode='lines+markers',
+                name=forecaster.forecaster_name
+            ))  
+
+            fig2.add_trace(
+                go.Scatter(
+                    x=forecaster_df['horizonMins'],
+                    y=forecaster_df['absolute_error'] - 1.96 * forecaster_df['sem'],
+                    mode="lines",
+                    # name="p10: " + model,
+                    # line=dict(color=get_colour_from_model_name(model), width=0),
+                    legendgroup=forecaster.forecaster_name,
+                    showlegend=False,
+                )
+            )
+
+            fig2.add_trace(
+                go.Scatter(
+                    x=forecaster_df['horizonMins'],
+                    y=forecaster_df['absolute_error'] + 1.96 * forecaster_df['sem'],
+                    mode="lines",
+                    # name="p10: " + model,
+                    # line=dict(color=get_colour_from_model_name(model), width=0),
+                    legendgroup=forecaster.forecaster_name,
+                    showlegend=False,
+                    fill="tonexty",
+                )
+            )
+
+
+        fig2.update_layout(
+            title='Mean Absolute Error by Horizon',
+            xaxis_title='Horizon (Minutes)',
+            yaxis_title='Mean Absolute Error [%]',
+            legend_title='Forecaster'
+        )
+
+        st.plotly_chart(fig2)
+
+
+        csv = summary_df.to_csv().encode("utf-8")
+        st.download_button(
+            label="⬇️",
+            data=csv,
+            file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv",
+            mime="text/csv",
+        )
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
index c4cec02..577303f 100644
--- a/src/main.py
+++ b/src/main.py
@@ -36,6 +36,7 @@
 from cloudcasting_page import cloudcasting_page
 from adjuster import adjuster_page
 from batch_page import batch_page
+from dataplatform.forecast import dp_forecast_page
 
 st.get_option("theme.primaryColor")
 st.set_page_config(layout="wide", page_title="OCF Dashboard")
@@ -262,6 +263,7 @@ def main_page():
         st.Page(status_page, title="🚦 Status"),
         st.Page(forecast_page, title="📈 Forecast"),
         st.Page(pvsite_forecast_page, title="📉 Site Forecast"),
+        st.Page(dp_forecast_page, title="📉 DP Forecast"),
         st.Page(sites_toolbox_page, title="🛠️ Sites Toolbox"),
         st.Page(user_page, title="👥 API Users"),
         st.Page(nwp_page, title="🌤️ NWP"),

From cc505532d8b093d4f24d32ab7492582fd8c4f80f Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Sun, 16 Nov 2025 19:25:17 +0000
Subject: [PATCH 02/60] add to readme

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index adca9b2..e79ace1 100644
--- a/README.md
+++ b/README.md
@@ -83,6 +83,8 @@ To run the app locally, you'll need to connect it to the `forecast development d
 
 OCF team members can connect to the `forecast development database` using [these Notion instructions](https://www.notion.so/openclimatefix/Connecting-to-AWS-RDS-bf35b3fbd61f40df9c974c240e042354). Add `DB_URL= (db_url from notion documents)` to a `secrets.toml` file. Follow the instructions in the Notion document to connect to the database v.
 
+To connect to the Data Platform, set the `DATA_PLATFORM_HOST` and `DATA_PLATFORM_PORT` environment variables (they default to `localhost` and `50051`).
+
 Run app:
 
 ```shell

From 9803a66d7546dac73d9a7e8d1d9bf7dc1d4058c5 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Sun, 16 Nov 2025 19:49:53 +0000
Subject: [PATCH 03/60] add to todo list

---
 src/dataplatform/forecast.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 5b9f4f4..075dc77 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -266,4 +266,15 @@ async def async_dp_forecast_page():
             data=csv,
             file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv",
             mime="text/csv",
-        )
\ No newline at end of file
+        )
+
+
+        st.header("TODO")
+
+        st.write("Metrics summary table")
+        st.write("Add more metrics")
+        st.write("Add forecast horizon options")
+        st.write("Add creation time forecast filter")
+        st.write("Daily Metrics graphs")
+        st.write("colours")
+        st.write("speed up read, use async and more caching")

From 4779ec06d9882e7eccb1262f868d0e049d5bcd75 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Sun, 16 Nov 2025 21:21:32 +0000
Subject: [PATCH 04/60] add todo

---
 src/dataplatform/forecast.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 075dc77..2603c33 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -271,6 +271,7 @@ async def async_dp_forecast_page():
 
         st.header("TODO")
 
+        st.write("Change from % to MW")
         st.write("Metrics summary table")
         st.write("Add more metrics")
         st.write("Add forecast horizon options")

From b159482274bf920bc7d827e34f7139834373fe69 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 17 Nov 2025 11:23:05 +0000
Subject: [PATCH 05/60] stream forecast more efficiently

---
 src/dataplatform/forecast.py | 310 ++++++++++++++++++++++-------------
 1 file changed, 199 insertions(+), 111 deletions(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 2603c33..c4a8a06 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -6,16 +6,18 @@
 import pandas as pd
 from grpclib.client import Channel
 import plotly.graph_objects as go
+import betterproto
 
 data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost")
 data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051"))
 
 # TODO make this dynamic
-observer_names = ['pvlive_in_day','pvlive_day_after']
+observer_names = ["pvlive_in_day", "pvlive_day_after"]
 
 
-async def get_forecast_data(_client, location,start_date,end_date,selected_forecasters) -> pd.DataFrame:
-
+async def get_forecast_data(
+    _client, location, start_date, end_date, selected_forecasters
+) -> pd.DataFrame:
     all_data_df = []
 
     # loop over 7 days of data
@@ -26,25 +28,33 @@ async def get_forecast_data(_client, location,start_date,end_date,selected_forec
             temp_end_date = end_date
 
         # fetch data
-        stream_forecast_data_request = dp.StreamForecastDataRequest(location_uuid=location.location_uuid,
-                                    energy_source=dp.EnergySource.SOLAR,
-                                    time_window=dp.TimeWindow(
-                                        start_timestamp_utc=datetime.combine(temp_start_date, datetime.min.time()).replace(tzinfo=timezone.utc),
-                                        end_timestamp_utc=datetime.combine(temp_end_date, datetime.min.time()).replace(tzinfo=timezone.utc)
-                                    ),
-                                    forecasters=selected_forecasters)
-        stream_forecast_data_response = _client.stream_forecast_data(stream_forecast_data_request)
-
-
-        i=0
-        async for forecast_data in stream_forecast_data_response:
-
-            forecast_data_dict = forecast_data.to_dict()
-            forecast_data_dict.pop('otherStatisticsFractions')
-            data_df = pd.DataFrame(forecast_data_dict, index=[i])
-            all_data_df.append(data_df)
-
-            i += 1
+        stream_forecast_data_request = dp.StreamForecastDataRequest(
+            location_uuid=location.location_uuid,
+            energy_source=dp.EnergySource.SOLAR,
+            time_window=dp.TimeWindow(
+                start_timestamp_utc=datetime.combine(
+                    temp_start_date, datetime.min.time()
+                ).replace(tzinfo=timezone.utc),
+                end_timestamp_utc=datetime.combine(
+                    temp_end_date, datetime.min.time()
+                ).replace(tzinfo=timezone.utc),
+            ),
+            forecasters=selected_forecasters,
+        )
+        forecasts = []
+        async for chunk in _client.stream_forecast_data(stream_forecast_data_request):
+            forecasts.append(chunk.to_dict(casing=betterproto.Casing.SNAKE))
+
+        if len(forecasts) > 0:
+            all_data_df.append(
+                pd.DataFrame.from_dict(forecasts)
+                .pipe(
+                    lambda df: df.join(
+                        pd.json_normalize(df["other_statistics_fractions"])
+                    )
+                )
+                .drop("other_statistics_fractions", axis=1)
+            )
 
         temp_start_date = temp_start_date + timedelta(days=7)
 
@@ -54,11 +64,9 @@ async def get_forecast_data(_client, location,start_date,end_date,selected_forec
 
 
 async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame:
-
     all_observations_df = []
 
     for observer_name in observer_names:
-
         # loop over 7 days of data
         observation_one_df = []
         temp_start_date = start_date
@@ -67,30 +75,37 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat
             if temp_end_date > end_date:
                 temp_end_date = end_date
 
+            get_observations_request = dp.GetObservationsAsTimeseriesRequest(
+                observer_name=observer_name,
+                location_uuid=location.location_uuid,
+                energy_source=dp.EnergySource.SOLAR,
+                time_window=dp.TimeWindow(
+                    start_timestamp_utc=datetime.combine(
+                        temp_start_date, datetime.min.time()
+                    ).replace(tzinfo=timezone.utc),
+                    end_timestamp_utc=datetime.combine(
+                        temp_end_date, datetime.min.time()
+                    ).replace(tzinfo=timezone.utc),
+                ),
+            )
+            get_observations_response = await client.get_observations_as_timeseries(
+                get_observations_request
+            )
 
-            get_observations_request = dp.GetObservationsAsTimeseriesRequest(observer_name=observer_name,
-                                                location_uuid=location.location_uuid,
-                                                energy_source=dp.EnergySource.SOLAR,
-                                                time_window=dp.TimeWindow(
-                                                    start_timestamp_utc=datetime.combine(temp_start_date, datetime.min.time()).replace(tzinfo=timezone.utc),
-                                                    end_timestamp_utc=datetime.combine(temp_end_date, datetime.min.time()).replace(tzinfo=timezone.utc)
-                                                ),)
-            get_observations_response = await client.get_observations_as_timeseries(get_observations_request)
-            
-            i=0
+            i = 0
             for value in get_observations_response.values:
                 observations_df = pd.DataFrame(value.to_dict(), index=[i])
                 observation_one_df.append(observations_df)
                 i += 1
 
             temp_start_date = temp_start_date + timedelta(days=7)
-        
+
         observation_one_df = pd.concat(observation_one_df, ignore_index=True)
-        observation_one_df = observation_one_df.sort_values(by='timestampUtc')
-        observation_one_df['observer_name'] = observer_name
+        observation_one_df = observation_one_df.sort_values(by="timestampUtc")
+        observation_one_df["observer_name"] = observer_name
 
         all_observations_df.append(observation_one_df)
-    
+
     all_observations_df = pd.concat(all_observations_df, ignore_index=True)
 
     return all_observations_df
@@ -100,55 +115,87 @@ def dp_forecast_page():
     asyncio.run(async_dp_forecast_page())
 
 
-async def async_dp_forecast_page():    
+async def async_dp_forecast_page():
     st.title("Data Platform Forecast Page")
-    st.write("This is the forecast page from the Data Platform module. This is very much a WIP")
+    st.write(
+        "This is the forecast page from the Data Platform module. This is very much a WIP"
+    )
 
     async with Channel(host=data_platform_host, port=data_platform_port) as channel:
         client = dp.DataPlatformDataServiceStub(channel)
 
         # Select Country
-        country = st.sidebar.selectbox("TODO Select a Country", ['UK', 'NL'], index=0)
+        country = st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0)
 
         # Select Location Type
-        location_types = [dp.LocationType.NATION, dp.LocationType.GSP, dp.LocationType.SITE]
-        location_type = st.sidebar.selectbox("Select a Location Type", location_types, index=0)
-        
+        location_types = [
+            dp.LocationType.NATION,
+            dp.LocationType.GSP,
+            dp.LocationType.SITE,
+        ]
+        location_type = st.sidebar.selectbox(
+            "Select a Location Type", location_types, index=0
+        )
+
         # List Location
-        list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type)
+        list_locations_request = dp.ListLocationsRequest(
+            location_type_filter=location_type
+        )
         list_locations_response = await client.list_locations(list_locations_request)
         locations = list_locations_response.locations
         location_names = [loc.location_name for loc in locations]
-        
+
         # slect locations
-        selected_location_name = st.sidebar.selectbox("Select a Location", location_names, index=0)
-        selected_location = next(loc for loc in locations if loc.location_name == selected_location_name)
+        selected_location_name = st.sidebar.selectbox(
+            "Select a Location", location_names, index=0
+        )
+        selected_location = next(
+            loc for loc in locations if loc.location_name == selected_location_name
+        )
 
         # get models
         get_forecasters_request = dp.ListForecastersRequest(latest_versions_only=True)
-        get_forecasters_response = await client.list_forecasters(get_forecasters_request)
+        get_forecasters_response = await client.list_forecasters(
+            get_forecasters_request
+        )
         forecasters = get_forecasters_response.forecasters
         forecaster_names = [forecaster.forecaster_name for forecaster in forecasters]
-        selected_forecaster_name = st.sidebar.multiselect("Select a Forecaster", forecaster_names, default=forecaster_names[0])
-        selected_forecasters = [forecaster for forecaster in forecasters if forecaster.forecaster_name in selected_forecaster_name]
+        selected_forecaster_name = st.sidebar.multiselect(
+            "Select a Forecaster", forecaster_names, default=forecaster_names[0]
+        )
+        selected_forecasters = [
+            forecaster
+            for forecaster in forecasters
+            if forecaster.forecaster_name in selected_forecaster_name
+        ]
 
         # select start and end date
-        start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=30))
-        end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3))
+        start_date = st.sidebar.date_input(
+            "Start date:", datetime.now().date() - timedelta(days=30)
+        )
+        end_date = st.sidebar.date_input(
+            "End date:", datetime.now().date() + timedelta(days=3)
+        )
 
         # select forecast type
         st.sidebar.write("TODO Select Forecast Type:")
 
         # setup page
         st.header("Time Series Plot")
-        
+
         # get generation data
-        all_observations_df = await get_all_observations(client, selected_location, start_date, end_date)
+        all_observations_df = await get_all_observations(
+            client, selected_location, start_date, end_date
+        )
 
         # get forcast all data
-        all_forecast_data_df = await get_forecast_data(client, selected_location, start_date, end_date, selected_forecasters)
-        st.write(f"Selected Location uuid: {selected_location.location_uuid}. \
-                 Fetched {len(all_forecast_data_df)} rows of forecast data")
+        all_forecast_data_df = await get_forecast_data(
+            client, selected_location, start_date, end_date, selected_forecasters
+        )
+        st.write(
+            f"Selected Location uuid: {selected_location.location_uuid}. \
+                 Fetched {len(all_forecast_data_df)} rows of forecast data"
+        )
 
         # add download button
         csv = all_forecast_data_df.to_csv().encode("utf-8")
@@ -159,75 +206,117 @@ async def async_dp_forecast_page():
             mime="text/csv",
         )
 
+        all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime(
+            all_forecast_data_df["init_timestamp"]
+        ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m")
 
-        all_forecast_data_df['target_timestamp_utc'] = pd.to_datetime(all_forecast_data_df['initTimestamp']) + pd.to_timedelta(all_forecast_data_df['horizonMins'], unit='m')
-        
         # Choose current forecast
         # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins
         # it should also be unique for each forecasterFullName
-        current_forecast_df = all_forecast_data_df.loc[all_forecast_data_df.groupby(['target_timestamp_utc', 'forecasterFullname'])['horizonMins'].idxmin()]
+        current_forecast_df = all_forecast_data_df.loc[
+            all_forecast_data_df.groupby(
+                ["target_timestamp_utc", "forecaster_fullname"]
+            )["horizon_mins"].idxmin()
+        ]
 
-        # plot the results
+        # plot the results
         fig = go.Figure()
         for forecaster in selected_forecasters:
-            name_and_version = f'{forecaster.forecaster_name}:{forecaster.forecaster_version}'
-            forecaster_df = current_forecast_df[current_forecast_df['forecasterFullname'] == name_and_version]
-            fig.add_trace(go.Scatter(
-                x=forecaster_df['target_timestamp_utc'],
-                y=forecaster_df['p50Fraction'],
-                mode='lines',
-                name=forecaster.forecaster_name
-            ))
+            name_and_version = (
+                f"{forecaster.forecaster_name}:{forecaster.forecaster_version}"
+            )
+            forecaster_df = current_forecast_df[
+                current_forecast_df["forecaster_fullname"] == name_and_version
+            ]
+            fig.add_trace(
+                go.Scatter(
+                    x=forecaster_df["target_timestamp_utc"],
+                    y=forecaster_df["p50_fraction"],
+                    mode="lines",
+                    name=forecaster.forecaster_name,
+                )
+            )
 
         for observer_name in observer_names:
-            obs_df = all_observations_df[all_observations_df['observer_name'] == observer_name]
-            fig.add_trace(go.Scatter(
-                x=obs_df['timestampUtc'],
-                y=obs_df['valueFraction'],
-                mode='lines',
-                name=observer_name
-            ))
+            obs_df = all_observations_df[
+                all_observations_df["observer_name"] == observer_name
+            ]
+            fig.add_trace(
+                go.Scatter(
+                    x=obs_df["timestampUtc"],
+                    y=obs_df["valueFraction"],
+                    mode="lines",
+                    name=observer_name,
+                )
+            )
 
         fig.update_layout(
-            title='Current Forecast',
-            xaxis_title='Time',
-            yaxis_title='Generation [%]',
-            legend_title='Forecaster'
+            title="Current Forecast",
+            xaxis_title="Time",
+            yaxis_title="Generation [%]",
+            legend_title="Forecaster",
         )
 
         st.plotly_chart(fig)
 
-
-
         st.header("Summary Accuracy Graph")
 
         # take the foecast data, and group by horizonMins, forecasterFullName
         # calculate mean absolute error between p50Fraction and observations valueFraction
-        all_observations_df['timestampUtc'] = pd.to_datetime(all_observations_df['timestampUtc'])
-        merged_df = pd.merge(all_forecast_data_df, all_observations_df, left_on=['target_timestamp_utc'], right_on=['timestampUtc'], how='inner', suffixes=('_forecast', '_observation'))
-        merged_df['absolute_error'] = (merged_df['p50Fraction'] - merged_df['valueFraction']).abs()
-
-        summary_df = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'mean'}).reset_index()
-        summary_df['std'] = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'std'}).reset_index()['absolute_error']
-        summary_df['count'] = merged_df.groupby(['horizonMins', 'forecasterFullname']).agg({'absolute_error': 'count'}).reset_index()['absolute_error']
-        summary_df['sem'] = summary_df['std'] / (summary_df['count']**0.5)
+        all_observations_df["timestampUtc"] = pd.to_datetime(
+            all_observations_df["timestampUtc"]
+        )
+        merged_df = pd.merge(
+            all_forecast_data_df,
+            all_observations_df,
+            left_on=["target_timestamp_utc"],
+            right_on=["timestampUtc"],
+            how="inner",
+            suffixes=("_forecast", "_observation"),
+        )
+        merged_df["absolute_error"] = (
+            merged_df["p50_fraction"] - merged_df["valueFraction"]
+        ).abs()
+
+        summary_df = (
+            merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+            .agg({"absolute_error": "mean"})
+            .reset_index()
+        )
+        summary_df["std"] = (
+            merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+            .agg({"absolute_error": "std"})
+            .reset_index()["absolute_error"]
+        )
+        summary_df["count"] = (
+            merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+            .agg({"absolute_error": "count"})
+            .reset_index()["absolute_error"]
+        )
+        summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5)
 
         fig2 = go.Figure()
-        
+
         for forecaster in selected_forecasters:
-            name_and_version = f'{forecaster.forecaster_name}:{forecaster.forecaster_version}'
-            forecaster_df = summary_df[summary_df['forecasterFullname'] == name_and_version]
-            fig2.add_trace(go.Scatter(
-                x=forecaster_df['horizonMins'],
-                y=forecaster_df['absolute_error'],
-                mode='lines+markers',
-                name=forecaster.forecaster_name
-            ))  
+            name_and_version = (
+                f"{forecaster.forecaster_name}:{forecaster.forecaster_version}"
+            )
+            forecaster_df = summary_df[
+                summary_df["forecaster_fullname"] == name_and_version
+            ]
+            fig2.add_trace(
+                go.Scatter(
+                    x=forecaster_df["horizon_mins"],
+                    y=forecaster_df["absolute_error"],
+                    mode="lines+markers",
+                    name=forecaster.forecaster_name,
+                )
+            )
 
             fig2.add_trace(
                 go.Scatter(
-                    x=forecaster_df['horizonMins'],
-                    y=forecaster_df['absolute_error'] - 1.96 * forecaster_df['sem'],
+                    x=forecaster_df["horizon_mins"],
+                    y=forecaster_df["absolute_error"] - 1.96 * forecaster_df["sem"],
                     mode="lines",
                     # name="p10: " + model,
                     # line=dict(color=get_colour_from_model_name(model), width=0),
@@ -238,8 +327,8 @@ async def async_dp_forecast_page():
 
             fig2.add_trace(
                 go.Scatter(
-                    x=forecaster_df['horizonMins'],
-                    y=forecaster_df['absolute_error'] + 1.96 * forecaster_df['sem'],
+                    x=forecaster_df["horizon_mins"],
+                    y=forecaster_df["absolute_error"] + 1.96 * forecaster_df["sem"],
                     mode="lines",
                     # name="p10: " + model,
                     # line=dict(color=get_colour_from_model_name(model), width=0),
@@ -249,17 +338,15 @@ async def async_dp_forecast_page():
                 )
             )
 
-
         fig2.update_layout(
-            title='Mean Absolute Error by Horizon',
-            xaxis_title='Horizon (Minutes)',
-            yaxis_title='Mean Absolute Error [%]',
-            legend_title='Forecaster'
+            title="Mean Absolute Error by Horizon",
+            xaxis_title="Horizon (Minutes)",
+            yaxis_title="Mean Absolute Error [%]",
+            legend_title="Forecaster",
         )
 
         st.plotly_chart(fig2)
 
-
         csv = summary_df.to_csv().encode("utf-8")
         st.download_button(
             label="⬇️",
@@ -268,10 +355,11 @@ async def async_dp_forecast_page():
             mime="text/csv",
         )
 
-
         st.header("TODO")
 
         st.write("Change from % to MW")
+        st.write("Add probabilistic")
+        st.write("Align forecasts on t0")
         st.write("Metrics summary table")
         st.write("Add more metrics")
         st.write("Add forecast horizon options")

From cbb10ea459f855be4f15cabb57a8d5f25c62cc7d Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 17 Nov 2025 11:30:44 +0000
Subject: [PATCH 06/60] tidy up observations

---
 src/dataplatform/forecast.py | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index c4a8a06..86459b9 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -43,7 +43,11 @@ async def get_forecast_data(
         )
         forecasts = []
         async for chunk in _client.stream_forecast_data(stream_forecast_data_request):
-            forecasts.append(chunk.to_dict(casing=betterproto.Casing.SNAKE))
+            forecasts.append(
+                chunk.to_dict(
+                    include_default_values=True, casing=betterproto.Casing.SNAKE
+                )
+            )
 
         if len(forecasts) > 0:
             all_data_df.append(
@@ -92,16 +96,20 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat
                 get_observations_request
             )
 
-            i = 0
-            for value in get_observations_response.values:
-                observations_df = pd.DataFrame(value.to_dict(), index=[i])
-                observation_one_df.append(observations_df)
-                i += 1
+            observations = []
+            for chunk in get_observations_response.values:
+                observations.append(
+                    chunk.to_dict(
+                        include_default_values=True, casing=betterproto.Casing.SNAKE
+                    )
+                )
+
+            observation_one_df.append(pd.DataFrame.from_dict(observations))
 
             temp_start_date = temp_start_date + timedelta(days=7)
 
         observation_one_df = pd.concat(observation_one_df, ignore_index=True)
-        observation_one_df = observation_one_df.sort_values(by="timestampUtc")
+        observation_one_df = observation_one_df.sort_values(by="timestamp_utc")
         observation_one_df["observer_name"] = observer_name
 
         all_observations_df.append(observation_one_df)
@@ -243,8 +251,8 @@ async def async_dp_forecast_page():
             ]
             fig.add_trace(
                 go.Scatter(
-                    x=obs_df["timestampUtc"],
-                    y=obs_df["valueFraction"],
+                    x=obs_df["timestamp_utc"],
+                    y=obs_df["value_fraction"],
                     mode="lines",
                     name=observer_name,
                 )
@@ -263,19 +271,19 @@ async def async_dp_forecast_page():
 
         # take the foecast data, and group by horizonMins, forecasterFullName
         # calculate mean absolute error between p50Fraction and observations valueFraction
-        all_observations_df["timestampUtc"] = pd.to_datetime(
-            all_observations_df["timestampUtc"]
+        all_observations_df["timestamp_utc"] = pd.to_datetime(
+            all_observations_df["timestamp_utc"]
         )
         merged_df = pd.merge(
             all_forecast_data_df,
             all_observations_df,
             left_on=["target_timestamp_utc"],
-            right_on=["timestampUtc"],
+            right_on=["timestamp_utc"],
             how="inner",
             suffixes=("_forecast", "_observation"),
         )
         merged_df["absolute_error"] = (
-            merged_df["p50_fraction"] - merged_df["valueFraction"]
+            merged_df["p50_fraction"] - merged_df["value_fraction"]
         ).abs()
 
         summary_df = (

From 46c77921054b167ede2bc3910a7155822d231835 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 17 Nov 2025 11:35:28 +0000
Subject: [PATCH 07/60] tidy up time window

---
 src/dataplatform/forecast.py | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 86459b9..8b82b9f 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -31,14 +31,7 @@ async def get_forecast_data(
         stream_forecast_data_request = dp.StreamForecastDataRequest(
             location_uuid=location.location_uuid,
             energy_source=dp.EnergySource.SOLAR,
-            time_window=dp.TimeWindow(
-                start_timestamp_utc=datetime.combine(
-                    temp_start_date, datetime.min.time()
-                ).replace(tzinfo=timezone.utc),
-                end_timestamp_utc=datetime.combine(
-                    temp_end_date, datetime.min.time()
-                ).replace(tzinfo=timezone.utc),
-            ),
+            time_window=dp.TimeWindow(start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date),
             forecasters=selected_forecasters,
         )
         forecasts = []
@@ -83,14 +76,7 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat
                 observer_name=observer_name,
                 location_uuid=location.location_uuid,
                 energy_source=dp.EnergySource.SOLAR,
-                time_window=dp.TimeWindow(
-                    start_timestamp_utc=datetime.combine(
-                        temp_start_date, datetime.min.time()
-                    ).replace(tzinfo=timezone.utc),
-                    end_timestamp_utc=datetime.combine(
-                        temp_end_date, datetime.min.time()
-                    ).replace(tzinfo=timezone.utc),
-                ),
+                time_window=dp.TimeWindow(temp_start_date, temp_end_date),
             )
             get_observations_response = await client.get_observations_as_timeseries(
                 get_observations_request
@@ -184,6 +170,8 @@ async def async_dp_forecast_page():
         end_date = st.sidebar.date_input(
             "End date:", datetime.now().date() + timedelta(days=3)
         )
+        start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=timezone.utc)
+        end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=timezone.utc)
 
         # select forecast type
         st.sidebar.write("TODO Select Forecast Type:")

From fb83cc4fee851212762b63dbccabf173ad22d917 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 17 Nov 2025 11:37:52 +0000
Subject: [PATCH 08/60] dp 0.13.1

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a81d0e5..9e6453d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,7 +68,7 @@ index-url = "https://download.pytorch.org/whl/cpu"
 extra-index-url = ["https://pypi.org/simple"]
 
 [tool.uv.sources]
-dp-sdk = { url = "https://github.com/openclimatefix/data-platform/releases/download/v0.12.0/dp_sdk-0.12.0-py3-none-any.whl" }
+dp-sdk = { url = "https://github.com/openclimatefix/data-platform/releases/download/v0.13.1/dp_sdk-0.13.1-py3-none-any.whl" }
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]

From 5ee30e26df34f591dadf61365832d409d5b80514 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 17 Nov 2025 11:43:43 +0000
Subject: [PATCH 09/60] Use Watts not %

---
 src/dataplatform/forecast.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 8b82b9f..c3eae11 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -57,6 +57,9 @@ async def get_forecast_data(
 
     all_data_df = pd.concat(all_data_df, ignore_index=True)
 
+    # get watt value
+    all_data_df['p50_watts'] = all_data_df['p50_fraction'].astype(float) * all_data_df['effective_capacity_watts'].astype(float)
+
     return all_data_df
 
 
@@ -102,6 +105,8 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat
 
     all_observations_df = pd.concat(all_observations_df, ignore_index=True)
 
+    all_observations_df['value_watts'] = all_observations_df['value_fraction'].astype(float) * all_observations_df['effective_capacity_watts'].astype(float)
+
     return all_observations_df
 
 
@@ -227,7 +232,7 @@ async def async_dp_forecast_page():
             fig.add_trace(
                 go.Scatter(
                     x=forecaster_df["target_timestamp_utc"],
-                    y=forecaster_df["p50_fraction"],
+                    y=forecaster_df["p50_watts"],
                     mode="lines",
                     name=forecaster.forecaster_name,
                 )
@@ -240,7 +245,7 @@ async def async_dp_forecast_page():
             fig.add_trace(
                 go.Scatter(
                     x=obs_df["timestamp_utc"],
-                    y=obs_df["value_fraction"],
+                    y=obs_df["value_watts"],
                     mode="lines",
                     name=observer_name,
                 )
@@ -249,7 +254,7 @@ async def async_dp_forecast_page():
         fig.update_layout(
             title="Current Forecast",
             xaxis_title="Time",
-            yaxis_title="Generation [%]",
+            yaxis_title="Generation [Watts]",
             legend_title="Forecaster",
         )
 
@@ -271,7 +276,7 @@ async def async_dp_forecast_page():
             suffixes=("_forecast", "_observation"),
         )
         merged_df["absolute_error"] = (
-            merged_df["p50_fraction"] - merged_df["value_fraction"]
+            merged_df["p50_watts"] - merged_df["value_watts"]
         ).abs()
 
         summary_df = (
@@ -337,7 +342,7 @@ async def async_dp_forecast_page():
         fig2.update_layout(
             title="Mean Absolute Error by Horizon",
             xaxis_title="Horizon (Minutes)",
-            yaxis_title="Mean Absolute Error [%]",
+            yaxis_title="Mean Absolute Error [watts]",
             legend_title="Forecaster",
         )
 
@@ -353,8 +358,8 @@ async def async_dp_forecast_page():
 
         st.header("TODO")
 
-        st.write("Change from % to MW")
         st.write("Add probabilistic")
+        st.write("Scale to KW/MW/GW as needed")
         st.write("Align forecasts on t0")
         st.write("Metrics summary table")
         st.write("Add more metrics")

From ce8497bff0c1dd6053839061c988100f41f68c09 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 17 Nov 2025 14:50:59 +0000
Subject: [PATCH 10/60] load 30 days of data

---
 pyproject.toml               | 2 +-
 src/dataplatform/forecast.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9e6453d..a98466b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,7 +68,7 @@ index-url = "https://download.pytorch.org/whl/cpu"
 extra-index-url = ["https://pypi.org/simple"]
 
 [tool.uv.sources]
-dp-sdk = { url = "https://github.com/openclimatefix/data-platform/releases/download/v0.13.1/dp_sdk-0.13.1-py3-none-any.whl" }
+dp-sdk = { url = "https://github.com/openclimatefix/data-platform/releases/download/v0.13.2/dp_sdk-0.13.2-py3-none-any.whl" }
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index c3eae11..46de551 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -23,7 +23,7 @@ async def get_forecast_data(
     # loop over 7 days of data
     temp_start_date = start_date
     while temp_start_date <= end_date:
-        temp_end_date = temp_start_date + timedelta(days=7)
+        temp_end_date = temp_start_date + timedelta(days=30)
         if temp_end_date > end_date:
             temp_end_date = end_date
 

From 834581ca1e8b67d467e331d7e14740f4b5dde6e8 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 17 Nov 2025 16:23:02 +0000
Subject: [PATCH 11/60] add metrics table

---
 src/dataplatform/forecast.py | 155 +++++++++++++++++++++++++++++++----
 1 file changed, 140 insertions(+), 15 deletions(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 46de551..4835d73 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -15,12 +15,13 @@
 observer_names = ["pvlive_in_day", "pvlive_day_after"]
 
 
+
 async def get_forecast_data(
     _client, location, start_date, end_date, selected_forecasters
 ) -> pd.DataFrame:
     all_data_df = []
 
-    # loop over 7 days of data
+    # loop over 30 days of data
     temp_start_date = start_date
     while temp_start_date <= end_date:
         temp_end_date = temp_start_date + timedelta(days=30)
@@ -31,7 +32,9 @@ async def get_forecast_data(
         stream_forecast_data_request = dp.StreamForecastDataRequest(
             location_uuid=location.location_uuid,
             energy_source=dp.EnergySource.SOLAR,
-            time_window=dp.TimeWindow(start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date),
+            time_window=dp.TimeWindow(
+                start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date
+            ),
             forecasters=selected_forecasters,
         )
         forecasts = []
@@ -58,12 +61,15 @@ async def get_forecast_data(
     all_data_df = pd.concat(all_data_df, ignore_index=True)
 
     # get watt value
-    all_data_df['p50_watts'] = all_data_df['p50_fraction'].astype(float) * all_data_df['effective_capacity_watts'].astype(float)
+    all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[
+        "effective_capacity_watts"
+    ].astype(float)
 
     return all_data_df
 
 
 async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame:
+
     all_observations_df = []
 
     for observer_name in observer_names:
@@ -105,7 +111,9 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat
 
     all_observations_df = pd.concat(all_observations_df, ignore_index=True)
 
-    all_observations_df['value_watts'] = all_observations_df['value_fraction'].astype(float) * all_observations_df['effective_capacity_watts'].astype(float)
+    all_observations_df["value_watts"] = all_observations_df["value_fraction"].astype(
+        float
+    ) * all_observations_df["effective_capacity_watts"].astype(float)
 
     return all_observations_df
 
@@ -175,8 +183,12 @@ async def async_dp_forecast_page():
         end_date = st.sidebar.date_input(
             "End date:", datetime.now().date() + timedelta(days=3)
         )
-        start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=timezone.utc)
-        end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=timezone.utc)
+        start_date = datetime.combine(start_date, datetime.min.time()).replace(
+            tzinfo=timezone.utc
+        )
+        end_date = datetime.combine(end_date, datetime.min.time()).replace(
+            tzinfo=timezone.utc
+        )
 
         # select forecast type
         st.sidebar.write("TODO Select Forecast Type:")
@@ -261,6 +273,18 @@ async def async_dp_forecast_page():
         st.plotly_chart(fig)
 
         st.header("Summary Accuracy Graph")
+        metrics = {
+            "MAE": "MAE is absolute mean error, average(abs(y-x))",
+            "ME": "ME is mean (bias) error, average((y-x))",
+            "NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)",
+            "NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)",
+            #    "NMAE (by observed generation)":" NAME (by observed generation)"
+        }
+        selected_metric = st.sidebar.selectbox(
+            "Select a Metrics", metrics.keys(), index=0
+        )
+
+        st.write(metrics)
 
         # take the foecast data, and group by horizonMins, forecasterFullName
         # calculate mean absolute error between p50Fraction and observations valueFraction
@@ -275,9 +299,19 @@ async def async_dp_forecast_page():
             how="inner",
             suffixes=("_forecast", "_observation"),
         )
-        merged_df["absolute_error"] = (
-            merged_df["p50_watts"] - merged_df["value_watts"]
-        ).abs()
+        merged_df["effective_capacity_watts_observation"] = merged_df[
+            "effective_capacity_watts_observation"
+        ].astype(float)
+
+        # error
+        merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"]
+
+        # absolute error
+        merged_df["absolute_error"] = (merged_df["error"]).abs()
+
+        # absolute error, normalized by mean observed generation
+        mean_observed_generation = merged_df["value_watts"].mean()
+        # merged_df['absolute_error_normalized_by_generation'] = merged_df['absolute_error'] / merged_df['value_watts']
 
         summary_df = (
             merged_df.groupby(["horizon_mins", "forecaster_fullname"])
@@ -296,6 +330,35 @@ async def async_dp_forecast_page():
         )
         summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5)
 
+        # ME
+        summary_df["ME"] = (
+            merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+            .agg({"error": "mean"})
+            .reset_index()["error"]
+        )
+
+        # summary_df["absolute_error_divided_by_observed"] = (
+        #     merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+        #     .agg({"absolute_error_normalized_by_generation": "mean"})
+        #     .reset_index()["absolute_error_normalized_by_generation"]
+        # )
+
+        summary_df["effective_capacity_watts_observation"] = (
+            merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+            .agg({"effective_capacity_watts_observation": "mean"})
+            .reset_index()["effective_capacity_watts_observation"]
+        )
+
+        # rename absolute_error to MAE
+        summary_df = summary_df.rename(columns={"absolute_error": "MAE"})
+        summary_df["NMAE (by capacity)"] = (
+            summary_df["MAE"] / summary_df["effective_capacity_watts_observation"]
+        )
+        summary_df["NMAE (by mean observed generation)"] = (
+            summary_df["MAE"] / mean_observed_generation
+        )
+        # summary_df["NMAE (by observed generation)"] = summary_df["absolute_error_divided_by_observed"]
+
         fig2 = go.Figure()
 
         for forecaster in selected_forecasters:
@@ -308,7 +371,7 @@ async def async_dp_forecast_page():
             fig2.add_trace(
                 go.Scatter(
                     x=forecaster_df["horizon_mins"],
-                    y=forecaster_df["absolute_error"],
+                    y=forecaster_df[selected_metric],
                     mode="lines+markers",
                     name=forecaster.forecaster_name,
                 )
@@ -317,7 +380,7 @@ async def async_dp_forecast_page():
             fig2.add_trace(
                 go.Scatter(
                     x=forecaster_df["horizon_mins"],
-                    y=forecaster_df["absolute_error"] - 1.96 * forecaster_df["sem"],
+                    y=forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"],
                     mode="lines",
                     # name="p10: " + model,
                     # line=dict(color=get_colour_from_model_name(model), width=0),
@@ -329,7 +392,7 @@ async def async_dp_forecast_page():
             fig2.add_trace(
                 go.Scatter(
                     x=forecaster_df["horizon_mins"],
-                    y=forecaster_df["absolute_error"] + 1.96 * forecaster_df["sem"],
+                    y=forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"],
                     mode="lines",
                     # name="p10: " + model,
                     # line=dict(color=get_colour_from_model_name(model), width=0),
@@ -340,9 +403,9 @@ async def async_dp_forecast_page():
             )
 
         fig2.update_layout(
-            title="Mean Absolute Error by Horizon",
+            title=f"{selected_metric} by Horizon",
             xaxis_title="Horizon (Minutes)",
-            yaxis_title="Mean Absolute Error [watts]",
+            yaxis_title=selected_metric,
             legend_title="Forecaster",
         )
 
@@ -356,12 +419,74 @@ async def async_dp_forecast_page():
             mime="text/csv",
         )
 
+        st.header("Summary Accuracy Table")
+
+        # add slider to select min and max horizon mins
+        min_horizon, max_horizon = st.slider(
+            "Select Horizon Mins Range",
+            int(summary_df["horizon_mins"].min()),
+            int(summary_df["horizon_mins"].max()),
+            (
+                int(summary_df["horizon_mins"].min()),
+                int(summary_df["horizon_mins"].max()),
+            ),
+            step=30,
+        )
+
+        # Reduce my horizon mins
+        summary_table_df = merged_df[
+            (merged_df["horizon_mins"] >= min_horizon)
+            & (merged_df["horizon_mins"] <= max_horizon)
+        ]
+
+        summary_table_df = summary_table_df.rename(
+            columns={
+                "effective_capacity_watts_observation": "Capacity_watts",
+                "value_watts": "Mean_Observed_Generation_watts",
+            }
+        )
+
+        value_columns = [
+            "error",
+            "absolute_error",
+            #  'absolute_error_normalized_by_generation',
+            "Mean_Observed_Generation_watts",
+            "Capacity_watts",
+        ]
+
+        summary_table_df = summary_table_df[["forecaster_fullname"] + value_columns]
+
+        summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype(
+            float
+        )
+
+        # group by forecaster full name a
+        summary_table_df = summary_table_df.groupby("forecaster_fullname").mean()
+
+        # rename
+        summary_table_df = summary_table_df.rename(
+            columns={
+                "error": "ME",
+                "absolute_error": "MAE",
+                # 'absolute_error_normalized_by_generation': 'NMAE (by observed generation)',
+                "Capacity_watts": "Mean Capacity",
+                "Mean_Observed_Generation_watts": "Mean Observed Generation",
+            }
+        )
+
+        # pivot table, so forecaster_fullname is columns
+        summary_table_df = summary_table_df.pivot_table(
+            columns=summary_table_df.index,
+            values=summary_table_df.columns.tolist(),
+        )
+
+        st.dataframe(summary_table_df)
+
         st.header("TODO")
 
         st.write("Add probabilistic")
         st.write("Scale to KW/MW/GW as needed")
         st.write("Align forecasts on t0")
-        st.write("Metrics summary table")
         st.write("Add more metrics")
         st.write("Add forecast horizon options")
         st.write("Add creation time forecast filter")

From 65bf1e215b5d9e104f3429529c56646a19924f19 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 17 Nov 2025 16:23:38 +0000
Subject: [PATCH 12/60] add data caching

---
 src/dataplatform/forecast.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 4835d73..058f4c4 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -484,6 +484,7 @@ async def async_dp_forecast_page():
 
         st.header("TODO")
 
+        st.write("Add caching on data")
         st.write("Add probabilistic")
         st.write("Scale to KW/MW/GW as needed")
         st.write("Align forecasts on t0")

From 963833d0b4903b64184767fd508a1eda422e6ffd Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 17 Nov 2025 21:17:56 +0000
Subject: [PATCH 13/60] move back to 7 days

---
 src/dataplatform/forecast.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 058f4c4..3398da8 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -24,7 +24,7 @@ async def get_forecast_data(
     # loop over 30 days of data
     temp_start_date = start_date
     while temp_start_date <= end_date:
-        temp_end_date = temp_start_date + timedelta(days=30)
+        temp_end_date = temp_start_date + timedelta(days=7)
         if temp_end_date > end_date:
             temp_end_date = end_date
 

From 72ca1345abad4d123f4003a5099d8b27c4bfd9a7 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Tue, 18 Nov 2025 10:52:28 +0000
Subject: [PATCH 14/60] add caching

---
 pyproject.toml               |  3 +-
 src/dataplatform/forecast.py | 77 ++++++++++++++++++++++++++++--------
 2 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a98466b..267734f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ dependencies = [
     "plotly==5.24.1",
     "psycopg2-binary==2.9.10",
     "SQLAlchemy==2.0.36",
-    "streamlit==1.46.1",
+    "streamlit==1.51.0",
     "testcontainers==4.9.0",
     "uvicorn==0.34.0",
     "geopandas==1.0.1",
@@ -36,6 +36,7 @@ dependencies = [
     "torch @ https://download.pytorch.org/whl/cpu/torch-2.3.1-cp312-none-macosx_11_0_arm64.whl ; platform_system == 'Darwin' and platform_machine == 'arm64'",
     "matplotlib>=3.8,<4.0",
     "dp-sdk",
+    "aiocache",
 ]
 
 [project.optional-dependencies]
diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 3398da8..fe9f3da 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -7,6 +7,8 @@
 from grpclib.client import Channel
 import plotly.graph_objects as go
 import betterproto
+import time
+from aiocache import Cache, cached
 
 data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost")
 data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051"))
@@ -15,12 +17,48 @@
 observer_names = ["pvlive_in_day", "pvlive_day_after"]
 
 
+def key_builder_remove_client(func, *args, **kwargs):
+    """Custom key builder that ignores the client argument for caching purposes."""
+
+    key = f"{func.__name__}:"
+    for arg in args:
+        if isinstance(arg, dp.DataPlatformDataServiceStub):
+            continue
+        key += f"{arg}-"
+
+    for k, v in kwargs.items():
+        key += f"{k}={v}-"
+
+    return key
+
 
 async def get_forecast_data(
     _client, location, start_date, end_date, selected_forecasters
 ) -> pd.DataFrame:
     all_data_df = []
 
+    for forecaster in selected_forecasters:
+        forecaster_data_df = await get_forecast_data_one_forecaster(
+            _client, location, start_date, end_date, forecaster
+        )
+        all_data_df.append(forecaster_data_df)
+
+    all_data_df = pd.concat(all_data_df, ignore_index=True)
+
+    # get watt value
+    all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[
+        "effective_capacity_watts"
+    ].astype(float)
+
+    return all_data_df
+
+
+@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
+async def get_forecast_data_one_forecaster(
+    client, location, start_date, end_date, selected_forecaster
+) -> pd.DataFrame:
+    all_data_df = []
+
     # loop over 30 days of data
     temp_start_date = start_date
     while temp_start_date <= end_date:
@@ -35,10 +73,10 @@ async def get_forecast_data(
             time_window=dp.TimeWindow(
                 start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date
             ),
-            forecasters=selected_forecasters,
+            forecasters=[selected_forecaster],
         )
         forecasts = []
-        async for chunk in _client.stream_forecast_data(stream_forecast_data_request):
+        async for chunk in client.stream_forecast_data(stream_forecast_data_request):
             forecasts.append(
                 chunk.to_dict(
                     include_default_values=True, casing=betterproto.Casing.SNAKE
@@ -60,16 +98,11 @@ async def get_forecast_data(
 
     all_data_df = pd.concat(all_data_df, ignore_index=True)
 
-    # get watt value
-    all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[
-        "effective_capacity_watts"
-    ].astype(float)
-
     return all_data_df
 
 
-async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame:
-
+@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
+async def get_all_observations(_client, location, start_date, end_date) -> pd.DataFrame:
     all_observations_df = []
 
     for observer_name in observer_names:
@@ -87,7 +120,7 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat
                 energy_source=dp.EnergySource.SOLAR,
                 time_window=dp.TimeWindow(temp_start_date, temp_end_date),
             )
-            get_observations_response = await client.get_observations_as_timeseries(
+            get_observations_response = await _client.get_observations_as_timeseries(
                 get_observations_request
             )
 
@@ -167,8 +200,14 @@ async def async_dp_forecast_page():
         )
         forecasters = get_forecasters_response.forecasters
         forecaster_names = [forecaster.forecaster_name for forecaster in forecasters]
+        if "pvnet_v2" in forecaster_names:
+            default_index = forecaster_names.index("pvnet_v2")
+        else:
+            default_index = 0
         selected_forecaster_name = st.sidebar.multiselect(
-            "Select a Forecaster", forecaster_names, default=forecaster_names[0]
+            "Select a Forecaster",
+            forecaster_names,
+            default=forecaster_names[default_index],
         )
         selected_forecasters = [
             forecaster
@@ -193,21 +232,24 @@ async def async_dp_forecast_page():
         # select forecast type
         st.sidebar.write("TODO Select Forecast Type:")
 
-        # setup page
-        st.header("Time Series Plot")
-
         # get generation data
+        time_start = time.time()
         all_observations_df = await get_all_observations(
             client, selected_location, start_date, end_date
         )
+        observation_seconds = time.time() - time_start
 
         # get forcast all data
+        time_start = time.time()
         all_forecast_data_df = await get_forecast_data(
             client, selected_location, start_date, end_date, selected_forecasters
         )
+        forecast_seconds = time.time() - time_start
+        st.write(f"Selected Location uuid: `{selected_location.location_uuid}`.")
         st.write(
-            f"Selected Location uuid: {selected_location.location_uuid}. \
-                 Fetched {len(all_forecast_data_df)} rows of forecast data"
+            f"Fetched `{len(all_forecast_data_df)}` rows of forecast data in `{forecast_seconds:.2f}` seconds. \
+                 Fetched `{len(all_observations_df)}` rows of observation data in `{observation_seconds:.2f}` seconds. \
+                 We cache data for 5 minutses to speed up repeated requests."
         )
 
         # add download button
@@ -219,6 +261,9 @@ async def async_dp_forecast_page():
             mime="text/csv",
         )
 
+        # 1. Plot of raw forecast data
+        st.header("Time Series Plot")
+
         all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime(
             all_forecast_data_df["init_timestamp"]
         ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m")

From bda0da3b021188c41e0e050854708bd1d75c4f31 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Tue, 18 Nov 2025 10:54:31 +0000
Subject: [PATCH 15/60] move data to new file

---
 src/dataplatform/data.py     | 146 +++++++++++++++++++++++++++++++++++
 src/dataplatform/forecast.py | 138 +--------------------------------
 2 files changed, 148 insertions(+), 136 deletions(-)
 create mode 100644 src/dataplatform/data.py

diff --git a/src/dataplatform/data.py b/src/dataplatform/data.py
new file mode 100644
index 0000000..03c9518
--- /dev/null
+++ b/src/dataplatform/data.py
@@ -0,0 +1,146 @@
+from datetime import timedelta
+import os
+from dp_sdk.ocf import dp
+import pandas as pd
+import betterproto
+from aiocache import Cache, cached
+
+data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost")
+data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051"))
+
+# TODO make this dynamic
+observer_names = ["pvlive_in_day", "pvlive_day_after"]
+
+
+def key_builder_remove_client(func, *args, **kwargs):
+    """Custom key builder that ignores the client argument for caching purposes."""
+
+    key = f"{func.__name__}:"
+    for arg in args:
+        if isinstance(arg, dp.DataPlatformDataServiceStub):
+            continue
+        key += f"{arg}-"
+
+    for k, v in kwargs.items():
+        key += f"{k}={v}-"
+
+    return key
+
+
+async def get_forecast_data(
+    _client, location, start_date, end_date, selected_forecasters
+) -> pd.DataFrame:
+    all_data_df = []
+
+    for forecaster in selected_forecasters:
+        forecaster_data_df = await get_forecast_data_one_forecaster(
+            _client, location, start_date, end_date, forecaster
+        )
+        all_data_df.append(forecaster_data_df)
+
+    all_data_df = pd.concat(all_data_df, ignore_index=True)
+
+    # get watt value
+    all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[
+        "effective_capacity_watts"
+    ].astype(float)
+
+    return all_data_df
+
+
+@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
+async def get_forecast_data_one_forecaster(
+    client, location, start_date, end_date, selected_forecaster
+) -> pd.DataFrame:
+    all_data_df = []
+
+    # loop over 7 days of data
+    temp_start_date = start_date
+    while temp_start_date <= end_date:
+        temp_end_date = temp_start_date + timedelta(days=7)
+        if temp_end_date > end_date:
+            temp_end_date = end_date
+
+        # fetch data
+        stream_forecast_data_request = dp.StreamForecastDataRequest(
+            location_uuid=location.location_uuid,
+            energy_source=dp.EnergySource.SOLAR,
+            time_window=dp.TimeWindow(
+                start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date
+            ),
+            forecasters=[selected_forecaster],
+        )
+        forecasts = []
+        async for chunk in client.stream_forecast_data(stream_forecast_data_request):
+            forecasts.append(
+                chunk.to_dict(
+                    include_default_values=True, casing=betterproto.Casing.SNAKE
+                )
+            )
+
+        if len(forecasts) > 0:
+            all_data_df.append(
+                pd.DataFrame.from_dict(forecasts)
+                .pipe(
+                    lambda df: df.join(
+                        pd.json_normalize(df["other_statistics_fractions"])
+                    )
+                )
+                .drop("other_statistics_fractions", axis=1)
+            )
+
+        temp_start_date = temp_start_date + timedelta(days=7)
+
+    all_data_df = pd.concat(all_data_df, ignore_index=True)
+
+    return all_data_df
+
+
+@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
+async def get_all_observations(_client, location, start_date, end_date) -> pd.DataFrame:
+    all_observations_df = []
+
+    for observer_name in observer_names:
+        # loop over 7 days of data
+        observation_one_df = []
+        temp_start_date = start_date
+        while temp_start_date <= end_date:
+            temp_end_date = temp_start_date + timedelta(days=7)
+            if temp_end_date > end_date:
+                temp_end_date = end_date
+
+            get_observations_request = dp.GetObservationsAsTimeseriesRequest(
+                observer_name=observer_name,
+                location_uuid=location.location_uuid,
+                energy_source=dp.EnergySource.SOLAR,
+                time_window=dp.TimeWindow(temp_start_date, temp_end_date),
+            )
+            get_observations_response = await _client.get_observations_as_timeseries(
+                get_observations_request
+            )
+
+            observations = []
+            for chunk in get_observations_response.values:
+                observations.append(
+                    chunk.to_dict(
+                        include_default_values=True, casing=betterproto.Casing.SNAKE
+                    )
+                )
+
+            observation_one_df.append(pd.DataFrame.from_dict(observations))
+
+            temp_start_date = temp_start_date + timedelta(days=7)
+
+        observation_one_df = pd.concat(observation_one_df, ignore_index=True)
+        observation_one_df = observation_one_df.sort_values(by="timestamp_utc")
+        observation_one_df["observer_name"] = observer_name
+
+        all_observations_df.append(observation_one_df)
+
+    all_observations_df = pd.concat(all_observations_df, ignore_index=True)
+
+    all_observations_df["value_watts"] = all_observations_df["value_fraction"].astype(
+        float
+    ) * all_observations_df["effective_capacity_watts"].astype(float)
+
+    return all_observations_df
\ No newline at end of file
diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index fe9f3da..c8ff27b 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -6,9 +6,9 @@
 import pandas as pd
 from grpclib.client import Channel
 import plotly.graph_objects as go
-import betterproto
 import time
-from aiocache import Cache, cached
+
+from src.dataplatform.data import get_all_observations, get_forecast_data
 
 data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost")
 data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051"))
@@ -17,140 +17,6 @@
 observer_names = ["pvlive_in_day", "pvlive_day_after"]
 
 
-def key_builder_remove_client(func, *args, **kwargs):
-    """Custom key builder that ignores the client argument for caching purposes."""
-
-    key = f"{func.__name__}:"
-    for arg in args:
-        if isinstance(arg, dp.DataPlatformDataServiceStub):
-            continue
-        key += f"{arg}-"
-
-    for k, v in kwargs.items():
-        key += f"{k}={v}-"
-
-    return key
-
-
-async def get_forecast_data(
-    _client, location, start_date, end_date, selected_forecasters
-) -> pd.DataFrame:
-    all_data_df = []
-
-    for forecaster in selected_forecasters:
-        forecaster_data_df = await get_forecast_data_one_forecaster(
-            _client, location, start_date, end_date, forecaster
-        )
-        all_data_df.append(forecaster_data_df)
-
-    all_data_df = pd.concat(all_data_df, ignore_index=True)
-
-    # get watt value
-    all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[
-        "effective_capacity_watts"
-    ].astype(float)
-
-    return all_data_df
-
-
-@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
-async def get_forecast_data_one_forecaster(
-    client, location, start_date, end_date, selected_forecaster
-) -> pd.DataFrame:
-    all_data_df = []
-
-    # loop over 30 days of data
-    temp_start_date = start_date
-    while temp_start_date <= end_date:
-        temp_end_date = temp_start_date + timedelta(days=7)
-        if temp_end_date > end_date:
-            temp_end_date = end_date
-
-        # fetch data
-        stream_forecast_data_request = dp.StreamForecastDataRequest(
-            location_uuid=location.location_uuid,
-            energy_source=dp.EnergySource.SOLAR,
-            time_window=dp.TimeWindow(
-                start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date
-            ),
-            forecasters=[selected_forecaster],
-        )
-        forecasts = []
-        async for chunk in client.stream_forecast_data(stream_forecast_data_request):
-            forecasts.append(
-                chunk.to_dict(
-                    include_default_values=True, casing=betterproto.Casing.SNAKE
-                )
-            )
-
-        if len(forecasts) > 0:
-            all_data_df.append(
-                pd.DataFrame.from_dict(forecasts)
-                .pipe(
-                    lambda df: df.join(
-                        pd.json_normalize(df["other_statistics_fractions"])
-                    )
-                )
-                .drop("other_statistics_fractions", axis=1)
-            )
-
-        temp_start_date = temp_start_date + timedelta(days=7)
-
-    all_data_df = pd.concat(all_data_df, ignore_index=True)
-
-    return all_data_df
-
-
-@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
-async def get_all_observations(_client, location, start_date, end_date) -> pd.DataFrame:
-    all_observations_df = []
-
-    for observer_name in observer_names:
-        # loop over 7 days of data
-        observation_one_df = []
-        temp_start_date = start_date
-        while temp_start_date <= end_date:
-            temp_end_date = temp_start_date + timedelta(days=7)
-            if temp_end_date > end_date:
-                temp_end_date = end_date
-
-            get_observations_request = dp.GetObservationsAsTimeseriesRequest(
-                observer_name=observer_name,
-                location_uuid=location.location_uuid,
-                energy_source=dp.EnergySource.SOLAR,
-                time_window=dp.TimeWindow(temp_start_date, temp_end_date),
-            )
-            get_observations_response = await _client.get_observations_as_timeseries(
-                get_observations_request
-            )
-
-            observations = []
-            for chunk in get_observations_response.values:
-                observations.append(
-                    chunk.to_dict(
-                        include_default_values=True, casing=betterproto.Casing.SNAKE
-                    )
-                )
-
-            observation_one_df.append(pd.DataFrame.from_dict(observations))
-
-            temp_start_date = temp_start_date + timedelta(days=7)
-
-        observation_one_df = pd.concat(observation_one_df, ignore_index=True)
-        observation_one_df = observation_one_df.sort_values(by="timestamp_utc")
-        observation_one_df["observer_name"] = observer_name
-
-        all_observations_df.append(observation_one_df)
-
-    all_observations_df = pd.concat(all_observations_df, ignore_index=True)
-
-    all_observations_df["value_watts"] = all_observations_df["value_fraction"].astype(
-        float
-    ) * all_observations_df["effective_capacity_watts"].astype(float)
-
-    return all_observations_df
-
-
 def dp_forecast_page():
     asyncio.run(async_dp_forecast_page())
 

From 75ab761c42cced5d0c1838c26ce3d3999bf3e9d7 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Tue, 18 Nov 2025 11:23:51 +0000
Subject: [PATCH 16/60] add colours to main plot

---
 src/dataplatform/forecast.py | 53 ++++++++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 15 deletions(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index c8ff27b..b4825ed 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -16,6 +16,19 @@
 # TODO make this dynamic
 observer_names = ["pvlive_in_day", "pvlive_day_after"]
 
+colours = [
+    "#FFD480",
+    "#FF8F73",
+    "#4675C1",
+    "#65B0C9",
+    "#58B0A9",
+    "#FAA056",
+    "#306BFF",
+    "#FF4901",
+    "#B701FF",
+    "#17E58F",
+]
+
 
 def dp_forecast_page():
     asyncio.run(async_dp_forecast_page())
@@ -145,7 +158,30 @@ async def async_dp_forecast_page():
 
         # plot the results
         fig = go.Figure()
-        for forecaster in selected_forecasters:
+        for observer_name in observer_names:
+            obs_df = all_observations_df[
+                all_observations_df["observer_name"] == observer_name
+            ]
+
+            if observer_name == "pvlive_in_day":
+                # dashed white line
+                line = dict(color="white", dash="dash")
+            elif observer_name == "pvlive_day_after":
+                line = dict(color="white")
+            else:
+                line = dict()
+
+            fig.add_trace(
+                go.Scatter(
+                    x=obs_df["timestamp_utc"],
+                    y=obs_df["value_watts"],
+                    mode="lines",
+                    name=observer_name,
+                    line=line,
+                )
+            )
+
+        for i, forecaster in enumerate(selected_forecasters):
             name_and_version = (
                 f"{forecaster.forecaster_name}:{forecaster.forecaster_version}"
             )
@@ -158,19 +194,7 @@ async def async_dp_forecast_page():
                     y=forecaster_df["p50_watts"],
                     mode="lines",
                     name=forecaster.forecaster_name,
-                )
-            )
-
-        for observer_name in observer_names:
-            obs_df = all_observations_df[
-                all_observations_df["observer_name"] == observer_name
-            ]
-            fig.add_trace(
-                go.Scatter(
-                    x=obs_df["timestamp_utc"],
-                    y=obs_df["value_watts"],
-                    mode="lines",
-                    name=observer_name,
+                    line=dict(color=colours[i % len(colours)]),
                 )
             )
 
@@ -395,7 +419,6 @@ async def async_dp_forecast_page():
 
         st.header("TODO")
 
-        st.write("Add caching on data")
         st.write("Add probabilistic")
         st.write("Scale to KW/MW/GW as needed")
         st.write("Align forecasts on t0")

From dc1b91f25ffb2928dba2532e84de15d264eea85f Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Tue, 18 Nov 2025 11:30:32 +0000
Subject: [PATCH 17/60] update import

---
 src/dataplatform/forecast.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index b4825ed..5312412 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -8,7 +8,7 @@
 import plotly.graph_objects as go
 import time
 
-from src.dataplatform.data import get_all_observations, get_forecast_data
+from dataplatform.data import get_all_observations, get_forecast_data
 
 data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost")
 data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051"))
@@ -354,6 +354,7 @@ async def async_dp_forecast_page():
             mime="text/csv",
         )
 
+        # 3. Summary Accuracy Table, with slider to select min and max horizon mins
         st.header("Summary Accuracy Table")
 
         # add slider to select min and max horizon mins
@@ -417,6 +418,10 @@ async def async_dp_forecast_page():
 
         st.dataframe(summary_table_df)
 
+        # 4. Daily metric plots
+        st.header("Daily Metrics Plots")
+        st.write("TODO")
+
         st.header("TODO")
 
         st.write("Add probabilistic")

From a111be7f84dc13f99894ffedbbaa52c9d042b27f Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Tue, 18 Nov 2025 14:32:02 +0000
Subject: [PATCH 18/60] scale by units and add colours

---
 src/dataplatform/forecast.py | 42 +++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 5312412..9e96927 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -111,6 +111,16 @@ async def async_dp_forecast_page():
         # select forecast type
         st.sidebar.write("TODO Select Forecast Type:")
 
+        # select units
+        if location_type == dp.LocationType.NATION:
+            default_unit_index = 3  # GW
+        else:
+            default_unit_index = 2  # MW
+        units = st.sidebar.selectbox("Select Units", ["W", "kW", "MW", "GW"], index=default_unit_index)
+        scale_factors = {"W": 1, "kW": 1e3, "MW": 1e6, "GW": 1e9}
+        scale_factor = scale_factors[units]
+
+
         # get generation data
         time_start = time.time()
         all_observations_df = await get_all_observations(
@@ -174,7 +184,7 @@ async def async_dp_forecast_page():
             fig.add_trace(
                 go.Scatter(
                     x=obs_df["timestamp_utc"],
-                    y=obs_df["value_watts"],
+                    y=obs_df["value_watts"] / scale_factor,
                     mode="lines",
                     name=observer_name,
                     line=line,
@@ -191,7 +201,7 @@ async def async_dp_forecast_page():
             fig.add_trace(
                 go.Scatter(
                     x=forecaster_df["target_timestamp_utc"],
-                    y=forecaster_df["p50_watts"],
+                    y=forecaster_df["p50_watts"] / scale_factor,
                     mode="lines",
                     name=forecaster.forecaster_name,
                     line=dict(color=colours[i % len(colours)]),
@@ -201,7 +211,7 @@ async def async_dp_forecast_page():
         fig.update_layout(
             title="Current Forecast",
             xaxis_title="Time",
-            yaxis_title="Generation [Watts]",
+            yaxis_title=f"Generation [{units}]",
             legend_title="Forecaster",
         )
 
@@ -296,7 +306,7 @@ async def async_dp_forecast_page():
 
         fig2 = go.Figure()
 
-        for forecaster in selected_forecasters:
+        for i, forecaster in enumerate(selected_forecasters):
             name_and_version = (
                 f"{forecaster.forecaster_name}:{forecaster.forecaster_version}"
             )
@@ -306,19 +316,19 @@ async def async_dp_forecast_page():
             fig2.add_trace(
                 go.Scatter(
                     x=forecaster_df["horizon_mins"],
-                    y=forecaster_df[selected_metric],
+                    y=forecaster_df[selected_metric] / scale_factor,
                     mode="lines+markers",
                     name=forecaster.forecaster_name,
+                    line=dict(color=colours[i % len(colours)]),
                 )
             )
 
             fig2.add_trace(
                 go.Scatter(
                     x=forecaster_df["horizon_mins"],
-                    y=forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"],
+                    y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor,
                     mode="lines",
-                    # name="p10: " + model,
-                    # line=dict(color=get_colour_from_model_name(model), width=0),
+                    line=dict(color=colours[i % len(colours)], width=0),
                     legendgroup=forecaster.forecaster_name,
                     showlegend=False,
                 )
@@ -327,10 +337,9 @@ async def async_dp_forecast_page():
             fig2.add_trace(
                 go.Scatter(
                     x=forecaster_df["horizon_mins"],
-                    y=forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"],
+                    y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor,
                     mode="lines",
-                    # name="p10: " + model,
-                    # line=dict(color=get_colour_from_model_name(model), width=0),
+                    line=dict(color=colours[i % len(colours)], width=0),
                     legendgroup=forecaster.forecaster_name,
                     showlegend=False,
                     fill="tonexty",
@@ -340,7 +349,7 @@ async def async_dp_forecast_page():
         fig2.update_layout(
             title=f"{selected_metric} by Horizon",
             xaxis_title="Horizon (Minutes)",
-            yaxis_title=selected_metric,
+            yaxis_title=f"{selected_metric} [{units}]",
             legend_title="Forecaster",
         )
 
@@ -410,12 +419,20 @@ async def async_dp_forecast_page():
             }
         )
 
+        # scale by units
+        summary_table_df = summary_table_df / scale_factor
+        summary_table_df = summary_table_df.rename(
+            {col: f'{col} [{units}]' for col in summary_table_df.columns},
+            axis=1,
+        )
+
         # pivot table, so forecaster_fullname is columns
         summary_table_df = summary_table_df.pivot_table(
             columns=summary_table_df.index,
             values=summary_table_df.columns.tolist(),
         )
 
+
         st.dataframe(summary_table_df)
 
         # 4. Daily metric plots
@@ -425,7 +442,6 @@ async def async_dp_forecast_page():
         st.header("TODO")
 
         st.write("Add probabilistic")
-        st.write("Scale to KW/MW/GW as needed")
         st.write("Align forecasts on t0")
         st.write("Add more metrics")
         st.write("Add forecast horizon options")

From 5a0442385320cf80d2651443eabfa1deb03bc439 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Tue, 18 Nov 2025 14:41:42 +0000
Subject: [PATCH 19/60] add probabilistic

---
 src/dataplatform/data.py     |  6 ++++++
 src/dataplatform/forecast.py | 26 +++++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/dataplatform/data.py b/src/dataplatform/data.py
index 03c9518..3bbf675 100644
--- a/src/dataplatform/data.py
+++ b/src/dataplatform/data.py
@@ -45,6 +45,12 @@ async def get_forecast_data(
         "effective_capacity_watts"
     ].astype(float)
 
+    for col in ["p10", "p25", "p75", "p90"]:
+        if col in all_data_df.columns:
+            all_data_df[f"{col}_watts"] = all_data_df[col].astype(float) * all_data_df[
+                "effective_capacity_watts"
+            ].astype(float)
+
     return all_data_df
 
 
diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 9e96927..19df5bc 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -207,6 +207,31 @@ async def async_dp_forecast_page():
                     line=dict(color=colours[i % len(colours)]),
                 )
             )
+            print(forecaster_df.columns)
+            if 'p10_watts' in forecaster_df.columns and 'p90_watts' in forecaster_df.columns:
+                fig.add_trace(
+                go.Scatter(
+                    x=forecaster_df["target_timestamp_utc"],
+                    y=forecaster_df["p10_watts"] / scale_factor,
+                    mode="lines",
+                    line=dict(color=colours[i % len(colours)], width=0),
+                    legendgroup=forecaster.forecaster_name,
+                    showlegend=False,
+                    )
+                )
+
+                fig.add_trace(
+                    go.Scatter(
+                        x=forecaster_df["target_timestamp_utc"],
+                        y=forecaster_df["p90_watts"] / scale_factor,
+                        mode="lines",
+                        line=dict(color=colours[i % len(colours)], width=0),
+                        legendgroup=forecaster.forecaster_name,
+                        showlegend=False,
+                        fill="tonexty",
+                        )
+                    )
+            
 
         fig.update_layout(
             title="Current Forecast",
@@ -441,7 +466,6 @@ async def async_dp_forecast_page():
 
         st.header("TODO")
 
-        st.write("Add probabilistic")
         st.write("Align forecasts on t0")
         st.write("Add more metrics")
         st.write("Add forecast horizon options")

From d5380641a286316c51778137bdd1bda90abce39c Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Tue, 18 Nov 2025 15:52:47 +0000
Subject: [PATCH 20/60] add forecast type options, add daily MAE options

---
 src/dataplatform/forecast.py | 83 ++++++++++++++++++++++++++++++------
 1 file changed, 69 insertions(+), 14 deletions(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 19df5bc..f2e0df1 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -109,7 +109,14 @@ async def async_dp_forecast_page():
         )
 
         # select forecast type
-        st.sidebar.write("TODO Select Forecast Type:")
+        selected_forecast_type = st.sidebar.selectbox(
+            "Select a Forecast Type", ['Current', "Horizon", "t0"], index=0
+        )
+
+        if selected_forecast_type == 'Horizon':
+            selected_forecast_horizon = st.sidebar.selectbox(
+                "Select a Forecast Horizon", list(range(0,2400,30)), index=3
+            )
 
         # select units
         if location_type == dp.LocationType.NATION:
@@ -157,14 +164,27 @@ async def async_dp_forecast_page():
             all_forecast_data_df["init_timestamp"]
         ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m")
 
-        # Choose current forecast
-        # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins
-        # it should also be unique for each forecasterFullName
-        current_forecast_df = all_forecast_data_df.loc[
-            all_forecast_data_df.groupby(
-                ["target_timestamp_utc", "forecaster_fullname"]
-            )["horizon_mins"].idxmin()
-        ]
+        if selected_forecast_type == 'Current':
+            # Choose current forecast
+            # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins
+            # it should also be unique for each forecasterFullName
+            current_forecast_df = all_forecast_data_df.loc[
+                all_forecast_data_df.groupby(
+                    ["target_timestamp_utc", "forecaster_fullname"]
+                )["horizon_mins"].idxmin()
+            ]
+        elif selected_forecast_type == 'Horizon':
+            # Choose horizon forecast
+            current_forecast_df = all_forecast_data_df[
+                all_forecast_data_df["horizon_mins"] >= selected_forecast_horizon
+            ]
+            current_forecast_df = current_forecast_df.loc[
+                current_forecast_df.groupby(
+                    ["target_timestamp_utc", "forecaster_fullname"]
+                )["horizon_mins"].idxmin()
+            ]
+        else:
+            pass
 
         # plot the results
         fig = go.Figure()
@@ -462,14 +482,49 @@ async def async_dp_forecast_page():
 
         # 4. Daily metric plots
         st.header("Daily Metrics Plots")
-        st.write("TODO")
+        st.write("Plotted below are the daily MAE for each forecaster. This is for all forecast horizons.")
+        daily_plots_df = merged_df
+        daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date
+
+        # group by forecaster name and date
+        daily_metrics_df = (
+            daily_plots_df.groupby(["date_utc", "forecaster_fullname"])
+            .agg({"absolute_error": "mean"})
+            .reset_index()
+        )
+
+        fig3 = go.Figure()
+        for i, forecaster in enumerate(selected_forecasters):
+            name_and_version = (
+                f"{forecaster.forecaster_name}:{forecaster.forecaster_version}"
+            )
+            forecaster_df = daily_metrics_df[
+                daily_metrics_df["forecaster_fullname"] == name_and_version
+            ]
+            fig3.add_trace(
+                go.Scatter(
+                    x=forecaster_df["date_utc"],
+                    y=forecaster_df["absolute_error"] / scale_factor,
+                    # mode="lines+markers",
+                    name=forecaster.forecaster_name,
+                    line=dict(color=colours[i % len(colours)]),
+                )
+            )
+
+        fig3.update_layout(
+            title=f"Daily MAE",
+            xaxis_title="Date",
+            yaxis_title=f"MAE [{units}]",
+            legend_title="Forecaster",
+        )
+
+        st.plotly_chart(fig3)
+
+
 
         st.header("TODO")
 
         st.write("Align forecasts on t0")
         st.write("Add more metrics")
-        st.write("Add forecast horizon options")
-        st.write("Add creation time forecast filter")
-        st.write("Daily Metrics graphs")
-        st.write("colours")
+        st.write("Add creation time / t0 forecast filter")
         st.write("speed up read, use async and more caching")

From 7791489bf9c971ca9ade5c75f9b97ca21c99b221 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Tue, 18 Nov 2025 16:24:09 +0000
Subject: [PATCH 21/60] add daily ME

---
 src/dataplatform/forecast.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index f2e0df1..e566faa 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -266,8 +266,8 @@ async def async_dp_forecast_page():
         metrics = {
             "MAE": "MAE is absolute mean error, average(abs(y-x))",
             "ME": "ME is mean (bias) error, average((y-x))",
-            "NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)",
-            "NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)",
+            # "TODO NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)",
+            # "TODO NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)",
             #    "NMAE (by observed generation)":" NAME (by observed generation)"
         }
         selected_metric = st.sidebar.selectbox(
@@ -491,6 +491,12 @@ async def async_dp_forecast_page():
             daily_plots_df.groupby(["date_utc", "forecaster_fullname"])
             .agg({"absolute_error": "mean"})
             .reset_index()
+        ).rename(columns={"absolute_error": "MAE"})
+         # ME
+        daily_metrics_df["ME"] = (
+            daily_plots_df.groupby(["date_utc", "forecaster_fullname"])
+            .agg({"error": "mean"})
+            .reset_index()["error"]
         )
 
         fig3 = go.Figure()
@@ -504,7 +510,7 @@ async def async_dp_forecast_page():
             fig3.add_trace(
                 go.Scatter(
                     x=forecaster_df["date_utc"],
-                    y=forecaster_df["absolute_error"] / scale_factor,
+                    y=forecaster_df[selected_metric] / scale_factor,
                     # mode="lines+markers",
                     name=forecaster.forecaster_name,
                     line=dict(color=colours[i % len(colours)]),
@@ -512,9 +518,9 @@ async def async_dp_forecast_page():
             )
 
         fig3.update_layout(
-            title=f"Daily MAE",
+            title=f"Daily {selected_metric}",
             xaxis_title="Date",
-            yaxis_title=f"MAE [{units}]",
+            yaxis_title=f"{selected_metric} [{units}]",
             legend_title="Forecaster",
         )
 

From 095d49586c4a9e61448f224506ce19cef93633cf Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Wed, 19 Nov 2025 12:16:10 +0000
Subject: [PATCH 22/60] add two todos

---
 src/dataplatform/forecast.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index e566faa..277f6ce 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -529,7 +529,9 @@ async def async_dp_forecast_page():
 
 
         st.header("TODO")
-
+        
+        st.write("Deal with new forecast versions")
+        st.write("Turn off probability when line clicked off")
         st.write("Align forecasts on t0")
         st.write("Add more metrics")
         st.write("Add creation time / t0 forecast filter")

From d10135f79cd43205d8d6fdf94dd70c82f391c45c Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Wed, 19 Nov 2025 13:52:24 +0000
Subject: [PATCH 23/60] solve for different forecast versions

---
 src/dataplatform/data.py     |  5 +++
 src/dataplatform/forecast.py | 62 ++++++++++++++++--------------------
 2 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/src/dataplatform/data.py b/src/dataplatform/data.py
index 3bbf675..f9dcf6a 100644
--- a/src/dataplatform/data.py
+++ b/src/dataplatform/data.py
@@ -99,6 +99,11 @@ async def get_forecast_data_one_forecaster(
 
     all_data_df = pd.concat(all_data_df, ignore_index=True)
 
+    # create column forecaster_name, its forecaster_fullname with version removed
+    all_data_df["forecaster_name"] = all_data_df["forecaster_fullname"].apply(
+        lambda x: x.rsplit(":", 1)[0]  # split from right, max 1 split
+    )
+
     return all_data_df
 
 
diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 277f6ce..da07190 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -73,12 +73,12 @@ async def async_dp_forecast_page():
         )
 
         # get models
-        get_forecasters_request = dp.ListForecastersRequest(latest_versions_only=True)
+        get_forecasters_request = dp.ListForecastersRequest()
         get_forecasters_response = await client.list_forecasters(
             get_forecasters_request
         )
         forecasters = get_forecasters_response.forecasters
-        forecaster_names = [forecaster.forecaster_name for forecaster in forecasters]
+        forecaster_names = sorted(list(set([forecaster.forecaster_name for forecaster in forecasters])))
         if "pvnet_v2" in forecaster_names:
             default_index = forecaster_names.index("pvnet_v2")
         else:
@@ -170,7 +170,7 @@ async def async_dp_forecast_page():
             # it should also be unique for each forecasterFullName
             current_forecast_df = all_forecast_data_df.loc[
                 all_forecast_data_df.groupby(
-                    ["target_timestamp_utc", "forecaster_fullname"]
+                    ["target_timestamp_utc", "forecaster_name"]
                 )["horizon_mins"].idxmin()
             ]
         elif selected_forecast_type == 'Horizon':
@@ -180,7 +180,7 @@ async def async_dp_forecast_page():
             ]
             current_forecast_df = current_forecast_df.loc[
                 current_forecast_df.groupby(
-                    ["target_timestamp_utc", "forecaster_fullname"]
+                    ["target_timestamp_utc", "forecaster_name"]
                 )["horizon_mins"].idxmin()
             ]
         else:
@@ -211,23 +211,19 @@ async def async_dp_forecast_page():
                 )
             )
 
-        for i, forecaster in enumerate(selected_forecasters):
-            name_and_version = (
-                f"{forecaster.forecaster_name}:{forecaster.forecaster_version}"
-            )
+        for i, forecaster_name in enumerate(forecaster_names):
             forecaster_df = current_forecast_df[
-                current_forecast_df["forecaster_fullname"] == name_and_version
+                current_forecast_df["forecaster_name"] == forecaster_name
             ]
             fig.add_trace(
                 go.Scatter(
                     x=forecaster_df["target_timestamp_utc"],
                     y=forecaster_df["p50_watts"] / scale_factor,
                     mode="lines",
-                    name=forecaster.forecaster_name,
+                    name=forecaster_name,
                     line=dict(color=colours[i % len(colours)]),
                 )
             )
-            print(forecaster_df.columns)
             if 'p10_watts' in forecaster_df.columns and 'p90_watts' in forecaster_df.columns:
                 fig.add_trace(
                 go.Scatter(
@@ -235,7 +231,7 @@ async def async_dp_forecast_page():
                     y=forecaster_df["p10_watts"] / scale_factor,
                     mode="lines",
                     line=dict(color=colours[i % len(colours)], width=0),
-                    legendgroup=forecaster.forecaster_name,
+                    legendgroup=forecaster_name,
                     showlegend=False,
                     )
                 )
@@ -246,7 +242,7 @@ async def async_dp_forecast_page():
                         y=forecaster_df["p90_watts"] / scale_factor,
                         mode="lines",
                         line=dict(color=colours[i % len(colours)], width=0),
-                        legendgroup=forecaster.forecaster_name,
+                        legendgroup=forecaster_name,
                         showlegend=False,
                         fill="tonexty",
                         )
@@ -304,17 +300,17 @@ async def async_dp_forecast_page():
         # merged_df['absolute_error_normalized_by_generation'] = merged_df['absolute_error'] / merged_df['value_watts']
 
         summary_df = (
-            merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+            merged_df.groupby(["horizon_mins", "forecaster_name"])
             .agg({"absolute_error": "mean"})
             .reset_index()
         )
         summary_df["std"] = (
-            merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+            merged_df.groupby(["horizon_mins", "forecaster_name"])
             .agg({"absolute_error": "std"})
             .reset_index()["absolute_error"]
         )
         summary_df["count"] = (
-            merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+            merged_df.groupby(["horizon_mins", "forecaster_name"])
             .agg({"absolute_error": "count"})
             .reset_index()["absolute_error"]
         )
@@ -322,19 +318,19 @@ async def async_dp_forecast_page():
 
         # ME
         summary_df["ME"] = (
-            merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+            merged_df.groupby(["horizon_mins", "forecaster_name"])
             .agg({"error": "mean"})
             .reset_index()["error"]
         )
 
         # summary_df["absolute_error_divided_by_observed"] = (
-        #     merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+        #     merged_df.groupby(["horizon_mins", "forecaster_name"])
         #     .agg({"absolute_error_normalized_by_generation": "mean"})
         #     .reset_index()["absolute_error_normalized_by_generation"]
         # )
 
         summary_df["effective_capacity_watts_observation"] = (
-            merged_df.groupby(["horizon_mins", "forecaster_fullname"])
+            merged_df.groupby(["horizon_mins", "forecaster_name"])
             .agg({"effective_capacity_watts_observation": "mean"})
             .reset_index()["effective_capacity_watts_observation"]
         )
@@ -351,19 +347,17 @@ async def async_dp_forecast_page():
 
         fig2 = go.Figure()
 
-        for i, forecaster in enumerate(selected_forecasters):
-            name_and_version = (
-                f"{forecaster.forecaster_name}:{forecaster.forecaster_version}"
-            )
+        for i, forecaster_name in enumerate(forecaster_names):
+  
             forecaster_df = summary_df[
-                summary_df["forecaster_fullname"] == name_and_version
+                summary_df["forecaster_name"] == forecaster_name
             ]
             fig2.add_trace(
                 go.Scatter(
                     x=forecaster_df["horizon_mins"],
                     y=forecaster_df[selected_metric] / scale_factor,
                     mode="lines+markers",
-                    name=forecaster.forecaster_name,
+                    name=forecaster_name,
                     line=dict(color=colours[i % len(colours)]),
                 )
             )
@@ -374,7 +368,7 @@ async def async_dp_forecast_page():
                     y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor,
                     mode="lines",
                     line=dict(color=colours[i % len(colours)], width=0),
-                    legendgroup=forecaster.forecaster_name,
+                    legendgroup=forecaster_name,
                     showlegend=False,
                 )
             )
@@ -385,7 +379,7 @@ async def async_dp_forecast_page():
                     y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor,
                     mode="lines",
                     line=dict(color=colours[i % len(colours)], width=0),
-                    legendgroup=forecaster.forecaster_name,
+                    legendgroup=forecaster_name,
                     showlegend=False,
                     fill="tonexty",
                 )
@@ -444,14 +438,14 @@ async def async_dp_forecast_page():
             "Capacity_watts",
         ]
 
-        summary_table_df = summary_table_df[["forecaster_fullname"] + value_columns]
+        summary_table_df = summary_table_df[["forecaster_name"] + value_columns]
 
         summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype(
             float
         )
 
         # group by forecaster full name a
-        summary_table_df = summary_table_df.groupby("forecaster_fullname").mean()
+        summary_table_df = summary_table_df.groupby("forecaster_name").mean()
 
         # rename
         summary_table_df = summary_table_df.rename(
@@ -471,7 +465,7 @@ async def async_dp_forecast_page():
             axis=1,
         )
 
-        # pivot table, so forecaster_fullname is columns
+        # pivot table, so forecaster_name is columns
         summary_table_df = summary_table_df.pivot_table(
             columns=summary_table_df.index,
             values=summary_table_df.columns.tolist(),
@@ -488,13 +482,13 @@ async def async_dp_forecast_page():
 
         # group by forecaster name and date
         daily_metrics_df = (
-            daily_plots_df.groupby(["date_utc", "forecaster_fullname"])
+            daily_plots_df.groupby(["date_utc", "forecaster_name"])
             .agg({"absolute_error": "mean"})
             .reset_index()
         ).rename(columns={"absolute_error": "MAE"})
          # ME
         daily_metrics_df["ME"] = (
-            daily_plots_df.groupby(["date_utc", "forecaster_fullname"])
+            daily_plots_df.groupby(["date_utc", "forecaster_name"])
             .agg({"error": "mean"})
             .reset_index()["error"]
         )
@@ -502,10 +496,10 @@ async def async_dp_forecast_page():
         fig3 = go.Figure()
         for i, forecaster in enumerate(selected_forecasters):
             name_and_version = (
-                f"{forecaster.forecaster_name}:{forecaster.forecaster_version}"
+                f"{forecaster.forecaster_name}"
             )
             forecaster_df = daily_metrics_df[
-                daily_metrics_df["forecaster_fullname"] == name_and_version
+                daily_metrics_df["forecaster_name"] == name_and_version
             ]
             fig3.add_trace(
                 go.Scatter(

From 2a57b66d70cedcc45259854743e60735192bc1bb Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Wed, 19 Nov 2025 13:52:33 +0000
Subject: [PATCH 24/60] remove from todo

---
 src/dataplatform/forecast.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index da07190..4cfe39f 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -524,7 +524,6 @@ async def async_dp_forecast_page():
 
         st.header("TODO")
         
-        st.write("Deal with new forecast versions")
         st.write("Turn off probability when line clicked off")
         st.write("Align forecasts on t0")
         st.write("Add more metrics")

From 85dd00a0f74e7c6834a51a5fd76ea5686417083d Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Thu, 20 Nov 2025 08:02:33 +0000
Subject: [PATCH 25/60] add legendgroup

---
 src/dataplatform/forecast.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 4cfe39f..cea9258 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -222,6 +222,8 @@ async def async_dp_forecast_page():
                     mode="lines",
                     name=forecaster_name,
                     line=dict(color=colours[i % len(colours)]),
+                    legendgroup=forecaster_name,
+                        
                 )
             )
             if 'p10_watts' in forecaster_df.columns and 'p90_watts' in forecaster_df.columns:
@@ -524,7 +526,7 @@ async def async_dp_forecast_page():
 
         st.header("TODO")
         
-        st.write("Turn off probability when line clicked off")
+        st.write("Make metrics based on pvlive_data_after")
         st.write("Align forecasts on t0")
         st.write("Add more metrics")
         st.write("Add creation time / t0 forecast filter")

From 6d0b280211d5cb9c7d8f2eba56780c4d05ddb124 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Thu, 20 Nov 2025 11:41:01 +0000
Subject: [PATCH 26/60] filter on pvlive_day_after

---
 src/dataplatform/forecast.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index cea9258..6ee80b2 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -260,6 +260,7 @@ async def async_dp_forecast_page():
 
         st.plotly_chart(fig)
 
+        # 2. Summary Accuracy Graph
         st.header("Summary Accuracy Graph")
         metrics = {
             "MAE": "MAE is absolute mean error, average(abs(y-x))",
@@ -274,6 +275,12 @@ async def async_dp_forecast_page():
 
         st.write(metrics)
 
+        # If the observation data includes pvlive_day_after and pvlive_in_day, then lets just take pvlive_day_after
+        if "pvlive_day_after" in all_observations_df["observer_name"].values:
+            all_observations_df = all_observations_df[
+                all_observations_df["observer_name"] == "pvlive_day_after"
+            ]
+  
         # take the foecast data, and group by horizonMins, forecasterFullName
         # calculate mean absolute error between p50Fraction and observations valueFraction
         all_observations_df["timestamp_utc"] = pd.to_datetime(
@@ -291,16 +298,14 @@ async def async_dp_forecast_page():
             "effective_capacity_watts_observation"
         ].astype(float)
 
-        # error
+        # error and absolute error
         merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"]
+        merged_df["absolute_error"] = merged_df["error"].abs()
 
-        # absolute error
-        merged_df["absolute_error"] = (merged_df["error"]).abs()
-
-        # absolute error, normalized by mean observed generation
+        # Get the mean observed generation
         mean_observed_generation = merged_df["value_watts"].mean()
-        # merged_df['absolute_error_normalized_by_generation'] = merged_df['absolute_error'] / merged_df['value_watts']
-
+        
+        # mean absolute error by horizonMins and forecasterFullName
         summary_df = (
             merged_df.groupby(["horizon_mins", "forecaster_name"])
             .agg({"absolute_error": "mean"})
@@ -325,11 +330,7 @@ async def async_dp_forecast_page():
             .reset_index()["error"]
         )
 
-        # summary_df["absolute_error_divided_by_observed"] = (
-        #     merged_df.groupby(["horizon_mins", "forecaster_name"])
-        #     .agg({"absolute_error_normalized_by_generation": "mean"})
-        #     .reset_index()["absolute_error_normalized_by_generation"]
-        # )
+        # TODO more metrics
 
         summary_df["effective_capacity_watts_observation"] = (
             merged_df.groupby(["horizon_mins", "forecaster_name"])
@@ -526,7 +527,6 @@ async def async_dp_forecast_page():
 
         st.header("TODO")
         
-        st.write("Make metrics based on pvlive_data_after")
         st.write("Align forecasts on t0")
         st.write("Add more metrics")
         st.write("Add creation time / t0 forecast filter")

From fa6af554b2ad048398a50cad5dc57770f75dd769 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Thu, 20 Nov 2025 16:39:07 +0000
Subject: [PATCH 27/60] add todo bug not releasing cache

---
 src/dataplatform/data.py     | 8 ++++----
 src/dataplatform/forecast.py | 1 +
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/dataplatform/data.py b/src/dataplatform/data.py
index f9dcf6a..f44da71 100644
--- a/src/dataplatform/data.py
+++ b/src/dataplatform/data.py
@@ -28,13 +28,13 @@ def key_builder_remove_client(func, *args, **kwargs):
 
 
 async def get_forecast_data(
-    _client, location, start_date, end_date, selected_forecasters
+    client, location, start_date, end_date, selected_forecasters
 ) -> pd.DataFrame:
     all_data_df = []
 
     for forecaster in selected_forecasters:
         forecaster_data_df = await get_forecast_data_one_forecaster(
-            _client, location, start_date, end_date, forecaster
+            client, location, start_date, end_date, forecaster
         )
         all_data_df.append(forecaster_data_df)
 
@@ -108,7 +108,7 @@ async def get_forecast_data_one_forecaster(
 
 
 @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
-async def get_all_observations(_client, location, start_date, end_date) -> pd.DataFrame:
+async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame:
     all_observations_df = []
 
     for observer_name in observer_names:
@@ -126,7 +126,7 @@ async def get_all_observations(_client, location, start_date, end_date) -> pd.Da
                 energy_source=dp.EnergySource.SOLAR,
                 time_window=dp.TimeWindow(temp_start_date, temp_end_date),
             )
-            get_observations_response = await _client.get_observations_as_timeseries(
+            get_observations_response = await client.get_observations_as_timeseries(
                 get_observations_request
             )
 
diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
index 6ee80b2..368c639 100644
--- a/src/dataplatform/forecast.py
+++ b/src/dataplatform/forecast.py
@@ -527,6 +527,7 @@ async def async_dp_forecast_page():
 
         st.header("TODO")
         
+        st.write("Bug: cache not releasing")
         st.write("Align forecasts on t0")
         st.write("Add more metrics")
         st.write("Add creation time / t0 forecast filter")

From d3bdaf6084a1ba795a047aca55723e882de1188a Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Thu, 20 Nov 2025 20:51:43 +0000
Subject: [PATCH 28/60] refactor into multiple files

---
 src/dataplatform/forecast.py            | 534 ------------------------
 src/dataplatform/forecast/constanst.py  |  20 +
 src/dataplatform/{ => forecast}/data.py | 117 ++++--
 src/dataplatform/forecast/main.py       | 213 ++++++++++
 src/dataplatform/forecast/plot.py       | 243 +++++++++++
 src/dataplatform/forecast/setup.py      |  95 +++++
 6 files changed, 664 insertions(+), 558 deletions(-)
 delete mode 100644 src/dataplatform/forecast.py
 create mode 100644 src/dataplatform/forecast/constanst.py
 rename src/dataplatform/{ => forecast}/data.py (58%)
 create mode 100644 src/dataplatform/forecast/main.py
 create mode 100644 src/dataplatform/forecast/plot.py
 create mode 100644 src/dataplatform/forecast/setup.py

diff --git a/src/dataplatform/forecast.py b/src/dataplatform/forecast.py
deleted file mode 100644
index 368c639..0000000
--- a/src/dataplatform/forecast.py
+++ /dev/null
@@ -1,534 +0,0 @@
-import streamlit as st
-from datetime import datetime, timedelta, timezone
-import os
-import asyncio
-from dp_sdk.ocf import dp
-import pandas as pd
-from grpclib.client import Channel
-import plotly.graph_objects as go
-import time
-
-from dataplatform.data import get_all_observations, get_forecast_data
-
-data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost")
-data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051"))
-
-# TODO make this dynamic
-observer_names = ["pvlive_in_day", "pvlive_day_after"]
-
-colours = [
-    "#FFD480",
-    "#FF8F73",
-    "#4675C1",
-    "#65B0C9",
-    "#58B0A9",
-    "#FAA056",
-    "#306BFF",
-    "#FF4901",
-    "#B701FF",
-    "#17E58F",
-]
-
-
-def dp_forecast_page():
-    asyncio.run(async_dp_forecast_page())
-
-
-async def async_dp_forecast_page():
-    st.title("Data Platform Forecast Page")
-    st.write(
-        "This is the forecast page from the Data Platform module. This is very much a WIP"
-    )
-
-    async with Channel(host=data_platform_host, port=data_platform_port) as channel:
-        client = dp.DataPlatformDataServiceStub(channel)
-
-        # Select Country
-        country = st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0)
-
-        # Select Location Type
-        location_types = [
-            dp.LocationType.NATION,
-            dp.LocationType.GSP,
-            dp.LocationType.SITE,
-        ]
-        location_type = st.sidebar.selectbox(
-            "Select a Location Type", location_types, index=0
-        )
-
-        # List Location
-        list_locations_request = dp.ListLocationsRequest(
-            location_type_filter=location_type
-        )
-        list_locations_response = await client.list_locations(list_locations_request)
-        locations = list_locations_response.locations
-        location_names = [loc.location_name for loc in locations]
-
-        # slect locations
-        selected_location_name = st.sidebar.selectbox(
-            "Select a Location", location_names, index=0
-        )
-        selected_location = next(
-            loc for loc in locations if loc.location_name == selected_location_name
-        )
-
-        # get models
-        get_forecasters_request = dp.ListForecastersRequest()
-        get_forecasters_response = await client.list_forecasters(
-            get_forecasters_request
-        )
-        forecasters = get_forecasters_response.forecasters
-        forecaster_names = sorted(list(set([forecaster.forecaster_name for forecaster in forecasters])))
-        if "pvnet_v2" in forecaster_names:
-            default_index = forecaster_names.index("pvnet_v2")
-        else:
-            default_index = 0
-        selected_forecaster_name = st.sidebar.multiselect(
-            "Select a Forecaster",
-            forecaster_names,
-            default=forecaster_names[default_index],
-        )
-        selected_forecasters = [
-            forecaster
-            for forecaster in forecasters
-            if forecaster.forecaster_name in selected_forecaster_name
-        ]
-
-        # select start and end date
-        start_date = st.sidebar.date_input(
-            "Start date:", datetime.now().date() - timedelta(days=30)
-        )
-        end_date = st.sidebar.date_input(
-            "End date:", datetime.now().date() + timedelta(days=3)
-        )
-        start_date = datetime.combine(start_date, datetime.min.time()).replace(
-            tzinfo=timezone.utc
-        )
-        end_date = datetime.combine(end_date, datetime.min.time()).replace(
-            tzinfo=timezone.utc
-        )
-
-        # select forecast type
-        selected_forecast_type = st.sidebar.selectbox(
-            "Select a Forecast Type", ['Current', "Horizon", "t0"], index=0
-        )
-
-        if selected_forecast_type == 'Horizon':
-            selected_forecast_horizon = st.sidebar.selectbox(
-                "Select a Forecast Horizon", list(range(0,2400,30)), index=3
-            )
-
-        # select units
-        if location_type == dp.LocationType.NATION:
-            default_unit_index = 3  # GW
-        else:
-            default_unit_index = 2  # MW
-        units = st.sidebar.selectbox("Select Units", ["W", "kW", "MW", "GW"], index=default_unit_index)
-        scale_factors = {"W": 1, "kW": 1e3, "MW": 1e6, "GW": 1e9}
-        scale_factor = scale_factors[units]
-
-
-        # get generation data
-        time_start = time.time()
-        all_observations_df = await get_all_observations(
-            client, selected_location, start_date, end_date
-        )
-        observation_seconds = time.time() - time_start
-
-        # get forcast all data
-        time_start = time.time()
-        all_forecast_data_df = await get_forecast_data(
-            client, selected_location, start_date, end_date, selected_forecasters
-        )
-        forecast_seconds = time.time() - time_start
-        st.write(f"Selected Location uuid: `{selected_location.location_uuid}`.")
-        st.write(
-            f"Fetched `{len(all_forecast_data_df)}` rows of forecast data in `{forecast_seconds:.2f}` seconds. \
-                 Fetched `{len(all_observations_df)}` rows of observation data in `{observation_seconds:.2f}` seconds. \
-                 We cache data for 5 minutses to speed up repeated requests."
-        )
-
-        # add download button
-        csv = all_forecast_data_df.to_csv().encode("utf-8")
-        st.download_button(
-            label="⬇️",
-            data=csv,
-            file_name=f"site_forecast_{selected_location.location_uuid}_{start_date}_{end_date}.csv",
-            mime="text/csv",
-        )
-
-        # 1. Plot of raw forecast data
-        st.header("Time Series Plot")
-
-        all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime(
-            all_forecast_data_df["init_timestamp"]
-        ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m")
-
-        if selected_forecast_type == 'Current':
-            # Choose current forecast
-            # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins
-            # it should also be unique for each forecasterFullName
-            current_forecast_df = all_forecast_data_df.loc[
-                all_forecast_data_df.groupby(
-                    ["target_timestamp_utc", "forecaster_name"]
-                )["horizon_mins"].idxmin()
-            ]
-        elif selected_forecast_type == 'Horizon':
-            # Choose horizon forecast
-            current_forecast_df = all_forecast_data_df[
-                all_forecast_data_df["horizon_mins"] >= selected_forecast_horizon
-            ]
-            current_forecast_df = current_forecast_df.loc[
-                current_forecast_df.groupby(
-                    ["target_timestamp_utc", "forecaster_name"]
-                )["horizon_mins"].idxmin()
-            ]
-        else:
-            pass
-
-        # plot the results
-        fig = go.Figure()
-        for observer_name in observer_names:
-            obs_df = all_observations_df[
-                all_observations_df["observer_name"] == observer_name
-            ]
-
-            if observer_name == "pvlive_in_day":
-                # dashed white line
-                line = dict(color="white", dash="dash")
-            elif observer_name == "pvlive_day_after":
-                line = dict(color="white")
-            else:
-                line = dict()
-
-            fig.add_trace(
-                go.Scatter(
-                    x=obs_df["timestamp_utc"],
-                    y=obs_df["value_watts"] / scale_factor,
-                    mode="lines",
-                    name=observer_name,
-                    line=line,
-                )
-            )
-
-        for i, forecaster_name in enumerate(forecaster_names):
-            forecaster_df = current_forecast_df[
-                current_forecast_df["forecaster_name"] == forecaster_name
-            ]
-            fig.add_trace(
-                go.Scatter(
-                    x=forecaster_df["target_timestamp_utc"],
-                    y=forecaster_df["p50_watts"] / scale_factor,
-                    mode="lines",
-                    name=forecaster_name,
-                    line=dict(color=colours[i % len(colours)]),
-                    legendgroup=forecaster_name,
-                        
-                )
-            )
-            if 'p10_watts' in forecaster_df.columns and 'p90_watts' in forecaster_df.columns:
-                fig.add_trace(
-                go.Scatter(
-                    x=forecaster_df["target_timestamp_utc"],
-                    y=forecaster_df["p10_watts"] / scale_factor,
-                    mode="lines",
-                    line=dict(color=colours[i % len(colours)], width=0),
-                    legendgroup=forecaster_name,
-                    showlegend=False,
-                    )
-                )
-
-                fig.add_trace(
-                    go.Scatter(
-                        x=forecaster_df["target_timestamp_utc"],
-                        y=forecaster_df["p90_watts"] / scale_factor,
-                        mode="lines",
-                        line=dict(color=colours[i % len(colours)], width=0),
-                        legendgroup=forecaster_name,
-                        showlegend=False,
-                        fill="tonexty",
-                        )
-                    )
-            
-
-        fig.update_layout(
-            title="Current Forecast",
-            xaxis_title="Time",
-            yaxis_title=f"Generation [{units}]",
-            legend_title="Forecaster",
-        )
-
-        st.plotly_chart(fig)
-
-        # 2. Summary Accuracy Graph
-        st.header("Summary Accuracy Graph")
-        metrics = {
-            "MAE": "MAE is absolute mean error, average(abs(y-x))",
-            "ME": "ME is mean (bias) error, average((y-x))",
-            # "TODO NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)",
-            # "TODO NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)",
-            #    "NMAE (by observed generation)":" NAME (by observed generation)"
-        }
-        selected_metric = st.sidebar.selectbox(
-            "Select a Metrics", metrics.keys(), index=0
-        )
-
-        st.write(metrics)
-
-        # If the observation data includes pvlive_day_after and pvlive_in_day, then lets just take pvlive_day_after
-        if "pvlive_day_after" in all_observations_df["observer_name"].values:
-            all_observations_df = all_observations_df[
-                all_observations_df["observer_name"] == "pvlive_day_after"
-            ]
-  
-        # take the foecast data, and group by horizonMins, forecasterFullName
-        # calculate mean absolute error between p50Fraction and observations valueFraction
-        all_observations_df["timestamp_utc"] = pd.to_datetime(
-            all_observations_df["timestamp_utc"]
-        )
-        merged_df = pd.merge(
-            all_forecast_data_df,
-            all_observations_df,
-            left_on=["target_timestamp_utc"],
-            right_on=["timestamp_utc"],
-            how="inner",
-            suffixes=("_forecast", "_observation"),
-        )
-        merged_df["effective_capacity_watts_observation"] = merged_df[
-            "effective_capacity_watts_observation"
-        ].astype(float)
-
-        # error and absolute error
-        merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"]
-        merged_df["absolute_error"] = merged_df["error"].abs()
-
-        # Get the mean observed generation
-        mean_observed_generation = merged_df["value_watts"].mean()
-        
-        # mean absolute error by horizonMins and forecasterFullName
-        summary_df = (
-            merged_df.groupby(["horizon_mins", "forecaster_name"])
-            .agg({"absolute_error": "mean"})
-            .reset_index()
-        )
-        summary_df["std"] = (
-            merged_df.groupby(["horizon_mins", "forecaster_name"])
-            .agg({"absolute_error": "std"})
-            .reset_index()["absolute_error"]
-        )
-        summary_df["count"] = (
-            merged_df.groupby(["horizon_mins", "forecaster_name"])
-            .agg({"absolute_error": "count"})
-            .reset_index()["absolute_error"]
-        )
-        summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5)
-
-        # ME
-        summary_df["ME"] = (
-            merged_df.groupby(["horizon_mins", "forecaster_name"])
-            .agg({"error": "mean"})
-            .reset_index()["error"]
-        )
-
-        # TODO more metrics
-
-        summary_df["effective_capacity_watts_observation"] = (
-            merged_df.groupby(["horizon_mins", "forecaster_name"])
-            .agg({"effective_capacity_watts_observation": "mean"})
-            .reset_index()["effective_capacity_watts_observation"]
-        )
-
-        # rename absolute_error to MAE
-        summary_df = summary_df.rename(columns={"absolute_error": "MAE"})
-        summary_df["NMAE (by capacity)"] = (
-            summary_df["MAE"] / summary_df["effective_capacity_watts_observation"]
-        )
-        summary_df["NMAE (by mean observed generation)"] = (
-            summary_df["MAE"] / mean_observed_generation
-        )
-        # summary_df["NMAE (by observed generation)"] = summary_df["absolute_error_divided_by_observed"]
-
-        fig2 = go.Figure()
-
-        for i, forecaster_name in enumerate(forecaster_names):
-  
-            forecaster_df = summary_df[
-                summary_df["forecaster_name"] == forecaster_name
-            ]
-            fig2.add_trace(
-                go.Scatter(
-                    x=forecaster_df["horizon_mins"],
-                    y=forecaster_df[selected_metric] / scale_factor,
-                    mode="lines+markers",
-                    name=forecaster_name,
-                    line=dict(color=colours[i % len(colours)]),
-                )
-            )
-
-            fig2.add_trace(
-                go.Scatter(
-                    x=forecaster_df["horizon_mins"],
-                    y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor,
-                    mode="lines",
-                    line=dict(color=colours[i % len(colours)], width=0),
-                    legendgroup=forecaster_name,
-                    showlegend=False,
-                )
-            )
-
-            fig2.add_trace(
-                go.Scatter(
-                    x=forecaster_df["horizon_mins"],
-                    y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor,
-                    mode="lines",
-                    line=dict(color=colours[i % len(colours)], width=0),
-                    legendgroup=forecaster_name,
-                    showlegend=False,
-                    fill="tonexty",
-                )
-            )
-
-        fig2.update_layout(
-            title=f"{selected_metric} by Horizon",
-            xaxis_title="Horizon (Minutes)",
-            yaxis_title=f"{selected_metric} [{units}]",
-            legend_title="Forecaster",
-        )
-
-        st.plotly_chart(fig2)
-
-        csv = summary_df.to_csv().encode("utf-8")
-        st.download_button(
-            label="⬇️",
-            data=csv,
-            file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv",
-            mime="text/csv",
-        )
-
-        # 3. Summary Accuracy Table, with slider to select min and max horizon mins
-        st.header("Summary Accuracy Table")
-
-        # add slider to select min and max horizon mins
-        min_horizon, max_horizon = st.slider(
-            "Select Horizon Mins Range",
-            int(summary_df["horizon_mins"].min()),
-            int(summary_df["horizon_mins"].max()),
-            (
-                int(summary_df["horizon_mins"].min()),
-                int(summary_df["horizon_mins"].max()),
-            ),
-            step=30,
-        )
-
-        # Reduce my horizon mins
-        summary_table_df = merged_df[
-            (merged_df["horizon_mins"] >= min_horizon)
-            & (merged_df["horizon_mins"] <= max_horizon)
-        ]
-
-        summary_table_df = summary_table_df.rename(
-            columns={
-                "effective_capacity_watts_observation": "Capacity_watts",
-                "value_watts": "Mean_Observed_Generation_watts",
-            }
-        )
-
-        value_columns = [
-            "error",
-            "absolute_error",
-            #  'absolute_error_normalized_by_generation',
-            "Mean_Observed_Generation_watts",
-            "Capacity_watts",
-        ]
-
-        summary_table_df = summary_table_df[["forecaster_name"] + value_columns]
-
-        summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype(
-            float
-        )
-
-        # group by forecaster full name a
-        summary_table_df = summary_table_df.groupby("forecaster_name").mean()
-
-        # rename
-        summary_table_df = summary_table_df.rename(
-            columns={
-                "error": "ME",
-                "absolute_error": "MAE",
-                # 'absolute_error_normalized_by_generation': 'NMAE (by observed generation)',
-                "Capacity_watts": "Mean Capacity",
-                "Mean_Observed_Generation_watts": "Mean Observed Generation",
-            }
-        )
-
-        # scale by units
-        summary_table_df = summary_table_df / scale_factor
-        summary_table_df = summary_table_df.rename(
-            {col: f'{col} [{units}]' for col in summary_table_df.columns},
-            axis=1,
-        )
-
-        # pivot table, so forecaster_name is columns
-        summary_table_df = summary_table_df.pivot_table(
-            columns=summary_table_df.index,
-            values=summary_table_df.columns.tolist(),
-        )
-
-
-        st.dataframe(summary_table_df)
-
-        # 4. Daily metric plots
-        st.header("Daily Metrics Plots")
-        st.write("Plotted below are the daily MAE for each forecaster. This is for all forecast horizons.")
-        daily_plots_df = merged_df
-        daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date
-
-        # group by forecaster name and date
-        daily_metrics_df = (
-            daily_plots_df.groupby(["date_utc", "forecaster_name"])
-            .agg({"absolute_error": "mean"})
-            .reset_index()
-        ).rename(columns={"absolute_error": "MAE"})
-         # ME
-        daily_metrics_df["ME"] = (
-            daily_plots_df.groupby(["date_utc", "forecaster_name"])
-            .agg({"error": "mean"})
-            .reset_index()["error"]
-        )
-
-        fig3 = go.Figure()
-        for i, forecaster in enumerate(selected_forecasters):
-            name_and_version = (
-                f"{forecaster.forecaster_name}"
-            )
-            forecaster_df = daily_metrics_df[
-                daily_metrics_df["forecaster_name"] == name_and_version
-            ]
-            fig3.add_trace(
-                go.Scatter(
-                    x=forecaster_df["date_utc"],
-                    y=forecaster_df[selected_metric] / scale_factor,
-                    # mode="lines+markers",
-                    name=forecaster.forecaster_name,
-                    line=dict(color=colours[i % len(colours)]),
-                )
-            )
-
-        fig3.update_layout(
-            title=f"Daily {selected_metric}",
-            xaxis_title="Date",
-            yaxis_title=f"{selected_metric} [{units}]",
-            legend_title="Forecaster",
-        )
-
-        st.plotly_chart(fig3)
-
-
-
-        st.header("TODO")
-        
-        st.write("Bug: cache not releasing")
-        st.write("Align forecasts on t0")
-        st.write("Add more metrics")
-        st.write("Add creation time / t0 forecast filter")
-        st.write("speed up read, use async and more caching")
diff --git a/src/dataplatform/forecast/constanst.py b/src/dataplatform/forecast/constanst.py
new file mode 100644
index 0000000..107fdab
--- /dev/null
+++ b/src/dataplatform/forecast/constanst.py
@@ -0,0 +1,20 @@
+colours = [
+    "#FFD480",
+    "#FF8F73",
+    "#4675C1",
+    "#65B0C9",
+    "#58B0A9",
+    "#FAA056",
+    "#306BFF",
+    "#FF4901",
+    "#B701FF",
+    "#17E58F",
+]
+
+metrics = {
+    "MAE": "MAE is absolute mean error, average(abs(y-x))",
+    "ME": "ME is mean (bias) error, average((y-x))",
+    # "TODO NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)",
+    # "TODO NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)",
+    #    "NMAE (by observed generation)":" NAME (by observed generation)"
+}
diff --git a/src/dataplatform/data.py b/src/dataplatform/forecast/data.py
similarity index 58%
rename from src/dataplatform/data.py
rename to src/dataplatform/forecast/data.py
index f44da71..7b5cb1b 100644
--- a/src/dataplatform/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -1,9 +1,11 @@
-from datetime import timedelta
 import os
-from dp_sdk.ocf import dp
-import pandas as pd
+import time
+from datetime import timedelta
+
 import betterproto
+import pandas as pd
 from aiocache import Cache, cached
+from dp_sdk.ocf import dp
 
 data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost")
 data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051"))
@@ -14,7 +16,6 @@
 
 def key_builder_remove_client(func, *args, **kwargs):
     """Custom key builder that ignores the client argument for caching purposes."""
-
     key = f"{func.__name__}:"
     for arg in args:
         if isinstance(arg, dp.DataPlatformDataServiceStub):
@@ -28,13 +29,21 @@ def key_builder_remove_client(func, *args, **kwargs):
 
 
 async def get_forecast_data(
-    client, location, start_date, end_date, selected_forecasters
+    client,
+    location,
+    start_date,
+    end_date,
+    selected_forecasters,
 ) -> pd.DataFrame:
     all_data_df = []
 
     for forecaster in selected_forecasters:
         forecaster_data_df = await get_forecast_data_one_forecaster(
-            client, location, start_date, end_date, forecaster
+            client,
+            location,
+            start_date,
+            end_date,
+            forecaster,
         )
         all_data_df.append(forecaster_data_df)
 
@@ -56,7 +65,11 @@ async def get_forecast_data(
 
 @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
 async def get_forecast_data_one_forecaster(
-    client, location, start_date, end_date, selected_forecaster
+    client,
+    location,
+    start_date,
+    end_date,
+    selected_forecaster,
 ) -> pd.DataFrame:
     all_data_df = []
 
@@ -72,27 +85,22 @@ async def get_forecast_data_one_forecaster(
             location_uuid=location.location_uuid,
             energy_source=dp.EnergySource.SOLAR,
             time_window=dp.TimeWindow(
-                start_timestamp_utc=temp_start_date, end_timestamp_utc=temp_end_date
+                start_timestamp_utc=temp_start_date,
+                end_timestamp_utc=temp_end_date,
             ),
             forecasters=[selected_forecaster],
         )
         forecasts = []
         async for chunk in client.stream_forecast_data(stream_forecast_data_request):
             forecasts.append(
-                chunk.to_dict(
-                    include_default_values=True, casing=betterproto.Casing.SNAKE
-                )
+                chunk.to_dict(include_default_values=True, casing=betterproto.Casing.SNAKE),
             )
 
         if len(forecasts) > 0:
             all_data_df.append(
                 pd.DataFrame.from_dict(forecasts)
-                .pipe(
-                    lambda df: df.join(
-                        pd.json_normalize(df["other_statistics_fractions"])
-                    )
-                )
-                .drop("other_statistics_fractions", axis=1)
+                .pipe(lambda df: df.join(pd.json_normalize(df["other_statistics_fractions"])))
+                .drop("other_statistics_fractions", axis=1),
             )
 
         temp_start_date = temp_start_date + timedelta(days=7)
@@ -101,7 +109,7 @@ async def get_forecast_data_one_forecaster(
 
     # create column forecaster_name, its forecaster_fullname with version removed
     all_data_df["forecaster_name"] = all_data_df["forecaster_fullname"].apply(
-        lambda x: x.rsplit(":", 1)[0]  # split from right, max 1 split
+        lambda x: x.rsplit(":", 1)[0],  # split from right, max 1 split
     )
 
     return all_data_df
@@ -127,15 +135,13 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat
                 time_window=dp.TimeWindow(temp_start_date, temp_end_date),
             )
             get_observations_response = await client.get_observations_as_timeseries(
-                get_observations_request
+                get_observations_request,
             )
 
             observations = []
             for chunk in get_observations_response.values:
                 observations.append(
-                    chunk.to_dict(
-                        include_default_values=True, casing=betterproto.Casing.SNAKE
-                    )
+                    chunk.to_dict(include_default_values=True, casing=betterproto.Casing.SNAKE),
                 )
 
             observation_one_df.append(pd.DataFrame.from_dict(observations))
@@ -151,7 +157,70 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat
     all_observations_df = pd.concat(all_observations_df, ignore_index=True)
 
     all_observations_df["value_watts"] = all_observations_df["value_fraction"].astype(
-        float
+        float,
     ) * all_observations_df["effective_capacity_watts"].astype(float)
+    all_observations_df["timestamp_utc"] = pd.to_datetime(all_observations_df["timestamp_utc"])
+    
+    return all_observations_df
+
+
+async def get_all_data(client, selected_location, start_date, end_date, selected_forecasters):
+    # get generation data
+    time_start = time.time()
+    all_observations_df = await get_all_observations(
+        client,
+        selected_location,
+        start_date,
+        end_date,
+    )
+    observation_seconds = time.time() - time_start
+
+    # get all forecast data
+    time_start = time.time()
+    all_forecast_data_df = await get_forecast_data(
+        client,
+        selected_location,
+        start_date,
+        end_date,
+        selected_forecasters,
+    )
+    forecast_seconds = time.time() - time_start
+
+    # If the observation data includes pvlive_day_after and pvlive_in_day, then let's just take pvlive_day_after
+    one_observations_df = all_observations_df.copy()
+    if "pvlive_day_after" in all_observations_df["observer_name"].values:
+        one_observations_df = all_observations_df[
+            all_observations_df["observer_name"] == "pvlive_day_after"
+        ]
+    
+
+    # make target_timestamp_utc
+    all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime(
+        all_forecast_data_df["init_timestamp"],
+    ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m")
+
+    # join each forecast value to the observation at its target time (inner join),
+    # so the error between p50_watts and value_watts can be calculated below
+    merged_df = pd.merge(
+        all_forecast_data_df,
+        one_observations_df,
+        left_on=["target_timestamp_utc"],
+        right_on=["timestamp_utc"],
+        how="inner",
+        suffixes=("_forecast", "_observation"),
+    )
+    merged_df["effective_capacity_watts_observation"] = merged_df[
+        "effective_capacity_watts_observation"
+    ].astype(float)
 
-    return all_observations_df
\ No newline at end of file
+    # error and absolute error
+    merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"]
+    merged_df["absolute_error"] = merged_df["error"].abs()
+
+    return {
+        "merged_df": merged_df,
+        "all_forecast_data_df": all_forecast_data_df,
+        "all_observations_df": all_observations_df,
+        "forecast_seconds": forecast_seconds,
+        "observation_seconds": observation_seconds,
+    }
diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
new file mode 100644
index 0000000..db47630
--- /dev/null
+++ b/src/dataplatform/forecast/main.py
@@ -0,0 +1,213 @@
+import asyncio
+import os
+
+import streamlit as st
+from dp_sdk.ocf import dp
+from grpclib.client import Channel
+
+from dataplatform.forecast.constanst import metrics
+from dataplatform.forecast.data import get_all_data
+from dataplatform.forecast.plot import (
+    plot_forecast_metric_per_day,
+    plot_forecast_metric_vs_horizon_minutes,
+    plot_forecast_time_series,
+)
+from dataplatform.forecast.setup import setup_page
+
+data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost")
+data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051"))
+
+# TODO make this dynamic
+observer_names = ["pvlive_in_day", "pvlive_day_after"]
+
+
+def dp_forecast_page():
+    asyncio.run(async_dp_forecast_page())
+
+
+async def async_dp_forecast_page():
+    st.title("Data Platform Forecast Page")
+    st.write("This is the forecast page from the Data Platform module. This is very much a WIP")
+
+    async with Channel(host=data_platform_host, port=data_platform_port) as channel:
+        client = dp.DataPlatformDataServiceStub(channel)
+
+        setup_page_dict = await setup_page(client)
+        selected_location = setup_page_dict["selected_location"]
+        start_date = setup_page_dict["start_date"]
+        end_date = setup_page_dict["end_date"]
+        selected_forecasters = setup_page_dict["selected_forecasters"]
+        forecaster_names = setup_page_dict["forecaster_names"]
+        selected_metric = setup_page_dict["selected_metric"]
+        selected_forecast_type = setup_page_dict["selected_forecast_type"]
+        scale_factor = setup_page_dict["scale_factor"]
+        selected_forecast_horizon = setup_page_dict["selected_forecast_horizon"]
+        units = setup_page_dict["units"]
+
+        ### 1. Get all the data ###
+        all_data_dict = await get_all_data(
+            client=client,
+            start_date=start_date,
+            end_date=end_date,
+            selected_forecasters=selected_forecasters,
+            selected_location=selected_location,
+        )
+        merged_df = all_data_dict["merged_df"]
+        all_forecast_data_df = all_data_dict["all_forecast_data_df"]
+        all_observations_df = all_data_dict["all_observations_df"]
+        forecast_seconds = all_data_dict["forecast_seconds"]
+        observation_seconds = all_data_dict["observation_seconds"]
+
+        st.write(f"Selected Location uuid: `{selected_location.location_uuid}`.")
+        st.write(
+            f"Fetched `{len(all_forecast_data_df)}` rows of forecast data in `{forecast_seconds:.2f}` seconds. \
+            Fetched `{len(all_observations_df)}` rows of observation data in `{observation_seconds:.2f}` seconds. \
+            We cache data for 5 minutses to speed up repeated requests.",
+        )
+
+        # add download button
+        csv = all_forecast_data_df.to_csv().encode("utf-8")
+        st.download_button(
+            label="⬇️",
+            data=csv,
+            file_name=f"site_forecast_{selected_location.location_uuid}_{start_date}_{end_date}.csv",
+            mime="text/csv",
+        )
+
+        ### 2. Plot of raw forecast data. ###
+        st.header("Time Series Plot")
+
+        fig = plot_forecast_time_series(
+            all_forecast_data_df=all_forecast_data_df,
+            all_observations_df=all_observations_df,
+            forecaster_names=forecaster_names,
+            observer_names=observer_names,
+            scale_factor=scale_factor,
+            units=units,
+            selected_forecast_type=selected_forecast_type,
+            selected_forecast_horizon=selected_forecast_horizon,
+        )
+        st.plotly_chart(fig)
+
+        ### 3. Summary Accuracy Graph. ###
+        st.header("Summary Accuracy Graph")
+
+        st.write(metrics)
+
+        fig2, summary_df = plot_forecast_metric_vs_horizon_minutes(
+            merged_df, forecaster_names, selected_metric, scale_factor, units
+        )
+
+        st.plotly_chart(fig2)
+
+        csv = summary_df.to_csv().encode("utf-8")
+        st.download_button(
+            label="⬇️",
+            data=csv,
+            file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv",
+            mime="text/csv",
+        )
+
+        ### 4. Summary Accuracy Table, with slider to select min and max horizon mins. ###
+        st.header("Summary Accuracy Table")
+
+        # add slider to select min and max horizon mins
+        min_horizon, max_horizon = st.slider(
+            "Select Horizon Mins Range",
+            int(summary_df["horizon_mins"].min()),
+            int(summary_df["horizon_mins"].max()),
+            (
+                int(summary_df["horizon_mins"].min()),
+                int(summary_df["horizon_mins"].max()),
+            ),
+            step=30,
+        )
+
+        summary_table_df = make_summary_data(
+            merged_df=merged_df,
+            min_horizon=min_horizon,
+            max_horizon=max_horizon,
+            scale_factor=scale_factor,
+            units=units,
+        )
+
+        st.dataframe(summary_table_df)
+
+        ### 5. Daily metric plots. ###
+        st.header("Daily Metrics Plots")
+        st.write(
+            "Plotted below are the daily MAE for each forecaster. This is for all forecast horizons.",
+        )
+
+        fig3 = plot_forecast_metric_per_day(
+            merged_df=merged_df,
+            selected_forecasters=selected_forecasters,
+            scale_factor=scale_factor,
+            units=units,
+            selected_metric=selected_metric
+        )
+
+        st.plotly_chart(fig3)
+
+        st.header("TODO")
+
+        st.write("Bug: cache not releasing")
+        st.write("Align forecasts on t0")
+        st.write("Add more metrics")
+        st.write("Add creation time / t0 forecast filter")
+        st.write("speed up read, use async and more caching")
+
+
+def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units):
+    # Reduce by horizon mins (keep rows between min_horizon and max_horizon)
+    summary_table_df = merged_df[
+        (merged_df["horizon_mins"] >= min_horizon) & (merged_df["horizon_mins"] <= max_horizon)
+    ]
+
+    summary_table_df = summary_table_df.rename(
+        columns={
+            "effective_capacity_watts_observation": "Capacity_watts",
+            "value_watts": "Mean_Observed_Generation_watts",
+        },
+    )
+
+    value_columns = [
+        "error",
+        "absolute_error",
+        #  'absolute_error_normalized_by_generation',
+        "Mean_Observed_Generation_watts",
+        "Capacity_watts",
+    ]
+
+    summary_table_df = summary_table_df[["forecaster_name"] + value_columns]
+
+    summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype(float)
+
+    # group by forecaster name and take the mean
+    summary_table_df = summary_table_df.groupby("forecaster_name").mean()
+
+    # rename
+    summary_table_df = summary_table_df.rename(
+        columns={
+            "error": "ME",
+            "absolute_error": "MAE",
+            # 'absolute_error_normalized_by_generation': 'NMAE (by observed generation)',
+            "Capacity_watts": "Mean Capacity",
+            "Mean_Observed_Generation_watts": "Mean Observed Generation",
+        },
+    )
+
+    # scale by units
+    summary_table_df = summary_table_df / scale_factor
+    summary_table_df = summary_table_df.rename(
+        {col: f"{col} [{units}]" for col in summary_table_df.columns},
+        axis=1,
+    )
+
+    # pivot table, so forecaster_name is columns
+    summary_table_df = summary_table_df.pivot_table(
+        columns=summary_table_df.index,
+        values=summary_table_df.columns.tolist(),
+    )
+
+    return summary_table_df
diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
new file mode 100644
index 0000000..52bbbe1
--- /dev/null
+++ b/src/dataplatform/forecast/plot.py
@@ -0,0 +1,243 @@
+import plotly.graph_objects as go
+
+from dataplatform.forecast.constanst import colours
+
+
+def plot_forecast_time_series(
+    all_forecast_data_df,
+    all_observations_df,
+    forecaster_names,
+    observer_names,
+    scale_factor,
+    units,
+    selected_forecast_type,
+    selected_forecast_horizon,
+):
+    if selected_forecast_type == "Current":
+        # Choose current forecast
+        # i.e. for each target_timestamp_utc, select the row with the lowest horizon_mins
+        # this is done separately for each forecaster_name
+        current_forecast_df = all_forecast_data_df.loc[
+            all_forecast_data_df.groupby(["target_timestamp_utc", "forecaster_name"])[
+                "horizon_mins"
+            ].idxmin()
+        ]
+    elif selected_forecast_type == "Horizon":
+        # Choose horizon forecast
+        current_forecast_df = all_forecast_data_df[
+            all_forecast_data_df["horizon_mins"] >= selected_forecast_horizon
+        ]
+        current_forecast_df = current_forecast_df.loc[
+            current_forecast_df.groupby(["target_timestamp_utc", "forecaster_name"])[
+                "horizon_mins"
+            ].idxmin()
+        ]
+    else:
+        pass
+
+    # plot the results
+    fig = go.Figure()
+    for observer_name in observer_names:
+        obs_df = all_observations_df[all_observations_df["observer_name"] == observer_name]
+
+        if observer_name == "pvlive_in_day":
+            # dashed white line
+            line = dict(color="white", dash="dash")
+        elif observer_name == "pvlive_day_after":
+            line = dict(color="white")
+        else:
+            line = dict()
+
+        fig.add_trace(
+            go.Scatter(
+                x=obs_df["timestamp_utc"],
+                y=obs_df["value_watts"] / scale_factor,
+                mode="lines",
+                name=observer_name,
+                line=line,
+            ),
+        )
+
+    for i, forecaster_name in enumerate(forecaster_names):
+        forecaster_df = current_forecast_df[
+            current_forecast_df["forecaster_name"] == forecaster_name
+        ]
+        fig.add_trace(
+            go.Scatter(
+                x=forecaster_df["target_timestamp_utc"],
+                y=forecaster_df["p50_watts"] / scale_factor,
+                mode="lines",
+                name=forecaster_name,
+                line=dict(color=colours[i % len(colours)]),
+                legendgroup=forecaster_name,
+            ),
+        )
+        if "p10_watts" in forecaster_df.columns and "p90_watts" in forecaster_df.columns:
+            fig.add_trace(
+                go.Scatter(
+                    x=forecaster_df["target_timestamp_utc"],
+                    y=forecaster_df["p10_watts"] / scale_factor,
+                    mode="lines",
+                    line=dict(color=colours[i % len(colours)], width=0),
+                    legendgroup=forecaster_name,
+                    showlegend=False,
+                ),
+            )
+
+            fig.add_trace(
+                go.Scatter(
+                    x=forecaster_df["target_timestamp_utc"],
+                    y=forecaster_df["p90_watts"] / scale_factor,
+                    mode="lines",
+                    line=dict(color=colours[i % len(colours)], width=0),
+                    legendgroup=forecaster_name,
+                    showlegend=False,
+                    fill="tonexty",
+                ),
+            )
+
+    fig.update_layout(
+        title="Current Forecast",
+        xaxis_title="Time",
+        yaxis_title=f"Generation [{units}]",
+        legend_title="Forecaster",
+    )
+
+    return fig
+
+
+def plot_forecast_metric_vs_horizon_minutes(
+    merged_df, forecaster_names, selected_metric, scale_factor, units
+):
+    # Get the mean observed generation
+    mean_observed_generation = merged_df["value_watts"].mean()
+
+    # mean absolute error by horizonMins and forecasterFullName
+    summary_df = (
+        merged_df.groupby(["horizon_mins", "forecaster_name"])
+        .agg({"absolute_error": "mean"})
+        .reset_index()
+    )
+    summary_df["std"] = (
+        merged_df.groupby(["horizon_mins", "forecaster_name"])
+        .agg({"absolute_error": "std"})
+        .reset_index()["absolute_error"]
+    )
+    summary_df["count"] = (
+        merged_df.groupby(["horizon_mins", "forecaster_name"])
+        .agg({"absolute_error": "count"})
+        .reset_index()["absolute_error"]
+    )
+    summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5)
+
+    # ME
+    summary_df["ME"] = (
+        merged_df.groupby(["horizon_mins", "forecaster_name"])
+        .agg({"error": "mean"})
+        .reset_index()["error"]
+    )
+
+    # TODO more metrics
+
+    summary_df["effective_capacity_watts_observation"] = (
+        merged_df.groupby(["horizon_mins", "forecaster_name"])
+        .agg({"effective_capacity_watts_observation": "mean"})
+        .reset_index()["effective_capacity_watts_observation"]
+    )
+
+    # rename absolute_error to MAE
+    summary_df = summary_df.rename(columns={"absolute_error": "MAE"})
+    summary_df["NMAE (by capacity)"] = (
+        summary_df["MAE"] / summary_df["effective_capacity_watts_observation"]
+    )
+    summary_df["NMAE (by mean observed generation)"] = summary_df["MAE"] / mean_observed_generation
+    # summary_df["NMAE (by observed generation)"] = summary_df["absolute_error_divided_by_observed"]
+
+    fig2 = go.Figure()
+
+    for i, forecaster_name in enumerate(forecaster_names):
+        forecaster_df = summary_df[summary_df["forecaster_name"] == forecaster_name]
+        fig2.add_trace(
+            go.Scatter(
+                x=forecaster_df["horizon_mins"],
+                y=forecaster_df[selected_metric] / scale_factor,
+                mode="lines+markers",
+                name=forecaster_name,
+                line=dict(color=colours[i % len(colours)]),
+            ),
+        )
+
+        fig2.add_trace(
+            go.Scatter(
+                x=forecaster_df["horizon_mins"],
+                y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor,
+                mode="lines",
+                line=dict(color=colours[i % len(colours)], width=0),
+                legendgroup=forecaster_name,
+                showlegend=False,
+            ),
+        )
+
+        fig2.add_trace(
+            go.Scatter(
+                x=forecaster_df["horizon_mins"],
+                y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor,
+                mode="lines",
+                line=dict(color=colours[i % len(colours)], width=0),
+                legendgroup=forecaster_name,
+                showlegend=False,
+                fill="tonexty",
+            ),
+        )
+
+    fig2.update_layout(
+        title=f"{selected_metric} by Horizon",
+        xaxis_title="Horizon (Minutes)",
+        yaxis_title=f"{selected_metric} [{units}]",
+        legend_title="Forecaster",
+    )
+
+    return fig2, summary_df
+
+
+def plot_forecast_metric_per_day(
+    merged_df, selected_forecasters, selected_metric, scale_factor, units
+):
+    daily_plots_df = merged_df
+    daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date
+
+    # group by forecaster name and date
+    daily_metrics_df = (
+        daily_plots_df.groupby(["date_utc", "forecaster_name"])
+        .agg({"absolute_error": "mean"})
+        .reset_index()
+    ).rename(columns={"absolute_error": "MAE"})
+    # ME
+    daily_metrics_df["ME"] = (
+        daily_plots_df.groupby(["date_utc", "forecaster_name"])
+        .agg({"error": "mean"})
+        .reset_index()["error"]
+    )
+
+    fig3 = go.Figure()
+    for i, forecaster in enumerate(selected_forecasters):
+        name_and_version = f"{forecaster.forecaster_name}"
+        forecaster_df = daily_metrics_df[daily_metrics_df["forecaster_name"] == name_and_version]
+        fig3.add_trace(
+            go.Scatter(
+                x=forecaster_df["date_utc"],
+                y=forecaster_df[selected_metric] / scale_factor,
+                # mode="lines+markers",
+                name=forecaster.forecaster_name,
+                line=dict(color=colours[i % len(colours)]),
+            ),
+        )
+
+    fig3.update_layout(
+        title=f"Daily {selected_metric}",
+        xaxis_title="Date",
+        yaxis_title=f"{selected_metric} [{units}]",
+        legend_title="Forecaster",
+    )
+
+    return fig3
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
new file mode 100644
index 0000000..bf9c39a
--- /dev/null
+++ b/src/dataplatform/forecast/setup.py
@@ -0,0 +1,95 @@
+from datetime import UTC, datetime, timedelta
+
+import streamlit as st
+from dp_sdk.ocf import dp
+
+from dataplatform.forecast.constanst import metrics
+
+
+async def setup_page(client) -> dict:
+    # Select Country
+    country = st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0)
+
+    # Select Location Type
+    location_types = [
+        dp.LocationType.NATION,
+        dp.LocationType.GSP,
+        dp.LocationType.SITE,
+    ]
+    location_type = st.sidebar.selectbox("Select a Location Type", location_types, index=0)
+
+    # List Location
+    list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type)
+    list_locations_response = await client.list_locations(list_locations_request)
+    locations = list_locations_response.locations
+    location_names = [loc.location_name for loc in locations]
+
+    # slect locations
+    selected_location_name = st.sidebar.selectbox("Select a Location", location_names, index=0)
+    selected_location = next(
+        loc for loc in locations if loc.location_name == selected_location_name
+    )
+
+    # get models
+    get_forecasters_request = dp.ListForecastersRequest()
+    get_forecasters_response = await client.list_forecasters(get_forecasters_request)
+    forecasters = get_forecasters_response.forecasters
+    forecaster_names = sorted(list(set([forecaster.forecaster_name for forecaster in forecasters])))
+    if "pvnet_v2" in forecaster_names:
+        default_index = forecaster_names.index("pvnet_v2")
+    else:
+        default_index = 0
+    selected_forecaster_name = st.sidebar.multiselect(
+        "Select a Forecaster",
+        forecaster_names,
+        default=forecaster_names[default_index],
+    )
+    selected_forecasters = [
+        forecaster
+        for forecaster in forecasters
+        if forecaster.forecaster_name in selected_forecaster_name
+    ]
+
+    # select start and end date
+    start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=30))
+    end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3))
+    start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=UTC)
+    end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC)
+
+    # select forecast type
+    selected_forecast_type = st.sidebar.selectbox(
+        "Select a Forecast Type",
+        ["Current", "Horizon", "t0"],
+        index=0,
+    )
+
+    selected_forecast_horizon = None
+    if selected_forecast_type == "Horizon":
+        selected_forecast_horizon = st.sidebar.selectbox(
+            "Select a Forecast Horizon",
+            list(range(0, 2400, 30)),
+            index=3,
+        )
+
+    # select units
+    default_unit_index = 2  # MW
+    if location_type == dp.LocationType.NATION:
+        default_unit_index = 3  # GW
+    units = st.sidebar.selectbox("Select Units", ["W", "kW", "MW", "GW"], index=default_unit_index)
+    scale_factors = {"W": 1, "kW": 1e3, "MW": 1e6, "GW": 1e9}
+    scale_factor = scale_factors[units]
+
+    selected_metric = st.sidebar.selectbox("Select a Metrics", metrics.keys(), index=0)
+
+    return {
+        "selected_location": selected_location,
+        "selected_forecasters": selected_forecasters,
+        "start_date": start_date,
+        "end_date": end_date,
+        "selected_forecast_type": selected_forecast_type,
+        "scale_factor": scale_factor,
+        "selected_metric": selected_metric,
+        "forecaster_names": forecaster_names,
+        "selected_forecast_horizon": selected_forecast_horizon,
+        "units": units,
+    }

From 2d6ad59ca9be8c020de894345c816423505c93e0 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Fri, 21 Nov 2025 09:11:08 +0000
Subject: [PATCH 29/60] increase forecast window to 30 days

---
 src/dataplatform/forecast/data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index 7b5cb1b..e0acb24 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -76,7 +76,7 @@ async def get_forecast_data_one_forecaster(
     # loop over 30 days of data
     temp_start_date = start_date
     while temp_start_date <= end_date:
-        temp_end_date = temp_start_date + timedelta(days=7)
+        temp_end_date = temp_start_date + timedelta(days=30)
         if temp_end_date > end_date:
             temp_end_date = end_date
 
@@ -103,7 +103,7 @@ async def get_forecast_data_one_forecaster(
                 .drop("other_statistics_fractions", axis=1),
             )
 
-        temp_start_date = temp_start_date + timedelta(days=7)
+        temp_start_date = temp_start_date + timedelta(days=30)
 
     all_data_df = pd.concat(all_data_df, ignore_index=True)
 

From ccf2c872d8896aa163eddfbbc8b3c5ea5c2f8b73 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Fri, 21 Nov 2025 09:38:29 +0000
Subject: [PATCH 30/60] add init files

---
 src/dataplatform/__init__.py          | 0
 src/dataplatform/forecast/__init__.py | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/dataplatform/__init__.py
 create mode 100644 src/dataplatform/forecast/__init__.py

diff --git a/src/dataplatform/__init__.py b/src/dataplatform/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/dataplatform/forecast/__init__.py b/src/dataplatform/forecast/__init__.py
new file mode 100644
index 0000000..e69de29

From 144ccf425fca166171f8316610d8736099e78e7d Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Fri, 21 Nov 2025 10:52:24 +0000
Subject: [PATCH 31/60] fix import

---
 src/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main.py b/src/main.py
index 577303f..663be0c 100644
--- a/src/main.py
+++ b/src/main.py
@@ -11,6 +11,7 @@
 from nowcasting_datamodel.models.metric import MetricValue
 
 from auth import check_password
+from dataplatform.forecast.main import dp_forecast_page
 from forecast import forecast_page
 from get_data import get_metric_value
 from plots.all_gsps import make_all_gsps_plots
@@ -36,7 +37,6 @@
 from cloudcasting_page import cloudcasting_page
 from adjuster import adjuster_page
 from batch_page import batch_page
-from dataplatform.forecast import dp_forecast_page
 
 st.get_option("theme.primaryColor")
 st.set_page_config(layout="wide", page_title="OCF Dashboard")

From 10bbf6e030fa9ed94796a747ba1848564d776a15 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 08:49:11 +0000
Subject: [PATCH 32/60] add more todos

---
 src/dataplatform/forecast/main.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index db47630..0dac0b8 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -156,6 +156,8 @@ async def async_dp_forecast_page():
         st.write("Add more metrics")
         st.write("Add creation time / t0 forecast filter")
         st.write("speed up read, use async and more caching")
+        st.write("Improve GSP labels")
+        st.write("Get page working with no observations data")
 
 
 def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units):

From a0faf6b02357b7294908baebe5d7478b9cbadf99 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 15:18:37 +0000
Subject: [PATCH 33/60] add TODOs

---
 src/dataplatform/forecast/main.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 0dac0b8..8a1b792 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -158,6 +158,14 @@ async def async_dp_forecast_page():
         st.write("speed up read, use async and more caching")
         st.write("Improve GSP labels")
         st.write("Get page working with no observations data")
+        st.write("Change UK to use MW")
+        st.write("Add GSP to name")
+        st.write("Remove last MAE point")
+        st.write("Reduce to last 7 days")
+        st.write("Options to togle probablies in MAE ")
+        st.write("Change y/x to actula and forecast")
+        st.write("Remove duplicate names in legend of daily metrics plot")
+        st.write("Look into shading areas disappering")
 
 
 def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units):

From 1886cb5843aae5869a6ad1d1f57bdb9950d8759a Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 15:19:54 +0000
Subject: [PATCH 34/60] use MW by default on UK-National

---
 src/dataplatform/forecast/main.py  | 2 +-
 src/dataplatform/forecast/setup.py | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 8a1b792..4bbda7f 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -158,7 +158,7 @@ async def async_dp_forecast_page():
         st.write("speed up read, use async and more caching")
         st.write("Improve GSP labels")
         st.write("Get page working with no observations data")
-        st.write("Change UK to use MW")
+        st.write("Change UK to use MW: done")
         st.write("Add GSP to name")
         st.write("Remove last MAE point")
         st.write("Reduce to last 7 days")
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
index bf9c39a..bb02d74 100644
--- a/src/dataplatform/forecast/setup.py
+++ b/src/dataplatform/forecast/setup.py
@@ -73,8 +73,6 @@ async def setup_page(client) -> dict:
 
     # select units
     default_unit_index = 2  # MW
-    if location_type == dp.LocationType.NATION:
-        default_unit_index = 3  # GW
     units = st.sidebar.selectbox("Select Units", ["W", "kW", "MW", "GW"], index=default_unit_index)
     scale_factors = {"W": 1, "kW": 1e3, "MW": 1e6, "GW": 1e9}
     scale_factor = scale_factors[units]

From 5cf060cf460a42f2f6088557d2c61151c6688829 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 15:23:57 +0000
Subject: [PATCH 35/60] add gsp id to name

---
 src/dataplatform/forecast/main.py  |  4 ++--
 src/dataplatform/forecast/setup.py | 15 +++++++++------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 4bbda7f..d004b9d 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -158,8 +158,8 @@ async def async_dp_forecast_page():
         st.write("speed up read, use async and more caching")
         st.write("Improve GSP labels")
         st.write("Get page working with no observations data")
-        st.write("Change UK to use MW: done")
-        st.write("Add GSP to name")
+        st.write("Done: Change UK to use MW")
+        st.write("Done: Add GSP id to name: done")
         st.write("Remove last MAE point")
         st.write("Reduce to last 7 days")
         st.write("Options to togle probablies in MAE ")
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
index bb02d74..98f849e 100644
--- a/src/dataplatform/forecast/setup.py
+++ b/src/dataplatform/forecast/setup.py
@@ -21,14 +21,17 @@ async def setup_page(client) -> dict:
     # List Location
     list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type)
     list_locations_response = await client.list_locations(list_locations_request)
-    locations = list_locations_response.locations
-    location_names = [loc.location_name for loc in locations]
+    all_locations = list_locations_response.locations
+    
+    location_names = {loc.location_name:loc for loc in all_locations}
+    if location_type == dp.LocationType.GSP:
+        location_names = {f'{int(loc.metadata.fields['gsp_id'].number_value)}:{loc.location_name}': loc for loc in all_locations}
+        # sort by gsp id
+        location_names = dict(sorted(location_names.items(), key=lambda item: int(item[0].split(":")[0])))
 
     # slect locations
-    selected_location_name = st.sidebar.selectbox("Select a Location", location_names, index=0)
-    selected_location = next(
-        loc for loc in locations if loc.location_name == selected_location_name
-    )
+    selected_location_name = st.sidebar.selectbox("Select a Location", location_names.keys(), index=0)
+    selected_location = location_names[selected_location_name]
 
     # get models
     get_forecasters_request = dp.ListForecastersRequest()

From c427a7736fccd784652b82060c0d2bb4fd6c935e Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 15:30:34 +0000
Subject: [PATCH 36/60] reduce to 7 days

---
 src/dataplatform/forecast/main.py  | 2 +-
 src/dataplatform/forecast/setup.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index d004b9d..58429fb 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -161,7 +161,7 @@ async def async_dp_forecast_page():
         st.write("Done: Change UK to use MW")
         st.write("Done: Add GSP id to name: done")
         st.write("Remove last MAE point")
-        st.write("Reduce to last 7 days")
+        st.write("Done: Reduce to last 7 days")
         st.write("Options to togle probablies in MAE ")
         st.write("Change y/x to actula and forecast")
         st.write("Remove duplicate names in legend of daily metrics plot")
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
index 98f849e..cb33681 100644
--- a/src/dataplatform/forecast/setup.py
+++ b/src/dataplatform/forecast/setup.py
@@ -54,7 +54,7 @@ async def setup_page(client) -> dict:
     ]
 
     # select start and end date
-    start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=30))
+    start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=7))
     end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3))
     start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=UTC)
     end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC)

From b0dd9aea19a66218ce86a4ec16c8d3131e345849 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 15:40:12 +0000
Subject: [PATCH 37/60] fix for MAE plot

---
 src/dataplatform/forecast/plot.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
index 52bbbe1..3ef4c8c 100644
--- a/src/dataplatform/forecast/plot.py
+++ b/src/dataplatform/forecast/plot.py
@@ -164,6 +164,7 @@ def plot_forecast_metric_vs_horizon_minutes(
                 mode="lines+markers",
                 name=forecaster_name,
                 line=dict(color=colours[i % len(colours)]),
+                legendgroup=forecaster_name,
             ),
         )
 

From e5b137a69af6d5703298890aa005adf26d3a3073 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 15:43:22 +0000
Subject: [PATCH 38/60] have option to show sem

---
 src/dataplatform/forecast/main.py |  9 +++++--
 src/dataplatform/forecast/plot.py | 45 ++++++++++++++++---------------
 2 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 58429fb..ad4cdae 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -93,9 +93,14 @@ async def async_dp_forecast_page():
         st.header("Summary Accuracy Graph")
 
         st.write(metrics)
+        if selected_metric == 'MAE':
+            show_sem = st.checkbox("Show SEM", value=True)
+        else:
+            show_sem = False
+
 
         fig2, summary_df = plot_forecast_metric_vs_horizon_minutes(
-            merged_df, forecaster_names, selected_metric, scale_factor, units
+            merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem
         )
 
         st.plotly_chart(fig2)
@@ -162,7 +167,7 @@ async def async_dp_forecast_page():
         st.write("Done: Add GSP id to name: done")
         st.write("Remove last MAE point")
         st.write("Done: Reduce to last 7 days")
-        st.write("Options to togle probablies in MAE ")
+        st.write("Options to togle probablies in MAE: Done")
         st.write("Change y/x to actula and forecast")
         st.write("Remove duplicate names in legend of daily metrics plot")
         st.write("Look into shading areas disappering")
diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
index 3ef4c8c..da00fac 100644
--- a/src/dataplatform/forecast/plot.py
+++ b/src/dataplatform/forecast/plot.py
@@ -107,7 +107,7 @@ def plot_forecast_time_series(
 
 
 def plot_forecast_metric_vs_horizon_minutes(
-    merged_df, forecaster_names, selected_metric, scale_factor, units
+    merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem
 ):
     # Get the mean observed generation
     mean_observed_generation = merged_df["value_watts"].mean()
@@ -168,28 +168,29 @@ def plot_forecast_metric_vs_horizon_minutes(
             ),
         )
 
-        fig2.add_trace(
-            go.Scatter(
-                x=forecaster_df["horizon_mins"],
-                y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor,
-                mode="lines",
-                line=dict(color=colours[i % len(colours)], width=0),
-                legendgroup=forecaster_name,
-                showlegend=False,
-            ),
-        )
+        if show_sem:
+            fig2.add_trace(
+                go.Scatter(
+                    x=forecaster_df["horizon_mins"],
+                    y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor,
+                    mode="lines",
+                    line=dict(color=colours[i % len(colours)], width=0),
+                    legendgroup=forecaster_name,
+                    showlegend=False,
+                ),
+            )
 
-        fig2.add_trace(
-            go.Scatter(
-                x=forecaster_df["horizon_mins"],
-                y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor,
-                mode="lines",
-                line=dict(color=colours[i % len(colours)], width=0),
-                legendgroup=forecaster_name,
-                showlegend=False,
-                fill="tonexty",
-            ),
-        )
+            fig2.add_trace(
+                go.Scatter(
+                    x=forecaster_df["horizon_mins"],
+                    y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor,
+                    mode="lines",
+                    line=dict(color=colours[i % len(colours)], width=0),
+                    legendgroup=forecaster_name,
+                    showlegend=False,
+                    fill="tonexty",
+                ),
+            )
 
     fig2.update_layout(
         title=f"{selected_metric} by Horizon",

From c83446eb74b17f6b62e3f22371ef77a15c3e01c9 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 15:44:59 +0000
Subject: [PATCH 39/60] forecast vs actual

---
 src/dataplatform/forecast/constanst.py | 4 ++--
 src/dataplatform/forecast/main.py      | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/dataplatform/forecast/constanst.py b/src/dataplatform/forecast/constanst.py
index 107fdab..9fd0aa2 100644
--- a/src/dataplatform/forecast/constanst.py
+++ b/src/dataplatform/forecast/constanst.py
@@ -12,8 +12,8 @@
 ]
 
 metrics = {
-    "MAE": "MAE is absolute mean error, average(abs(y-x))",
-    "ME": "ME is mean (bias) error, average((y-x))",
+    "MAE": "MAE is absolute mean error, average(abs(forecast-actual))",
+    "ME": "ME is mean (bias) error, average((forecast-actual))",
     # "TODO NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)",
     # "TODO NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)",
     #    "NMAE (by observed generation)":" NAME (by observed generation)"
diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index ad4cdae..0f8d5cb 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -161,14 +161,13 @@ async def async_dp_forecast_page():
         st.write("Add more metrics")
         st.write("Add creation time / t0 forecast filter")
         st.write("speed up read, use async and more caching")
-        st.write("Improve GSP labels")
         st.write("Get page working with no observations data")
         st.write("Done: Change UK to use MW")
         st.write("Done: Add GSP id to name: done")
         st.write("Remove last MAE point")
         st.write("Done: Reduce to last 7 days")
-        st.write("Options to togle probablies in MAE: Done")
-        st.write("Change y/x to actula and forecast")
+        st.write("Done: Options to togle probablies in MAE")
+        st.write("Done: Change y/x to actual and forecast")
         st.write("Remove duplicate names in legend of daily metrics plot")
         st.write("Look into shading areas disappering")
 

From 1c0a0b383695756ebb35ad4d13fa42e7ff78b048 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 15:47:13 +0000
Subject: [PATCH 40/60] remove duplicate in daily MAE plot

---
 src/dataplatform/forecast/main.py | 4 ++--
 src/dataplatform/forecast/plot.py | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 0f8d5cb..56510e0 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -146,7 +146,7 @@ async def async_dp_forecast_page():
 
         fig3 = plot_forecast_metric_per_day(
             merged_df=merged_df,
-            selected_forecasters=selected_forecasters,
+            forecaster_names=forecaster_names,
             scale_factor=scale_factor,
             units=units,
             selected_metric=selected_metric
@@ -168,7 +168,7 @@ async def async_dp_forecast_page():
         st.write("Done: Reduce to last 7 days")
         st.write("Done: Options to togle probablies in MAE")
         st.write("Done: Change y/x to actual and forecast")
-        st.write("Remove duplicate names in legend of daily metrics plot")
+        st.write("Done: Remove duplicate names in legend of daily metrics plot")
         st.write("Look into shading areas disappering")
 
 
diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
index da00fac..e2cad8a 100644
--- a/src/dataplatform/forecast/plot.py
+++ b/src/dataplatform/forecast/plot.py
@@ -203,7 +203,7 @@ def plot_forecast_metric_vs_horizon_minutes(
 
 
 def plot_forecast_metric_per_day(
-    merged_df, selected_forecasters, selected_metric, scale_factor, units
+    merged_df, forecaster_names, selected_metric, scale_factor, units
 ):
     daily_plots_df = merged_df
     daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date
@@ -222,15 +222,15 @@ def plot_forecast_metric_per_day(
     )
 
     fig3 = go.Figure()
-    for i, forecaster in enumerate(selected_forecasters):
-        name_and_version = f"{forecaster.forecaster_name}"
+    for i, forecaster_name in enumerate(forecaster_names):
+        name_and_version = f"{forecaster_name}"
         forecaster_df = daily_metrics_df[daily_metrics_df["forecaster_name"] == name_and_version]
         fig3.add_trace(
             go.Scatter(
                 x=forecaster_df["date_utc"],
                 y=forecaster_df[selected_metric] / scale_factor,
                 # mode="lines+markers",
-                name=forecaster.forecaster_name,
+                name=forecaster_name,
                 line=dict(color=colours[i % len(colours)]),
             ),
         )

From c47f8b1ee3a37c8ce1aad6b39f491f64639e6c10 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 16:57:08 +0000
Subject: [PATCH 41/60] minus 1 sec, so we don't get observations on the next
 day

---
 src/dataplatform/forecast/main.py  | 12 +++---------
 src/dataplatform/forecast/setup.py |  2 +-
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 56510e0..592e5d6 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -162,15 +162,9 @@ async def async_dp_forecast_page():
         st.write("Add creation time / t0 forecast filter")
         st.write("speed up read, use async and more caching")
         st.write("Get page working with no observations data")
-        st.write("Done: Change UK to use MW")
-        st.write("Done: Add GSP id to name: done")
-        st.write("Remove last MAE point")
-        st.write("Done: Reduce to last 7 days")
-        st.write("Done: Options to togle probablies in MAE")
-        st.write("Done: Change y/x to actual and forecast")
-        st.write("Done: Remove duplicate names in legend of daily metrics plot")
-        st.write("Look into shading areas disappering")
-
+        st.write("Check works for diffrent version of observers")
+        st.write("Remove last MAE point: done in solar consumer")
+    
 
 def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units):
     # Reduce my horizon mins
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
index cb33681..36d3649 100644
--- a/src/dataplatform/forecast/setup.py
+++ b/src/dataplatform/forecast/setup.py
@@ -57,7 +57,7 @@ async def setup_page(client) -> dict:
     start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=7))
     end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3))
     start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=UTC)
-    end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC)
+    end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC) - timedelta(seconds=1)
 
     # select forecast type
     selected_forecast_type = st.sidebar.selectbox(

From 6ceaad3ce6e00a0c7b83111ad6582827d564acd1 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 16:58:10 +0000
Subject: [PATCH 42/60] tidy

---
 src/dataplatform/forecast/data.py | 3 ---
 src/dataplatform/forecast/main.py | 2 --
 2 files changed, 5 deletions(-)

diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index e0acb24..6a69070 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -7,9 +7,6 @@
 from aiocache import Cache, cached
 from dp_sdk.ocf import dp
 
-data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost")
-data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051"))
-
 # TODO make this dynamic
 observer_names = ["pvlive_in_day", "pvlive_day_after"]
 
diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 592e5d6..6b8efb3 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -162,8 +162,6 @@ async def async_dp_forecast_page():
         st.write("Add creation time / t0 forecast filter")
         st.write("speed up read, use async and more caching")
         st.write("Get page working with no observations data")
-        st.write("Check works for diffrent version of observers")
-        st.write("Remove last MAE point: done in solar consumer")
     
 
 def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units):

From 91f60aabee8c87e66bb452e80c007645b5569d04 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 21:30:51 +0000
Subject: [PATCH 43/60] option for aligning t0s

---
 src/dataplatform/forecast/data.py | 23 +++++++++++++++++++++++
 src/dataplatform/forecast/main.py | 15 ++++++++++-----
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index 6a69070..455071a 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -221,3 +221,26 @@ async def get_all_data(client, selected_location, start_date, end_date, selected
         "forecast_seconds": forecast_seconds,
         "observation_seconds": observation_seconds,
     }
+
+
+def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame:
+    """ Align t0 forecasts for different forecasters """
+
+    # get all forecaster names
+    forecaster_names = merged_df["forecaster_name"].unique()
+
+    # align t0 for each forecaster
+    t0s_per_forecaster = {}
+    for forecaster_name in forecaster_names:
+        forecaster_df = merged_df[merged_df["forecaster_name"] == forecaster_name]
+        
+        t0s = forecaster_df["init_timestamp"].unique()
+        t0s_per_forecaster[forecaster_name] = set(t0s)
+
+    # find common t0s
+    common_t0s = set.intersection(*t0s_per_forecaster.values())
+
+    # align common t0s in merged_df
+    merged_df = merged_df[merged_df["init_timestamp"].isin(common_t0s)]
+
+    return merged_df
\ No newline at end of file
diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 6b8efb3..9aa7f0d 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -6,7 +6,7 @@
 from grpclib.client import Channel
 
 from dataplatform.forecast.constanst import metrics
-from dataplatform.forecast.data import get_all_data
+from dataplatform.forecast.data import get_all_data, align_t0
 from dataplatform.forecast.plot import (
     plot_forecast_metric_per_day,
     plot_forecast_metric_vs_horizon_minutes,
@@ -90,7 +90,8 @@ async def async_dp_forecast_page():
         st.plotly_chart(fig)
 
         ### 3. Summary Accuracy Graph. ###
-        st.header("Summary Accuracy Graph")
+        st.header("Summary Accuracy")
+        
 
         st.write(metrics)
         if selected_metric == 'MAE':
@@ -98,6 +99,11 @@ async def async_dp_forecast_page():
         else:
             show_sem = False
 
+        align_t0s = st.checkbox("Align t0s", value=True)
+        if align_t0s:
+            merged_df = align_t0(merged_df)
+
+        st.subheader("Summary Accuracy Graph")
 
         fig2, summary_df = plot_forecast_metric_vs_horizon_minutes(
             merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem
@@ -114,7 +120,7 @@ async def async_dp_forecast_page():
         )
 
         ### 4. Summary Accuracy Table, with slider to select min and max horizon mins. ###
-        st.header("Summary Accuracy Table")
+        st.subheader("Summary Accuracy Table")
 
         # add slider to select min and max horizon mins
         min_horizon, max_horizon = st.slider(
@@ -139,7 +145,7 @@ async def async_dp_forecast_page():
         st.dataframe(summary_table_df)
 
         ### 4. Daily metric plots. ###
-        st.header("Daily Metrics Plots")
+        st.subheader("Daily Metrics Plots")
         st.write(
             "Plotted below are the daily MAE for each forecaster. This is for all forecast horizons.",
         )
@@ -157,7 +163,6 @@ async def async_dp_forecast_page():
         st.header("TODO")
 
         st.write("Bug: cache not releasing")
-        st.write("Align forecasts on t0")
         st.write("Add more metrics")
         st.write("Add creation time / t0 forecast filter")
         st.write("speed up read, use async and more caching")

From 4b30bdb02c32c3a3589ddd731cab9a03899bcff1 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 24 Nov 2025 21:33:30 +0000
Subject: [PATCH 44/60] MAE plot link to 0

---
 src/dataplatform/forecast/main.py | 13 +++++++------
 src/dataplatform/forecast/plot.py |  3 +++
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 9aa7f0d..d0c52bf 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -94,17 +94,18 @@ async def async_dp_forecast_page():
         
 
         st.write(metrics)
-        if selected_metric == 'MAE':
-            show_sem = st.checkbox("Show SEM", value=True)
-        else:
-            show_sem = False
-
-        align_t0s = st.checkbox("Align t0s", value=True)
+    
+        align_t0s = st.checkbox("Align t0s (Only common t0s across all forecaster are used)", value=True)
         if align_t0s:
             merged_df = align_t0(merged_df)
 
         st.subheader("Summary Accuracy Graph")
 
+        if selected_metric == 'MAE':
+            show_sem = st.checkbox("Show SEM", value=True)
+        else:
+            show_sem = False
+
         fig2, summary_df = plot_forecast_metric_vs_horizon_minutes(
             merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem
         )
diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
index e2cad8a..25ba7e1 100644
--- a/src/dataplatform/forecast/plot.py
+++ b/src/dataplatform/forecast/plot.py
@@ -199,6 +199,9 @@ def plot_forecast_metric_vs_horizon_minutes(
         legend_title="Forecaster",
     )
 
+    if selected_metric == "MAE":
+        fig2.update_yaxes(range=[0, None])
+
     return fig2, summary_df
 
 

From 9603b9cda2749de9703f092efa2c45302f1fd484 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Tue, 25 Nov 2025 09:29:46 +0000
Subject: [PATCH 45/60] try to sort cache issue out

---
 src/dataplatform/forecast/data.py | 5 ++---
 src/dataplatform/forecast/main.py | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index 455071a..6e7c3b5 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -15,9 +15,8 @@ def key_builder_remove_client(func, *args, **kwargs):
     """Custom key builder that ignores the client argument for caching purposes."""
     key = f"{func.__name__}:"
     for arg in args:
-        if isinstance(arg, dp.DataPlatformDataServiceStub):
-            continue
-        key += f"{arg}-"
+        if not isinstance(arg, dp.DataPlatformDataServiceStub):
+            key += f"{arg}-"
 
     for k, v in kwargs.items():
         key += f"{k}={v}-"
diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index d0c52bf..3ddff18 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -161,9 +161,9 @@ async def async_dp_forecast_page():
 
         st.plotly_chart(fig3)
 
-        st.header("TODO")
+        st.header("Known Issues and TODOs")
 
-        st.write("Bug: cache not releasing")
+        st.write("Bug: cache not releasing, the cache should stay for 5 minutes")
         st.write("Add more metrics")
         st.write("Add creation time / t0 forecast filter")
         st.write("speed up read, use async and more caching")

From b861b7ddec3d74754fd2c04737810be3f4426e73 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Tue, 25 Nov 2025 10:05:59 +0000
Subject: [PATCH 46/60] add select t0s from forecast

---
 src/dataplatform/forecast/data.py  |  1 +
 src/dataplatform/forecast/main.py  | 10 +++-
 src/dataplatform/forecast/plot.py  | 92 ++++++++++++++++++------------
 src/dataplatform/forecast/setup.py | 16 +++++-
 4 files changed, 79 insertions(+), 40 deletions(-)

diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index 6e7c3b5..984bb89 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -191,6 +191,7 @@ async def get_all_data(client, selected_location, start_date, end_date, selected
     
 
     # make target_timestamp_utc
+    all_forecast_data_df["init_timestamp"] = pd.to_datetime(all_forecast_data_df["init_timestamp"])
     all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime(
         all_forecast_data_df["init_timestamp"],
     ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m")
diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 3ddff18..6b047f3 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -42,6 +42,7 @@ async def async_dp_forecast_page():
         selected_forecast_type = setup_page_dict["selected_forecast_type"]
         scale_factor = setup_page_dict["scale_factor"]
         selected_forecast_horizon = setup_page_dict["selected_forecast_horizon"]
+        selected_t0s = setup_page_dict["selected_t0s"]
         units = setup_page_dict["units"]
 
         ### 1. Get all the data ###
@@ -77,6 +78,8 @@ async def async_dp_forecast_page():
         ### 2. Plot of raw forecast data. ###
         st.header("Time Series Plot")
 
+        show_probabilistic = st.checkbox("Show Probabilistic Forecasts", value=True)
+
         fig = plot_forecast_time_series(
             all_forecast_data_df=all_forecast_data_df,
             all_observations_df=all_observations_df,
@@ -86,11 +89,13 @@ async def async_dp_forecast_page():
             units=units,
             selected_forecast_type=selected_forecast_type,
             selected_forecast_horizon=selected_forecast_horizon,
+            selected_t0s=selected_t0s,
+            show_probabilistic=show_probabilistic
         )
         st.plotly_chart(fig)
 
         ### 3. Summary Accuracy Graph. ###
-        st.header("Summary Accuracy")
+        st.header("Accuracy")
         
 
         st.write(metrics)
@@ -99,7 +104,7 @@ async def async_dp_forecast_page():
         if align_t0s:
             merged_df = align_t0(merged_df)
 
-        st.subheader("Summary Accuracy Graph")
+        st.subheader("Metric vs Forecast Horizon")
 
         if selected_metric == 'MAE':
             show_sem = st.checkbox("Show SEM", value=True)
@@ -165,7 +170,6 @@ async def async_dp_forecast_page():
 
         st.write("Bug: cache not releasing, the cache should stay for 5 minutes")
         st.write("Add more metrics")
-        st.write("Add creation time / t0 forecast filter")
         st.write("speed up read, use async and more caching")
         st.write("Get page working with no observations data")
     
diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
index 25ba7e1..99c9d71 100644
--- a/src/dataplatform/forecast/plot.py
+++ b/src/dataplatform/forecast/plot.py
@@ -3,6 +3,44 @@
 from dataplatform.forecast.constanst import colours
 
 
+def make_time_series_trace(fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic=True):
+
+    fig.add_trace(
+    go.Scatter(
+        x=forecaster_df["target_timestamp_utc"],
+        y=forecaster_df["p50_watts"] / scale_factor,
+        mode="lines",
+        name=forecaster_name,
+        line=dict(color=colours[i % len(colours)]),
+        legendgroup=forecaster_name,
+    ),
+    )
+    if show_probabilistic and "p10_watts" in forecaster_df.columns and "p90_watts" in forecaster_df.columns:
+        fig.add_trace(
+            go.Scatter(
+                x=forecaster_df["target_timestamp_utc"],
+                y=forecaster_df["p10_watts"] / scale_factor,
+                mode="lines",
+                line=dict(color=colours[i % len(colours)], width=0),
+                legendgroup=forecaster_name,
+                showlegend=False,
+            ),
+        )
+
+        fig.add_trace(
+            go.Scatter(
+                x=forecaster_df["target_timestamp_utc"],
+                y=forecaster_df["p90_watts"] / scale_factor,
+                mode="lines",
+                line=dict(color=colours[i % len(colours)], width=0),
+                legendgroup=forecaster_name,
+                showlegend=False,
+                fill="tonexty",
+            ),
+        )
+
+    return fig
+
 def plot_forecast_time_series(
     all_forecast_data_df,
     all_observations_df,
@@ -12,6 +50,8 @@ def plot_forecast_time_series(
     units,
     selected_forecast_type,
     selected_forecast_horizon,
+    selected_t0s,
+    show_probabilistic=True,
 ):
     if selected_forecast_type == "Current":
         # Choose current forecast
@@ -32,8 +72,10 @@ def plot_forecast_time_series(
                 "horizon_mins"
             ].idxmin()
         ]
-    else:
-        pass
+    elif selected_forecast_type == "t0":
+        current_forecast_df = all_forecast_data_df[
+            all_forecast_data_df["init_timestamp"].isin(selected_t0s)
+        ]
 
     # plot the results
     fig = go.Figure()
@@ -58,43 +100,19 @@ def plot_forecast_time_series(
             ),
         )
 
+
     for i, forecaster_name in enumerate(forecaster_names):
         forecaster_df = current_forecast_df[
-            current_forecast_df["forecaster_name"] == forecaster_name
-        ]
-        fig.add_trace(
-            go.Scatter(
-                x=forecaster_df["target_timestamp_utc"],
-                y=forecaster_df["p50_watts"] / scale_factor,
-                mode="lines",
-                name=forecaster_name,
-                line=dict(color=colours[i % len(colours)]),
-                legendgroup=forecaster_name,
-            ),
-        )
-        if "p10_watts" in forecaster_df.columns and "p90_watts" in forecaster_df.columns:
-            fig.add_trace(
-                go.Scatter(
-                    x=forecaster_df["target_timestamp_utc"],
-                    y=forecaster_df["p10_watts"] / scale_factor,
-                    mode="lines",
-                    line=dict(color=colours[i % len(colours)], width=0),
-                    legendgroup=forecaster_name,
-                    showlegend=False,
-                ),
-            )
+                current_forecast_df["forecaster_name"] == forecaster_name
+            ]
+        if selected_forecast_type in ["Current", "Horizon"]:
 
-            fig.add_trace(
-                go.Scatter(
-                    x=forecaster_df["target_timestamp_utc"],
-                    y=forecaster_df["p90_watts"] / scale_factor,
-                    mode="lines",
-                    line=dict(color=colours[i % len(colours)], width=0),
-                    legendgroup=forecaster_name,
-                    showlegend=False,
-                    fill="tonexty",
-                ),
-            )
+            fig = make_time_series_trace(fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic)
+        elif selected_forecast_type == "t0":
+            for _, t0 in enumerate(selected_t0s):
+                forecaster_with_t0_df = forecaster_df[forecaster_df["init_timestamp"] == t0]
+                forecaster_name_wth_t0 = f"{forecaster_name} | t0: {t0}"
+                fig = make_time_series_trace(fig, forecaster_with_t0_df, forecaster_name_wth_t0, scale_factor, i, show_probabilistic)
 
     fig.update_layout(
         title="Current Forecast",
@@ -246,3 +264,5 @@ def plot_forecast_metric_per_day(
     )
 
     return fig3
+
+
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
index 36d3649..b821d85 100644
--- a/src/dataplatform/forecast/setup.py
+++ b/src/dataplatform/forecast/setup.py
@@ -1,5 +1,6 @@
 from datetime import UTC, datetime, timedelta
 
+import pandas as pd
 import streamlit as st
 from dp_sdk.ocf import dp
 
@@ -67,12 +68,24 @@ async def setup_page(client) -> dict:
     )
 
     selected_forecast_horizon = None
+    selected_t0s = None
     if selected_forecast_type == "Horizon":
         selected_forecast_horizon = st.sidebar.selectbox(
             "Select a Forecast Horizon",
-            list(range(0, 2400, 30)),
+            list(range(0, 24*60, 30)),
             index=3,
         )
+    if selected_forecast_type == "t0":
+
+        # make datetimes every 30 minutes from start_date to end_date
+        all_t0s = pd.date_range(start=start_date, end=end_date, freq='30T').to_pydatetime().tolist()
+
+        
+        selected_t0s = st.sidebar.multiselect(
+            "Select t0s",
+            all_t0s,
+            default=all_t0s[:5],
+        )
 
     # select units
     default_unit_index = 2  # MW
@@ -92,5 +105,6 @@ async def setup_page(client) -> dict:
         "selected_metric": selected_metric,
         "forecaster_names": forecaster_names,
         "selected_forecast_horizon": selected_forecast_horizon,
+        "selected_t0s": selected_t0s,
         "units": units,
     }

From 839cd21e84538ff3fa8af69ab1b13f1f6ae54f28 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Tue, 25 Nov 2025 10:22:40 +0000
Subject: [PATCH 47/60] tidy

---
 src/dataplatform/forecast/main.py  | 1 +
 src/dataplatform/forecast/setup.py | 5 ++---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 6b047f3..67eddbf 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -172,6 +172,7 @@ async def async_dp_forecast_page():
         st.write("Add more metrics")
         st.write("speed up read, use async and more caching")
         st.write("Get page working with no observations data")
+        st.write("MAE vs horizon plot should start at 0")
     
 
 def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units):
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
index b821d85..c40697c 100644
--- a/src/dataplatform/forecast/setup.py
+++ b/src/dataplatform/forecast/setup.py
@@ -78,13 +78,12 @@ async def setup_page(client) -> dict:
     if selected_forecast_type == "t0":
 
         # make datetimes every 30 minutes from start_date to end_date
-        all_t0s = pd.date_range(start=start_date, end=end_date, freq='30T').to_pydatetime().tolist()
+        all_t0s = pd.date_range(start=start_date, end=end_date, freq='30min').to_pydatetime().tolist()
 
-        
         selected_t0s = st.sidebar.multiselect(
             "Select t0s",
             all_t0s,
-            default=all_t0s[:5],
+            default=all_t0s[:min(5, len(all_t0s))],
         )
 
     # select units

From 86c6f8c1ee192e9e21934cef1258f392bd1f4ab4 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Wed, 26 Nov 2025 08:37:42 +0000
Subject: [PATCH 48/60] add todo

---
 src/dataplatform/forecast/main.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 67eddbf..acbada9 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -164,12 +164,14 @@ async def async_dp_forecast_page():
             selected_metric=selected_metric
         )
 
+
         st.plotly_chart(fig3)
 
         st.header("Known Issues and TODOs")
 
         st.write("Bug: cache not releasing, the cache should stay for 5 minutes")
         st.write("Add more metrics")
+        st.write("Group adjust and non-adjust")
         st.write("speed up read, use async and more caching")
         st.write("Get page working with no observations data")
         st.write("MAE vs horizon plot should start at 0")

From 44b08bac260dcd7f737305993123365dfe9dc7de Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Wed, 26 Nov 2025 13:32:05 +0000
Subject: [PATCH 49/60] cache more functions

---
 src/dataplatform/forecast/cache.py | 16 ++++++++
 src/dataplatform/forecast/data.py  | 26 +++---------
 src/dataplatform/forecast/main.py  | 25 ++++++-----
 src/dataplatform/forecast/plot.py  | 62 ++++++++++++++++++----------
 src/dataplatform/forecast/setup.py | 66 ++++++++++++++++++++----------
 5 files changed, 123 insertions(+), 72 deletions(-)
 create mode 100644 src/dataplatform/forecast/cache.py

diff --git a/src/dataplatform/forecast/cache.py b/src/dataplatform/forecast/cache.py
new file mode 100644
index 0000000..5f804ec
--- /dev/null
+++ b/src/dataplatform/forecast/cache.py
@@ -0,0 +1,16 @@
+from dp_sdk.ocf import dp
+
+
+def key_builder_remove_client(func, *args, **kwargs):
+    """Custom key builder that ignores the client argument for caching purposes."""
+    key = f"{func.__name__}:"
+    for arg in args:
+        if not isinstance(arg, dp.DataPlatformDataServiceStub):
+            key += f"{arg}-"
+
+    for k, v in kwargs.items():
+        key += f"{k}={v}-"
+
+    print(key)
+
+    return key
diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index 984bb89..e214d30 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -1,4 +1,3 @@
-import os
 import time
 from datetime import timedelta
 
@@ -7,23 +6,12 @@
 from aiocache import Cache, cached
 from dp_sdk.ocf import dp
 
+from dataplatform.forecast.cache import key_builder_remove_client
+
 # TODO make this dynamic
 observer_names = ["pvlive_in_day", "pvlive_day_after"]
 
 
-def key_builder_remove_client(func, *args, **kwargs):
-    """Custom key builder that ignores the client argument for caching purposes."""
-    key = f"{func.__name__}:"
-    for arg in args:
-        if not isinstance(arg, dp.DataPlatformDataServiceStub):
-            key += f"{arg}-"
-
-    for k, v in kwargs.items():
-        key += f"{k}={v}-"
-
-    return key
-
-
 async def get_forecast_data(
     client,
     location,
@@ -156,7 +144,7 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat
         float,
     ) * all_observations_df["effective_capacity_watts"].astype(float)
     all_observations_df["timestamp_utc"] = pd.to_datetime(all_observations_df["timestamp_utc"])
-    
+
     return all_observations_df
 
 
@@ -188,7 +176,6 @@ async def get_all_data(client, selected_location, start_date, end_date, selected
         one_observations_df = all_observations_df[
             all_observations_df["observer_name"] == "pvlive_day_after"
         ]
-    
 
     # make target_timestamp_utc
     all_forecast_data_df["init_timestamp"] = pd.to_datetime(all_forecast_data_df["init_timestamp"])
@@ -224,8 +211,7 @@ async def get_all_data(client, selected_location, start_date, end_date, selected
 
 
 def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame:
-    """ Align t0 forecasts for different forecasters """
-
+    """Align t0 forecasts for different forecasters"""
     # get all forecaster names
     forecaster_names = merged_df["forecaster_name"].unique()
 
@@ -233,7 +219,7 @@ def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame:
     t0s_per_forecaster = {}
     for forecaster_name in forecaster_names:
         forecaster_df = merged_df[merged_df["forecaster_name"] == forecaster_name]
-        
+
         t0s = forecaster_df["init_timestamp"].unique()
         t0s_per_forecaster[forecaster_name] = set(t0s)
 
@@ -243,4 +229,4 @@ def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame:
     # align common t0s in merged_df
     merged_df = merged_df[merged_df["init_timestamp"].isin(common_t0s)]
 
-    return merged_df
\ No newline at end of file
+    return merged_df
diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index acbada9..fde4056 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -6,7 +6,7 @@
 from grpclib.client import Channel
 
 from dataplatform.forecast.constanst import metrics
-from dataplatform.forecast.data import get_all_data, align_t0
+from dataplatform.forecast.data import align_t0, get_all_data
 from dataplatform.forecast.plot import (
     plot_forecast_metric_per_day,
     plot_forecast_metric_vs_horizon_minutes,
@@ -90,29 +90,35 @@ async def async_dp_forecast_page():
             selected_forecast_type=selected_forecast_type,
             selected_forecast_horizon=selected_forecast_horizon,
             selected_t0s=selected_t0s,
-            show_probabilistic=show_probabilistic
+            show_probabilistic=show_probabilistic,
         )
         st.plotly_chart(fig)
 
         ### 3. Summary Accuracy Graph. ###
         st.header("Accuracy")
-        
 
         st.write(metrics)
-    
-        align_t0s = st.checkbox("Align t0s (Only common t0s across all forecaster are used)", value=True)
+
+        align_t0s = st.checkbox(
+            "Align t0s (Only common t0s across all forecaster are used)", value=True,
+        )
         if align_t0s:
             merged_df = align_t0(merged_df)
 
         st.subheader("Metric vs Forecast Horizon")
 
-        if selected_metric == 'MAE':
+        if selected_metric == "MAE":
             show_sem = st.checkbox("Show SEM", value=True)
         else:
             show_sem = False
 
         fig2, summary_df = plot_forecast_metric_vs_horizon_minutes(
-            merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem
+            merged_df,
+            forecaster_names,
+            selected_metric,
+            scale_factor,
+            units,
+            show_sem,
         )
 
         st.plotly_chart(fig2)
@@ -161,10 +167,9 @@ async def async_dp_forecast_page():
             forecaster_names=forecaster_names,
             scale_factor=scale_factor,
             units=units,
-            selected_metric=selected_metric
+            selected_metric=selected_metric,
         )
 
-
         st.plotly_chart(fig3)
 
         st.header("Known Issues and TODOs")
@@ -175,7 +180,7 @@ async def async_dp_forecast_page():
         st.write("speed up read, use async and more caching")
         st.write("Get page working with no observations data")
         st.write("MAE vs horizon plot should start at 0")
-    
+
 
 def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units):
     # Reduce my horizon mins
diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
index 99c9d71..dc16685 100644
--- a/src/dataplatform/forecast/plot.py
+++ b/src/dataplatform/forecast/plot.py
@@ -3,19 +3,24 @@
 from dataplatform.forecast.constanst import colours
 
 
-def make_time_series_trace(fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic=True):
-
+def make_time_series_trace(
+    fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic=True,
+):
     fig.add_trace(
-    go.Scatter(
-        x=forecaster_df["target_timestamp_utc"],
-        y=forecaster_df["p50_watts"] / scale_factor,
-        mode="lines",
-        name=forecaster_name,
-        line=dict(color=colours[i % len(colours)]),
-        legendgroup=forecaster_name,
-    ),
+        go.Scatter(
+            x=forecaster_df["target_timestamp_utc"],
+            y=forecaster_df["p50_watts"] / scale_factor,
+            mode="lines",
+            name=forecaster_name,
+            line=dict(color=colours[i % len(colours)]),
+            legendgroup=forecaster_name,
+        ),
     )
-    if show_probabilistic and "p10_watts" in forecaster_df.columns and "p90_watts" in forecaster_df.columns:
+    if (
+        show_probabilistic
+        and "p10_watts" in forecaster_df.columns
+        and "p90_watts" in forecaster_df.columns
+    ):
         fig.add_trace(
             go.Scatter(
                 x=forecaster_df["target_timestamp_utc"],
@@ -41,6 +46,7 @@ def make_time_series_trace(fig, forecaster_df, forecaster_name, scale_factor, i,
 
     return fig
 
+
 def plot_forecast_time_series(
     all_forecast_data_df,
     all_observations_df,
@@ -100,19 +106,26 @@ def plot_forecast_time_series(
             ),
         )
 
-
     for i, forecaster_name in enumerate(forecaster_names):
         forecaster_df = current_forecast_df[
-                current_forecast_df["forecaster_name"] == forecaster_name
-            ]
+            current_forecast_df["forecaster_name"] == forecaster_name
+        ]
         if selected_forecast_type in ["Current", "Horizon"]:
-
-            fig = make_time_series_trace(fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic)
+            fig = make_time_series_trace(
+                fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic,
+            )
         elif selected_forecast_type == "t0":
             for _, t0 in enumerate(selected_t0s):
                 forecaster_with_t0_df = forecaster_df[forecaster_df["init_timestamp"] == t0]
                 forecaster_name_wth_t0 = f"{forecaster_name} | t0: {t0}"
-                fig = make_time_series_trace(fig, forecaster_with_t0_df, forecaster_name_wth_t0, scale_factor, i, show_probabilistic)
+                fig = make_time_series_trace(
+                    fig,
+                    forecaster_with_t0_df,
+                    forecaster_name_wth_t0,
+                    scale_factor,
+                    i,
+                    show_probabilistic,
+                )
 
     fig.update_layout(
         title="Current Forecast",
@@ -125,7 +138,12 @@ def plot_forecast_time_series(
 
 
 def plot_forecast_metric_vs_horizon_minutes(
-    merged_df, forecaster_names, selected_metric, scale_factor, units, show_sem
+    merged_df,
+    forecaster_names,
+    selected_metric,
+    scale_factor,
+    units,
+    show_sem,
 ):
     # Get the mean observed generation
     mean_observed_generation = merged_df["value_watts"].mean()
@@ -224,7 +242,11 @@ def plot_forecast_metric_vs_horizon_minutes(
 
 
 def plot_forecast_metric_per_day(
-    merged_df, forecaster_names, selected_metric, scale_factor, units
+    merged_df,
+    forecaster_names,
+    selected_metric,
+    scale_factor,
+    units,
 ):
     daily_plots_df = merged_df
     daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date
@@ -264,5 +286,3 @@ def plot_forecast_metric_per_day(
     )
 
     return fig3
-
-
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
index c40697c..fbf5c51 100644
--- a/src/dataplatform/forecast/setup.py
+++ b/src/dataplatform/forecast/setup.py
@@ -2,11 +2,42 @@
 
 import pandas as pd
 import streamlit as st
+from aiocache import Cache, cached
 from dp_sdk.ocf import dp
 
+from dataplatform.forecast.cache import key_builder_remove_client
 from dataplatform.forecast.constanst import metrics
 
 
+@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
+async def get_location_names(client, location_type) -> dict:
+    # List Location
+    list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type)
+    list_locations_response = await client.list_locations(list_locations_request)
+    all_locations = list_locations_response.locations
+
+    location_names = {loc.location_name: loc for loc in all_locations}
+    if location_type == dp.LocationType.GSP:
+        location_names = {
+            f"{int(loc.metadata.fields['gsp_id'].number_value)}:{loc.location_name}": loc
+            for loc in all_locations
+        }
+        # sort by gsp id
+        location_names = dict(
+            sorted(location_names.items(), key=lambda item: int(item[0].split(":")[0])),
+        )
+
+    return location_names
+
+
+@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
+async def get_forecasters(client):
+    get_forecasters_request = dp.ListForecastersRequest()
+    get_forecasters_response = await client.list_forecasters(get_forecasters_request)
+    forecasters = get_forecasters_response.forecasters
+    return forecasters
+
+
 async def setup_page(client) -> dict:
     # Select Country
     country = st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0)
@@ -19,25 +50,15 @@ async def setup_page(client) -> dict:
     ]
     location_type = st.sidebar.selectbox("Select a Location Type", location_types, index=0)
 
-    # List Location
-    list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type)
-    list_locations_response = await client.list_locations(list_locations_request)
-    all_locations = list_locations_response.locations
-    
-    location_names = {loc.location_name:loc for loc in all_locations}
-    if location_type == dp.LocationType.GSP:
-        location_names = {f'{int(loc.metadata.fields['gsp_id'].number_value)}:{loc.location_name}': loc for loc in all_locations}
-        # sort by gsp id
-        location_names = dict(sorted(location_names.items(), key=lambda item: int(item[0].split(":")[0])))
-
-    # slect locations
-    selected_location_name = st.sidebar.selectbox("Select a Location", location_names.keys(), index=0)
+    # select locations
+    location_names = await get_location_names(client, location_type)
+    selected_location_name = st.sidebar.selectbox(
+        "Select a Location", location_names.keys(), index=0,
+    )
     selected_location = location_names[selected_location_name]
 
     # get models
-    get_forecasters_request = dp.ListForecastersRequest()
-    get_forecasters_response = await client.list_forecasters(get_forecasters_request)
-    forecasters = get_forecasters_response.forecasters
+    forecasters = await get_forecasters(client)
     forecaster_names = sorted(list(set([forecaster.forecaster_name for forecaster in forecasters])))
     if "pvnet_v2" in forecaster_names:
         default_index = forecaster_names.index("pvnet_v2")
@@ -58,7 +79,9 @@ async def setup_page(client) -> dict:
     start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=7))
     end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3))
     start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=UTC)
-    end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC) - timedelta(seconds=1)
+    end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC) - timedelta(
+        seconds=1,
+    )
 
     # select forecast type
     selected_forecast_type = st.sidebar.selectbox(
@@ -72,18 +95,19 @@ async def setup_page(client) -> dict:
     if selected_forecast_type == "Horizon":
         selected_forecast_horizon = st.sidebar.selectbox(
             "Select a Forecast Horizon",
-            list(range(0, 24*60, 30)),
+            list(range(0, 24 * 60, 30)),
             index=3,
         )
     if selected_forecast_type == "t0":
-
         # make datetimes every 30 minutes from start_date to end_date
-        all_t0s = pd.date_range(start=start_date, end=end_date, freq='30min').to_pydatetime().tolist()
+        all_t0s = (
+            pd.date_range(start=start_date, end=end_date, freq="30min").to_pydatetime().tolist()
+        )
 
         selected_t0s = st.sidebar.multiselect(
             "Select t0s",
             all_t0s,
-            default=all_t0s[:min(5, len(all_t0s))],
+            default=all_t0s[: min(5, len(all_t0s))],
         )
 
     # select units

From ab92c25f92670b8a4776fba51986ebc6991a0262 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Wed, 26 Nov 2025 13:49:52 +0000
Subject: [PATCH 50/60] ruff

---
 src/dataplatform/forecast/cache.py            |   6 +-
 .../forecast/{constanst.py => constant.py}    |   5 +-
 src/dataplatform/forecast/data.py             |  48 ++++++---
 src/dataplatform/forecast/main.py             |  39 ++++---
 src/dataplatform/forecast/plot.py             | 101 +++++++++++-------
 src/dataplatform/forecast/setup.py            |  36 ++++---
 6 files changed, 150 insertions(+), 85 deletions(-)
 rename src/dataplatform/forecast/{constanst.py => constant.py} (52%)

diff --git a/src/dataplatform/forecast/cache.py b/src/dataplatform/forecast/cache.py
index 5f804ec..71ee679 100644
--- a/src/dataplatform/forecast/cache.py
+++ b/src/dataplatform/forecast/cache.py
@@ -1,7 +1,9 @@
+"""Cache utilities for the forecast module."""
+
 from dp_sdk.ocf import dp
 
 
-def key_builder_remove_client(func, *args, **kwargs):
+def key_builder_remove_client(func: callable, *args: list, **kwargs: dict) -> str:
     """Custom key builder that ignores the client argument for caching purposes."""
     key = f"{func.__name__}:"
     for arg in args:
@@ -11,6 +13,4 @@ def key_builder_remove_client(func, *args, **kwargs):
     for k, v in kwargs.items():
         key += f"{k}={v}-"
 
-    print(key)
-
     return key
diff --git a/src/dataplatform/forecast/constanst.py b/src/dataplatform/forecast/constant.py
similarity index 52%
rename from src/dataplatform/forecast/constanst.py
rename to src/dataplatform/forecast/constant.py
index 9fd0aa2..bdf7e21 100644
--- a/src/dataplatform/forecast/constanst.py
+++ b/src/dataplatform/forecast/constant.py
@@ -1,3 +1,5 @@
+"""Constants for the forecast module."""
+
 colours = [
     "#FFD480",
     "#FF8F73",
@@ -14,7 +16,4 @@
 metrics = {
     "MAE": "MAE is absolute mean error, average(abs(forecast-actual))",
     "ME": "ME is mean (bias) error, average((forecast-actual))",
-    # "TODO NMAE (by capacity)": " NMAE (by capacity), average(abs(y-x)) / mean(capacity)",
-    # "TODO NMAE (by mean observed generation)": " NMAE (by mean observed generation), average(abs(y-x)) / mean(y)",
-    #    "NMAE (by observed generation)":" NAME (by observed generation)"
 }
diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index e214d30..335a65a 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -1,5 +1,7 @@
+"""Functions to get forecast and observation data from Data Platform."""
+
 import time
-from datetime import timedelta
+from datetime import datetime, timedelta
 
 import betterproto
 import pandas as pd
@@ -13,12 +15,13 @@
 
 
 async def get_forecast_data(
-    client,
-    location,
-    start_date,
-    end_date,
-    selected_forecasters,
+    client: dp.DataPlatformDataServiceStub,
+    location: dp.ListLocationsResponseLocationSummary,
+    start_date: datetime,
+    end_date: datetime,
+    selected_forecasters: list[dp.Forecaster],
 ) -> pd.DataFrame:
+    """Get forecast data for the given location and time window."""
     all_data_df = []
 
     for forecaster in selected_forecasters:
@@ -49,12 +52,13 @@ async def get_forecast_data(
 
 @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
 async def get_forecast_data_one_forecaster(
-    client,
-    location,
-    start_date,
-    end_date,
-    selected_forecaster,
+    client: dp,
+    location: dp.ListLocationsResponseLocationSummary,
+    start_date: datetime,
+    end_date: datetime,
+    selected_forecaster: dp.Forecaster,
 ) -> pd.DataFrame:
+    """Get forecast data for one forecaster for the given location and time window."""
     all_data_df = []
 
     # loop over 30 days of data
@@ -100,7 +104,13 @@ async def get_forecast_data_one_forecaster(
 
 
 @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
-async def get_all_observations(client, location, start_date, end_date) -> pd.DataFrame:
+async def get_all_observations(
+    client: dp.DataPlatformDataServiceStub,
+    location: dp.ListLocationsResponseLocationSummary,
+    start_date: datetime,
+    end_date: datetime,
+) -> pd.DataFrame:
+    """Get all observations for the given location and time window."""
     all_observations_df = []
 
     for observer_name in observer_names:
@@ -148,7 +158,14 @@ async def get_all_observations(client, location, start_date, end_date) -> pd.Dat
     return all_observations_df
 
 
-async def get_all_data(client, selected_location, start_date, end_date, selected_forecasters):
+async def get_all_data(
+    client: dp.DataPlatformDataServiceStub,
+    selected_location: dp.ListLocationsResponseLocationSummary,
+    start_date: datetime,
+    end_date: datetime,
+    selected_forecasters: list[dp.Forecaster],
+) -> dict:
+    """Get all forecast and observation data, and merge them."""
     # get generation data
     time_start = time.time()
     all_observations_df = await get_all_observations(
@@ -170,7 +187,8 @@ async def get_all_data(client, selected_location, start_date, end_date, selected
     )
     forecast_seconds = time.time() - time_start
 
-    # If the observation data includes pvlive_day_after and pvlive_in_day, then lets just take pvlive_day_after
+    # If the observation data includes pvlive_day_after and pvlive_in_day,
+    # then let's just take pvlive_day_after
     one_observations_df = all_observations_df.copy()
     if "pvlive_day_after" in all_observations_df["observer_name"].values:
         one_observations_df = all_observations_df[
@@ -211,7 +229,7 @@ async def get_all_data(client, selected_location, start_date, end_date, selected
 
 
 def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame:
-    """Align t0 forecasts for different forecasters"""
+    """Align t0 forecasts for different forecasters."""
     # get all forecaster names
     forecaster_names = merged_df["forecaster_name"].unique()
 
diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index fde4056..09e6c64 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -1,11 +1,14 @@
+"""Data Platform Forecast Streamlit Page Main Code."""
+
 import asyncio
 import os
 
+import pandas as pd
 import streamlit as st
 from dp_sdk.ocf import dp
 from grpclib.client import Channel
 
-from dataplatform.forecast.constanst import metrics
+from dataplatform.forecast.constant import metrics
 from dataplatform.forecast.data import align_t0, get_all_data
 from dataplatform.forecast.plot import (
     plot_forecast_metric_per_day,
@@ -21,11 +24,13 @@
 observer_names = ["pvlive_in_day", "pvlive_day_after"]
 
 
-def dp_forecast_page():
+def dp_forecast_page() -> None:
+    """Wrapper function that is not async to call the main async function."""
     asyncio.run(async_dp_forecast_page())
 
 
-async def async_dp_forecast_page():
+async def async_dp_forecast_page() -> None:
+    """Async Main function for the Data Platform Forecast Streamlit page."""
     st.title("Data Platform Forecast Page")
     st.write("This is the forecast page from the Data Platform module. This is very much a WIP")
 
@@ -61,8 +66,10 @@ async def async_dp_forecast_page():
 
         st.write(f"Selected Location uuid: `{selected_location.location_uuid}`.")
         st.write(
-            f"Fetched `{len(all_forecast_data_df)}` rows of forecast data in `{forecast_seconds:.2f}` seconds. \
-            Fetched `{len(all_observations_df)}` rows of observation data in `{observation_seconds:.2f}` seconds. \
+            f"Fetched `{len(all_forecast_data_df)}` rows of forecast data \
+            in `{forecast_seconds:.2f}` seconds. \
+            Fetched `{len(all_observations_df)}` rows of observation data \
+            in `{observation_seconds:.2f}` seconds. \
             We cache data for 5 minutses to speed up repeated requests.",
         )
 
@@ -100,17 +107,15 @@ async def async_dp_forecast_page():
         st.write(metrics)
 
         align_t0s = st.checkbox(
-            "Align t0s (Only common t0s across all forecaster are used)", value=True,
+            "Align t0s (Only common t0s across all forecaster are used)",
+            value=True,
         )
         if align_t0s:
             merged_df = align_t0(merged_df)
 
         st.subheader("Metric vs Forecast Horizon")
 
-        if selected_metric == "MAE":
-            show_sem = st.checkbox("Show SEM", value=True)
-        else:
-            show_sem = False
+        show_sem = st.checkbox("Show SEM", value=True) if selected_metric == "MAE" else False
 
         fig2, summary_df = plot_forecast_metric_vs_horizon_minutes(
             merged_df,
@@ -159,7 +164,8 @@ async def async_dp_forecast_page():
         ### 4. Daily metric plots. ###
         st.subheader("Daily Metrics Plots")
         st.write(
-            "Plotted below are the daily MAE for each forecaster. This is for all forecast horizons.",
+            "Plotted below are the daily MAE for each forecaster. "
+            "This is for all forecast horizons.",
         )
 
         fig3 = plot_forecast_metric_per_day(
@@ -182,7 +188,14 @@ async def async_dp_forecast_page():
         st.write("MAE vs horizon plot should start at 0")
 
 
-def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units):
+def make_summary_data(
+    merged_df: pd.DataFrame,
+    min_horizon: int,
+    max_horizon: int,
+    scale_factor: float,
+    units: str,
+) -> pd.DataFrame:
+    """Make summary data table for given min and max horizon mins."""
     # Reduce my horizon mins
     summary_table_df = merged_df[
         (merged_df["horizon_mins"] >= min_horizon) & (merged_df["horizon_mins"] <= max_horizon)
@@ -203,7 +216,7 @@ def make_summary_data(merged_df, min_horizon, max_horizon, scale_factor, units):
         "Capacity_watts",
     ]
 
-    summary_table_df = summary_table_df[["forecaster_name"] + value_columns]
+    summary_table_df = summary_table_df[["forecaster_name", *value_columns]]
 
     summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype(float)
 
diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
index dc16685..92f3d9b 100644
--- a/src/dataplatform/forecast/plot.py
+++ b/src/dataplatform/forecast/plot.py
@@ -1,18 +1,32 @@
+"""Plotting functions for forecast analysis."""
+
+from datetime import datetime
+
+import pandas as pd
 import plotly.graph_objects as go
 
-from dataplatform.forecast.constanst import colours
+from dataplatform.forecast.constant import colours
 
 
 def make_time_series_trace(
-    fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic=True,
-):
+    fig: go.Figure,
+    forecaster_df: pd.DataFrame,
+    forecaster_name: str,
+    scale_factor: float,
+    i: int,
+    show_probabilistic: bool = True,
+) -> go.Figure:
+    """Make time series trace for a forecaster.
+
+    Include p10 and p90 shading if show_probabilistic is True.
+    """
     fig.add_trace(
         go.Scatter(
             x=forecaster_df["target_timestamp_utc"],
             y=forecaster_df["p50_watts"] / scale_factor,
             mode="lines",
             name=forecaster_name,
-            line=dict(color=colours[i % len(colours)]),
+            line={"color": colours[i % len(colours)]},
             legendgroup=forecaster_name,
         ),
     )
@@ -26,7 +40,7 @@ def make_time_series_trace(
                 x=forecaster_df["target_timestamp_utc"],
                 y=forecaster_df["p10_watts"] / scale_factor,
                 mode="lines",
-                line=dict(color=colours[i % len(colours)], width=0),
+                line={"color": colours[i % len(colours)], "width": 0},
                 legendgroup=forecaster_name,
                 showlegend=False,
             ),
@@ -37,7 +51,7 @@ def make_time_series_trace(
                 x=forecaster_df["target_timestamp_utc"],
                 y=forecaster_df["p90_watts"] / scale_factor,
                 mode="lines",
-                line=dict(color=colours[i % len(colours)], width=0),
+                line={"color": colours[i % len(colours)], "width": 0},
                 legendgroup=forecaster_name,
                 showlegend=False,
                 fill="tonexty",
@@ -48,17 +62,21 @@ def make_time_series_trace(
 
 
 def plot_forecast_time_series(
-    all_forecast_data_df,
-    all_observations_df,
-    forecaster_names,
-    observer_names,
-    scale_factor,
-    units,
-    selected_forecast_type,
-    selected_forecast_horizon,
-    selected_t0s,
-    show_probabilistic=True,
-):
+    all_forecast_data_df: pd.DataFrame,
+    all_observations_df: pd.DataFrame,
+    forecaster_names: list[str],
+    observer_names: list[str],
+    scale_factor: float,
+    units: str,
+    selected_forecast_type: str,
+    selected_forecast_horizon: int,
+    selected_t0s: list[datetime],
+    show_probabilistic: bool = True,
+) -> go.Figure:
+    """Plot forecast time series.
+
+    This makes a plot of the raw forecasts and observations, for multiple forecasters.
+    """
     if selected_forecast_type == "Current":
         # Choose current forecast
         # this is done by selecting the unique target_timestamp_utc with the the lowest horizonMins
@@ -90,11 +108,11 @@ def plot_forecast_time_series(
 
         if observer_name == "pvlive_in_day":
             # dashed white line
-            line = dict(color="white", dash="dash")
+            line = {"color": "white", "dash": "dash"}
         elif observer_name == "pvlive_day_after":
-            line = dict(color="white")
+            line = {"color": "white"}
         else:
-            line = dict()
+            line = {}
 
         fig.add_trace(
             go.Scatter(
@@ -112,7 +130,12 @@ def plot_forecast_time_series(
         ]
         if selected_forecast_type in ["Current", "Horizon"]:
             fig = make_time_series_trace(
-                fig, forecaster_df, forecaster_name, scale_factor, i, show_probabilistic,
+                fig,
+                forecaster_df,
+                forecaster_name,
+                scale_factor,
+                i,
+                show_probabilistic,
             )
         elif selected_forecast_type == "t0":
             for _, t0 in enumerate(selected_t0s):
@@ -138,13 +161,14 @@ def plot_forecast_time_series(
 
 
 def plot_forecast_metric_vs_horizon_minutes(
-    merged_df,
-    forecaster_names,
-    selected_metric,
-    scale_factor,
-    units,
-    show_sem,
-):
+    merged_df: pd.DataFrame,
+    forecaster_names: list[str],
+    selected_metric: str,
+    scale_factor: float,
+    units: str,
+    show_sem: bool,
+) -> go.Figure:
+    """Plot forecast metric vs horizon minutes."""
     # Get the mean observed generation
     mean_observed_generation = merged_df["value_watts"].mean()
 
@@ -199,7 +223,7 @@ def plot_forecast_metric_vs_horizon_minutes(
                 y=forecaster_df[selected_metric] / scale_factor,
                 mode="lines+markers",
                 name=forecaster_name,
-                line=dict(color=colours[i % len(colours)]),
+                line={"color": colours[i % len(colours)]},
                 legendgroup=forecaster_name,
             ),
         )
@@ -210,7 +234,7 @@ def plot_forecast_metric_vs_horizon_minutes(
                     x=forecaster_df["horizon_mins"],
                     y=(forecaster_df[selected_metric] - 1.96 * forecaster_df["sem"]) / scale_factor,
                     mode="lines",
-                    line=dict(color=colours[i % len(colours)], width=0),
+                    line={"color": colours[i % len(colours)], "width": 0},
                     legendgroup=forecaster_name,
                     showlegend=False,
                 ),
@@ -221,7 +245,7 @@ def plot_forecast_metric_vs_horizon_minutes(
                     x=forecaster_df["horizon_mins"],
                     y=(forecaster_df[selected_metric] + 1.96 * forecaster_df["sem"]) / scale_factor,
                     mode="lines",
-                    line=dict(color=colours[i % len(colours)], width=0),
+                    line={"color": colours[i % len(colours)], "width": 0},
                     legendgroup=forecaster_name,
                     showlegend=False,
                     fill="tonexty",
@@ -242,12 +266,13 @@ def plot_forecast_metric_vs_horizon_minutes(
 
 
 def plot_forecast_metric_per_day(
-    merged_df,
-    forecaster_names,
-    selected_metric,
-    scale_factor,
-    units,
-):
+    merged_df: pd.DataFrame,
+    forecaster_names: list,
+    selected_metric: str,
+    scale_factor: float,
+    units: str,
+) -> go.Figure:
+    """Plot forecast metric per day."""
     daily_plots_df = merged_df
     daily_plots_df["date_utc"] = daily_plots_df["timestamp_utc"].dt.date
 
@@ -274,7 +299,7 @@ def plot_forecast_metric_per_day(
                 y=forecaster_df[selected_metric] / scale_factor,
                 # mode="lines+markers",
                 name=forecaster_name,
-                line=dict(color=colours[i % len(colours)]),
+                line={"color": colours[i % len(colours)]},
             ),
         )
 
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
index fbf5c51..a5094f1 100644
--- a/src/dataplatform/forecast/setup.py
+++ b/src/dataplatform/forecast/setup.py
@@ -1,3 +1,5 @@
+"""Setup Forecast Streamlit Page."""
+
 from datetime import UTC, datetime, timedelta
 
 import pandas as pd
@@ -6,11 +8,15 @@
 from dp_sdk.ocf import dp
 
 from dataplatform.forecast.cache import key_builder_remove_client
-from dataplatform.forecast.constanst import metrics
+from dataplatform.forecast.constant import metrics
 
 
 @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
-async def get_location_names(client, location_type) -> dict:
+async def get_location_names(
+    client: dp.DataPlatformDataServiceStub,
+    location_type: dp.LocationType,
+) -> dict:
+    """Get location names for a given location type."""
     # List Location
     list_locations_request = dp.ListLocationsRequest(location_type_filter=location_type)
     list_locations_response = await client.list_locations(list_locations_request)
@@ -31,16 +37,18 @@ async def get_location_names(client, location_type) -> dict:
 
 
 @cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
-async def get_forecasters(client):
+async def get_forecasters(client: dp.DataPlatformDataServiceStub) -> list[dp.Forecaster]:
+    """Get all forecasters."""
     get_forecasters_request = dp.ListForecastersRequest()
     get_forecasters_response = await client.list_forecasters(get_forecasters_request)
     forecasters = get_forecasters_response.forecasters
     return forecasters
 
 
-async def setup_page(client) -> dict:
+async def setup_page(client: dp.DataPlatformDataServiceStub) -> dict:
+    """Setup the Streamlit page with sidebar options."""
     # Select Country
-    country = st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0)
+    st.sidebar.selectbox("TODO Select a Country", ["UK", "NL"], index=0)
 
     # Select Location Type
     location_types = [
@@ -53,17 +61,16 @@ async def setup_page(client) -> dict:
     # select locations
     location_names = await get_location_names(client, location_type)
     selected_location_name = st.sidebar.selectbox(
-        "Select a Location", location_names.keys(), index=0,
+        "Select a Location",
+        location_names.keys(),
+        index=0,
     )
     selected_location = location_names[selected_location_name]
 
     # get models
     forecasters = await get_forecasters(client)
-    forecaster_names = sorted(list(set([forecaster.forecaster_name for forecaster in forecasters])))
-    if "pvnet_v2" in forecaster_names:
-        default_index = forecaster_names.index("pvnet_v2")
-    else:
-        default_index = 0
+    forecaster_names = sorted({forecaster.forecaster_name for forecaster in forecasters})
+    default_index = forecaster_names.index("pvnet_v2") if "pvnet_v2" in forecaster_names else 0
     selected_forecaster_name = st.sidebar.multiselect(
         "Select a Forecaster",
         forecaster_names,
@@ -76,8 +83,11 @@ async def setup_page(client) -> dict:
     ]
 
     # select start and end date
-    start_date = st.sidebar.date_input("Start date:", datetime.now().date() - timedelta(days=7))
-    end_date = st.sidebar.date_input("End date:", datetime.now().date() + timedelta(days=3))
+    start_date = st.sidebar.date_input(
+        "Start date:",
+        datetime.now(tz=UTC).date() - timedelta(days=7),
+    )
+    end_date = st.sidebar.date_input("End date:", datetime.now(tz=UTC).date() + timedelta(days=3))
     start_date = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=UTC)
     end_date = datetime.combine(end_date, datetime.min.time()).replace(tzinfo=UTC) - timedelta(
         seconds=1,

From 0f4058e3bdd6562e05fe0f3b86cb213730160489 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Wed, 26 Nov 2025 15:31:06 +0000
Subject: [PATCH 51/60] Feedback, add details

---
 src/dataplatform/forecast/main.py | 17 ++++++++++++-----
 src/dataplatform/forecast/plot.py |  3 +++
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 09e6c64..aa3776d 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -74,12 +74,13 @@ async def async_dp_forecast_page() -> None:
         )
 
         # add download button
-        csv = all_forecast_data_df.to_csv().encode("utf-8")
+        csv = merged_df.to_csv().encode("utf-8")
         st.download_button(
-            label="⬇️",
+            label="⬇️ Download data",
             data=csv,
             file_name=f"site_forecast_{selected_location.location_uuid}_{start_date}_{end_date}.csv",
             mime="text/csv",
+            help='Download the forecast and generation data as a CSV file.'
         )
 
         ### 2. Plot of raw forecast data. ###
@@ -115,7 +116,13 @@ async def async_dp_forecast_page() -> None:
 
         st.subheader("Metric vs Forecast Horizon")
 
-        show_sem = st.checkbox("Show SEM", value=True) if selected_metric == "MAE" else False
+        if selected_metric == "MAE":
+            show_sem = st.checkbox("Show Uncertainty", 
+                                   value=True, 
+                                   help='On the plot below show the uncertainty bands associated with the MAE. ' \
+                                   'This is done by looking at Standard Error of the Mean (SEM) of the absolute errors.')
+        else:
+            show_sem = False
 
         fig2, summary_df = plot_forecast_metric_vs_horizon_minutes(
             merged_df,
@@ -130,10 +137,11 @@ async def async_dp_forecast_page() -> None:
 
         csv = summary_df.to_csv().encode("utf-8")
         st.download_button(
-            label="⬇️",
+            label="⬇️ Download summary",
             data=csv,
             file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv",
             mime="text/csv",
+            help='Download the summary accuracy data as a CSV file.'
         )
 
         ### 4. Summary Accuracy Table, with slider to select min and max horizon mins. ###
@@ -185,7 +193,6 @@ async def async_dp_forecast_page() -> None:
         st.write("Group adjust and non-adjust")
         st.write("speed up read, use async and more caching")
         st.write("Get page working with no observations data")
-        st.write("MAE vs horizon plot should start at 0")
 
 
 def make_summary_data(
diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
index 92f3d9b..42d7e5f 100644
--- a/src/dataplatform/forecast/plot.py
+++ b/src/dataplatform/forecast/plot.py
@@ -310,4 +310,7 @@ def plot_forecast_metric_per_day(
         legend_title="Forecaster",
     )
 
+    if selected_metric == "MAE":
+        fig3.update_yaxes(range=[0, None])
+
     return fig3

From 3af7531d95f16727a0b3e523f4ea8d961fac862c Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Thu, 27 Nov 2025 09:54:25 +0000
Subject: [PATCH 52/60] robustness against no forecast data

---
 src/dataplatform/forecast/data.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index 335a65a..6b9168c 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -32,7 +32,8 @@ async def get_forecast_data(
             end_date,
             forecaster,
         )
-        all_data_df.append(forecaster_data_df)
+        if forecaster_data_df is not None:
+            all_data_df.append(forecaster_data_df)
 
     all_data_df = pd.concat(all_data_df, ignore_index=True)
 
@@ -57,7 +58,7 @@ async def get_forecast_data_one_forecaster(
     start_date: datetime,
     end_date: datetime,
     selected_forecaster: dp.Forecaster,
-) -> pd.DataFrame:
+) -> pd.DataFrame | None:
     """Get forecast data for one forecaster for the given location and time window."""
     all_data_df = []
 
@@ -93,6 +94,9 @@ async def get_forecast_data_one_forecaster(
 
         temp_start_date = temp_start_date + timedelta(days=30)
 
+    if len(all_data_df) == 0:
+        return None
+    
     all_data_df = pd.concat(all_data_df, ignore_index=True)
 
     # create column forecaster_name, its forecaster_fullname with version removed

From 4a0ff3372d906cb9ecc3e611e94f59e3e09453b1 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Fri, 28 Nov 2025 15:30:38 +0000
Subject: [PATCH 53/60] release cache data every 5 mins

---
 src/dataplatform/forecast/cache.py    | 11 +++++++++++
 src/dataplatform/forecast/constant.py |  2 ++
 src/dataplatform/forecast/data.py     |  7 ++++---
 src/dataplatform/forecast/main.py     | 16 +++++++++-------
 src/dataplatform/forecast/setup.py    |  6 +++---
 5 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/src/dataplatform/forecast/cache.py b/src/dataplatform/forecast/cache.py
index 71ee679..a14402b 100644
--- a/src/dataplatform/forecast/cache.py
+++ b/src/dataplatform/forecast/cache.py
@@ -1,7 +1,11 @@
 """Cache utilities for the forecast module."""
 
+from datetime import UTC, datetime, timedelta
+
 from dp_sdk.ocf import dp
 
+from dataplatform.forecast.constant import cache_seconds
+
 
 def key_builder_remove_client(func: callable, *args: list, **kwargs: dict) -> str:
     """Custom key builder that ignores the client argument for caching purposes."""
@@ -13,4 +17,11 @@ def key_builder_remove_client(func: callable, *args: list, **kwargs: dict) -> st
     for k, v in kwargs.items():
         key += f"{k}={v}-"
 
+    # floor the time now to the cache window (cache_seconds), this forces a new cache every window
+    current_time = datetime.now(UTC).replace(second=0, microsecond=0)
+    current_time = current_time - timedelta(
+        minutes=current_time.minute % (int(cache_seconds / 60)),
+    )
+    key += f"time={current_time}-"
+
     return key
diff --git a/src/dataplatform/forecast/constant.py b/src/dataplatform/forecast/constant.py
index bdf7e21..0de2e21 100644
--- a/src/dataplatform/forecast/constant.py
+++ b/src/dataplatform/forecast/constant.py
@@ -17,3 +17,5 @@
     "MAE": "MAE is absolute mean error, average(abs(forecast-actual))",
     "ME": "ME is mean (bias) error, average((forecast-actual))",
 }
+
+cache_seconds = 300  # 5 minutes
diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index 6b9168c..77ba125 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -9,6 +9,7 @@
 from dp_sdk.ocf import dp
 
 from dataplatform.forecast.cache import key_builder_remove_client
+from dataplatform.forecast.constant import cache_seconds
 
 # TODO make this dynamic
 observer_names = ["pvlive_in_day", "pvlive_day_after"]
@@ -51,7 +52,7 @@ async def get_forecast_data(
     return all_data_df
 
 
-@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
+@cached(ttl=cache_seconds, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
 async def get_forecast_data_one_forecaster(
     client: dp,
     location: dp.ListLocationsResponseLocationSummary,
@@ -96,7 +97,7 @@ async def get_forecast_data_one_forecaster(
 
     if len(all_data_df) == 0:
         return None
-    
+
     all_data_df = pd.concat(all_data_df, ignore_index=True)
 
     # create column forecaster_name, its forecaster_fullname with version removed
@@ -107,7 +108,7 @@ async def get_forecast_data_one_forecaster(
     return all_data_df
 
 
-@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
+@cached(ttl=cache_seconds, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
 async def get_all_observations(
     client: dp.DataPlatformDataServiceStub,
     location: dp.ListLocationsResponseLocationSummary,
diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index aa3776d..1700026 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -80,7 +80,7 @@ async def async_dp_forecast_page() -> None:
             data=csv,
             file_name=f"site_forecast_{selected_location.location_uuid}_{start_date}_{end_date}.csv",
             mime="text/csv",
-            help='Download the forecast and generation data as a CSV file.'
+            help="Download the forecast and generation data as a CSV file.",
         )
 
         ### 2. Plot of raw forecast data. ###
@@ -117,10 +117,13 @@ async def async_dp_forecast_page() -> None:
         st.subheader("Metric vs Forecast Horizon")
 
         if selected_metric == "MAE":
-            show_sem = st.checkbox("Show Uncertainty", 
-                                   value=True, 
-                                   help='On the plot below show the uncertainty bands associated with the MAE. ' \
-                                   'This is done by looking at Standard Error of the Mean (SEM) of the absolute errors.')
+            show_sem = st.checkbox(
+                "Show Uncertainty",
+                value=True,
+                help="On the plot below show the uncertainty bands associated with the MAE. "
+                "This is done by looking at " \
+                "Standard Error of the Mean (SEM) of the absolute errors.",
+            )
         else:
             show_sem = False
 
@@ -141,7 +144,7 @@ async def async_dp_forecast_page() -> None:
             data=csv,
             file_name=f"summary_accuracy_{selected_location.location_uuid}_{start_date}_{end_date}.csv",
             mime="text/csv",
-            help='Download the summary accuracy data as a CSV file.'
+            help="Download the summary accuracy data as a CSV file.",
         )
 
         ### 4. Summary Accuracy Table, with slider to select min and max horizon mins. ###
@@ -188,7 +191,6 @@ async def async_dp_forecast_page() -> None:
 
         st.header("Known Issues and TODOs")
 
-        st.write("Bug: cache not releasing, the cache should stay for 5 minutes")
         st.write("Add more metrics")
         st.write("Group adjust and non-adjust")
         st.write("speed up read, use async and more caching")
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
index a5094f1..b82c8a5 100644
--- a/src/dataplatform/forecast/setup.py
+++ b/src/dataplatform/forecast/setup.py
@@ -8,10 +8,10 @@
 from dp_sdk.ocf import dp
 
 from dataplatform.forecast.cache import key_builder_remove_client
-from dataplatform.forecast.constant import metrics
+from dataplatform.forecast.constant import cache_seconds, metrics
 
 
-@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
+@cached(ttl=cache_seconds, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
 async def get_location_names(
     client: dp.DataPlatformDataServiceStub,
     location_type: dp.LocationType,
@@ -36,7 +36,7 @@ async def get_location_names(
     return location_names
 
 
-@cached(ttl=300, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
+@cached(ttl=cache_seconds, cache=Cache.MEMORY, key_builder=key_builder_remove_client)
 async def get_forecasters(client: dp.DataPlatformDataServiceStub) -> list[dp.Forecaster]:
     """Get all forecasters."""
     get_forecasters_request = dp.ListForecastersRequest()

From 697893511694fe720c756ccd1bffad89ba8e52a1 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 1 Dec 2025 17:07:02 +0000
Subject: [PATCH 54/60] PR comments

---
 src/dataplatform/forecast/constant.py |   3 +
 src/dataplatform/forecast/data.py     |  84 ++++++++-----------
 src/dataplatform/forecast/main.py     | 112 ++++++++++++++++++--------
 src/dataplatform/forecast/plot.py     |  49 +----------
 src/dataplatform/forecast/setup.py    |   2 +-
 5 files changed, 117 insertions(+), 133 deletions(-)

diff --git a/src/dataplatform/forecast/constant.py b/src/dataplatform/forecast/constant.py
index 0de2e21..5c51245 100644
--- a/src/dataplatform/forecast/constant.py
+++ b/src/dataplatform/forecast/constant.py
@@ -19,3 +19,6 @@
 }
 
 cache_seconds = 300  # 5 minutes
+
+# This is used for a specific case for the UK National and GSP
+observer_names = ["pvlive_in_day", "pvlive_day_after"]
diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index 77ba125..33f653c 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -9,10 +9,7 @@
 from dp_sdk.ocf import dp
 
 from dataplatform.forecast.cache import key_builder_remove_client
-from dataplatform.forecast.constant import cache_seconds
-
-# TODO make this dynamic
-observer_names = ["pvlive_in_day", "pvlive_day_after"]
+from dataplatform.forecast.constant import cache_seconds, observer_names
 
 
 async def get_forecast_data(
@@ -38,16 +35,14 @@ async def get_forecast_data(
 
     all_data_df = pd.concat(all_data_df, ignore_index=True)
 
+    all_data_df["effective_capacity_watts"] = all_data_df["effective_capacity_watts"].astype(float)
+
     # get watt value
-    all_data_df["p50_watts"] = all_data_df["p50_fraction"].astype(float) * all_data_df[
-        "effective_capacity_watts"
-    ].astype(float)
+    all_data_df["p50_watts"] = all_data_df["p50_fraction"] * all_data_df["effective_capacity_watts"]
 
     for col in ["p10", "p25", "p75", "p90"]:
         if col in all_data_df.columns:
-            all_data_df[f"{col}_watts"] = all_data_df[col].astype(float) * all_data_df[
-                "effective_capacity_watts"
-            ].astype(float)
+            all_data_df[f"{col}_watts"] = all_data_df[col] * all_data_df["effective_capacity_watts"]
 
     return all_data_df
 
@@ -61,14 +56,12 @@ async def get_forecast_data_one_forecaster(
     selected_forecaster: dp.Forecaster,
 ) -> pd.DataFrame | None:
     """Get forecast data for one forecaster for the given location and time window."""
-    all_data_df = []
+    all_data_list_dict = []
 
-    # loop over 30 days of data
+    # Grab all the data, in chunks of 30 days to avoid too large requests
     temp_start_date = start_date
     while temp_start_date <= end_date:
-        temp_end_date = temp_start_date + timedelta(days=30)
-        if temp_end_date > end_date:
-            temp_end_date = end_date
+        temp_end_date = min(temp_start_date + timedelta(days=30), end_date)
 
         # fetch data
         stream_forecast_data_request = dp.StreamForecastDataRequest(
@@ -87,18 +80,18 @@ async def get_forecast_data_one_forecaster(
             )
 
         if len(forecasts) > 0:
-            all_data_df.append(
-                pd.DataFrame.from_dict(forecasts)
-                .pipe(lambda df: df.join(pd.json_normalize(df["other_statistics_fractions"])))
-                .drop("other_statistics_fractions", axis=1),
-            )
+            all_data_list_dict.extend(forecasts)
 
         temp_start_date = temp_start_date + timedelta(days=30)
 
+    all_data_df = pd.DataFrame.from_dict(all_data_list_dict)
     if len(all_data_df) == 0:
         return None
 
-    all_data_df = pd.concat(all_data_df, ignore_index=True)
+    # get plevels into columns
+    all_data_df = all_data_df.pipe(
+        lambda df: df.join(pd.json_normalize(df["other_statistics_fractions"])),
+    ).drop("other_statistics_fractions", axis=1)
 
     # create column forecaster_name, its forecaster_fullname with version removed
     all_data_df["forecaster_name"] = all_data_df["forecaster_fullname"].apply(
@@ -119,13 +112,11 @@ async def get_all_observations(
     all_observations_df = []
 
     for observer_name in observer_names:
-        # loop over 7 days of data
+        # Get all the observations for this observer_name, in chunks of 7 days
         observation_one_df = []
         temp_start_date = start_date
         while temp_start_date <= end_date:
-            temp_end_date = temp_start_date + timedelta(days=7)
-            if temp_end_date > end_date:
-                temp_end_date = end_date
+            temp_end_date = min(temp_start_date + timedelta(days=7), end_date)
 
             get_observations_request = dp.GetObservationsAsTimeseriesRequest(
                 observer_name=observer_name,
@@ -155,9 +146,13 @@ async def get_all_observations(
 
     all_observations_df = pd.concat(all_observations_df, ignore_index=True)
 
-    all_observations_df["value_watts"] = all_observations_df["value_fraction"].astype(
-        float,
-    ) * all_observations_df["effective_capacity_watts"].astype(float)
+    all_observations_df["effective_capacity_watts"] = all_observations_df[
+        "effective_capacity_watts"
+    ].astype(float)
+
+    all_observations_df["value_watts"] = (
+        all_observations_df["value_fraction"] * all_observations_df["effective_capacity_watts"]
+    )
     all_observations_df["timestamp_utc"] = pd.to_datetime(all_observations_df["timestamp_utc"])
 
     return all_observations_df
@@ -202,9 +197,9 @@ async def get_all_data(
 
     # make target_timestamp_utc
     all_forecast_data_df["init_timestamp"] = pd.to_datetime(all_forecast_data_df["init_timestamp"])
-    all_forecast_data_df["target_timestamp_utc"] = pd.to_datetime(
-        all_forecast_data_df["init_timestamp"],
-    ) + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m")
+    all_forecast_data_df["target_timestamp_utc"] = all_forecast_data_df[
+        "init_timestamp"
+    ] + pd.to_timedelta(all_forecast_data_df["horizon_mins"], unit="m")
 
     # take the foecast data, and group by horizonMins, forecasterFullName
     # calculate mean absolute error between p50Fraction and observations valueFraction
@@ -218,7 +213,7 @@ async def get_all_data(
     )
     merged_df["effective_capacity_watts_observation"] = merged_df[
         "effective_capacity_watts_observation"
-    ].astype(float)
+    ]
 
     # error and absolute error
     merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"]
@@ -235,21 +230,10 @@ async def get_all_data(
 
 def align_t0(merged_df: pd.DataFrame) -> pd.DataFrame:
     """Align t0 forecasts for different forecasters."""
-    # get all forecaster names
-    forecaster_names = merged_df["forecaster_name"].unique()
-
-    # align t0 for each forecaster
-    t0s_per_forecaster = {}
-    for forecaster_name in forecaster_names:
-        forecaster_df = merged_df[merged_df["forecaster_name"] == forecaster_name]
-
-        t0s = forecaster_df["init_timestamp"].unique()
-        t0s_per_forecaster[forecaster_name] = set(t0s)
-
-    # find common t0s
-    common_t0s = set.intersection(*t0s_per_forecaster.values())
-
-    # align common t0s in merged_df
-    merged_df = merged_df[merged_df["init_timestamp"].isin(common_t0s)]
-
-    return merged_df
+    # number of unique forecasters
+    num_forecasters = merged_df["forecaster_name"].nunique()
+    # Count number of forecasters that have each t0 time
+    counts = merged_df.groupby("init_timestamp")["forecaster_name"].nunique()
+    # Filter to just those t0s that all forecasters have
+    common_t0s = counts[counts == num_forecasters].index
+    return merged_df[merged_df["init_timestamp"].isin(common_t0s)]
diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 1700026..27ee535 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -8,7 +8,7 @@
 from dp_sdk.ocf import dp
 from grpclib.client import Channel
 
-from dataplatform.forecast.constant import metrics
+from dataplatform.forecast.constant import metrics, observer_names
 from dataplatform.forecast.data import align_t0, get_all_data
 from dataplatform.forecast.plot import (
     plot_forecast_metric_per_day,
@@ -20,9 +20,6 @@
 data_platform_host = os.getenv("DATA_PLATFORM_HOST", "localhost")
 data_platform_port = int(os.getenv("DATA_PLATFORM_PORT", "50051"))
 
-# TODO make this dynamic
-observer_names = ["pvlive_in_day", "pvlive_day_after"]
-
 
 def dp_forecast_page() -> None:
     """Wrapper function that is not async to call the main async function."""
@@ -121,14 +118,17 @@ async def async_dp_forecast_page() -> None:
                 "Show Uncertainty",
                 value=True,
                 help="On the plot below show the uncertainty bands associated with the MAE. "
-                "This is done by looking at " \
-                "Standard Error of the Mean (SEM) of the absolute errors.",
+                "This is done by looking at the "
+                "Standard Error of the Mean (SEM) of the absolute errors. "
+                "We plot the 5 to 95 percentile range around the MAE.",
             )
         else:
             show_sem = False
 
-        fig2, summary_df = plot_forecast_metric_vs_horizon_minutes(
-            merged_df,
+        summary_df = make_summary_data_metric_vs_horizon_minutes(merged_df)
+
+        fig2 = plot_forecast_metric_vs_horizon_minutes(
+            summary_df,
             forecaster_names,
             selected_metric,
             scale_factor,
@@ -151,13 +151,15 @@ async def async_dp_forecast_page() -> None:
         st.subheader("Summary Accuracy Table")
 
         # add slider to select min and max horizon mins
+        default_min_horizon = int(summary_df["horizon_mins"].min())
+        default_max_horizon = int(summary_df["horizon_mins"].max())
         min_horizon, max_horizon = st.slider(
             "Select Horizon Mins Range",
-            int(summary_df["horizon_mins"].min()),
-            int(summary_df["horizon_mins"].max()),
+            default_min_horizon,
+            default_max_horizon,
             (
-                int(summary_df["horizon_mins"].min()),
-                int(summary_df["horizon_mins"].max()),
+                default_min_horizon,
+                default_max_horizon,
             ),
             step=30,
         )
@@ -210,39 +212,19 @@ def make_summary_data(
         (merged_df["horizon_mins"] >= min_horizon) & (merged_df["horizon_mins"] <= max_horizon)
     ]
 
-    summary_table_df = summary_table_df.rename(
-        columns={
-            "effective_capacity_watts_observation": "Capacity_watts",
-            "value_watts": "Mean_Observed_Generation_watts",
-        },
-    )
+    capacity_watts_col = "effective_capacity_watts_observation"
 
     value_columns = [
         "error",
         "absolute_error",
-        #  'absolute_error_normalized_by_generation',
-        "Mean_Observed_Generation_watts",
-        "Capacity_watts",
+        "value_watts",
+        capacity_watts_col,
     ]
-
     summary_table_df = summary_table_df[["forecaster_name", *value_columns]]
 
-    summary_table_df["Capacity_watts"] = summary_table_df["Capacity_watts"].astype(float)
-
     # group by forecaster full name a
     summary_table_df = summary_table_df.groupby("forecaster_name").mean()
 
-    # rename
-    summary_table_df = summary_table_df.rename(
-        columns={
-            "error": "ME",
-            "absolute_error": "MAE",
-            # 'absolute_error_normalized_by_generation': 'NMAE (by observed generation)',
-            "Capacity_watts": "Mean Capacity",
-            "Mean_Observed_Generation_watts": "Mean Observed Generation",
-        },
-    )
-
     # scale by units
     summary_table_df = summary_table_df / scale_factor
     summary_table_df = summary_table_df.rename(
@@ -256,4 +238,64 @@ def make_summary_data(
         values=summary_table_df.columns.tolist(),
     )
 
+    # rename
+    summary_table_df = summary_table_df.rename(
+        columns={
+            "error": "ME",
+            "absolute_error": "MAE",
+            capacity_watts_col: "Mean Capacity",
+            "value_watts": "Mean Observed Generation",
+        },
+    )
+
     return summary_table_df
+
+
+def make_summary_data_metric_vs_horizon_minutes(
+    merged_df: pd.DataFrame,
+) -> pd.DataFrame:
+    """Make summary data for forecast metric vs horizon minutes."""
+    # Get the mean observed generation
+    mean_observed_generation = merged_df["value_watts"].mean()
+
+    # mean absolute error by horizon_mins and forecaster_name
+    summary_df = (
+        merged_df.groupby(["horizon_mins", "forecaster_name"])
+        .agg({"absolute_error": "mean"})
+        .reset_index()
+    )
+    summary_df["std"] = (
+        merged_df.groupby(["horizon_mins", "forecaster_name"])
+        .agg({"absolute_error": "std"})
+        .reset_index()["absolute_error"]
+    )
+    summary_df["count"] = (
+        merged_df.groupby(["horizon_mins", "forecaster_name"])
+        .agg({"absolute_error": "count"})
+        .reset_index()["absolute_error"]
+    )
+    summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5)
+
+    # ME
+    summary_df["ME"] = (
+        merged_df.groupby(["horizon_mins", "forecaster_name"])
+        .agg({"error": "mean"})
+        .reset_index()["error"]
+    )
+
+    # TODO more metrics
+
+    summary_df["effective_capacity_watts_observation"] = (
+        merged_df.groupby(["horizon_mins", "forecaster_name"])
+        .agg({"effective_capacity_watts_observation": "mean"})
+        .reset_index()["effective_capacity_watts_observation"]
+    )
+
+    # rename absolute_error to MAE
+    summary_df = summary_df.rename(columns={"absolute_error": "MAE"})
+    summary_df["NMAE (by capacity)"] = (
+        summary_df["MAE"] / summary_df["effective_capacity_watts_observation"]
+    )
+    summary_df["NMAE (by mean observed generation)"] = summary_df["MAE"] / mean_observed_generation
+
+    return summary_df
diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
index 42d7e5f..bd89138 100644
--- a/src/dataplatform/forecast/plot.py
+++ b/src/dataplatform/forecast/plot.py
@@ -161,7 +161,7 @@ def plot_forecast_time_series(
 
 
 def plot_forecast_metric_vs_horizon_minutes(
-    merged_df: pd.DataFrame,
+    summary_df: pd.DataFrame,
     forecaster_names: list[str],
     selected_metric: str,
     scale_factor: float,
@@ -169,50 +169,6 @@ def plot_forecast_metric_vs_horizon_minutes(
     show_sem: bool,
 ) -> go.Figure:
     """Plot forecast metric vs horizon minutes."""
-    # Get the mean observed generation
-    mean_observed_generation = merged_df["value_watts"].mean()
-
-    # mean absolute error by horizonMins and forecasterFullName
-    summary_df = (
-        merged_df.groupby(["horizon_mins", "forecaster_name"])
-        .agg({"absolute_error": "mean"})
-        .reset_index()
-    )
-    summary_df["std"] = (
-        merged_df.groupby(["horizon_mins", "forecaster_name"])
-        .agg({"absolute_error": "std"})
-        .reset_index()["absolute_error"]
-    )
-    summary_df["count"] = (
-        merged_df.groupby(["horizon_mins", "forecaster_name"])
-        .agg({"absolute_error": "count"})
-        .reset_index()["absolute_error"]
-    )
-    summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5)
-
-    # ME
-    summary_df["ME"] = (
-        merged_df.groupby(["horizon_mins", "forecaster_name"])
-        .agg({"error": "mean"})
-        .reset_index()["error"]
-    )
-
-    # TODO more metrics
-
-    summary_df["effective_capacity_watts_observation"] = (
-        merged_df.groupby(["horizon_mins", "forecaster_name"])
-        .agg({"effective_capacity_watts_observation": "mean"})
-        .reset_index()["effective_capacity_watts_observation"]
-    )
-
-    # rename absolute_error to MAE
-    summary_df = summary_df.rename(columns={"absolute_error": "MAE"})
-    summary_df["NMAE (by capacity)"] = (
-        summary_df["MAE"] / summary_df["effective_capacity_watts_observation"]
-    )
-    summary_df["NMAE (by mean observed generation)"] = summary_df["MAE"] / mean_observed_generation
-    # summary_df["NMAE (by observed generation)"] = summary_df["absolute_error_divided_by_observed"]
-
     fig2 = go.Figure()
 
     for i, forecaster_name in enumerate(forecaster_names):
@@ -262,7 +218,7 @@ def plot_forecast_metric_vs_horizon_minutes(
     if selected_metric == "MAE":
         fig2.update_yaxes(range=[0, None])
 
-    return fig2, summary_df
+    return fig2
 
 
 def plot_forecast_metric_per_day(
@@ -297,7 +253,6 @@ def plot_forecast_metric_per_day(
             go.Scatter(
                 x=forecaster_df["date_utc"],
                 y=forecaster_df[selected_metric] / scale_factor,
-                # mode="lines+markers",
                 name=forecaster_name,
                 line={"color": colours[i % len(colours)]},
             ),
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
index b82c8a5..e6b8e8a 100644
--- a/src/dataplatform/forecast/setup.py
+++ b/src/dataplatform/forecast/setup.py
@@ -105,7 +105,7 @@ async def setup_page(client: dp.DataPlatformDataServiceStub) -> dict:
     if selected_forecast_type == "Horizon":
         selected_forecast_horizon = st.sidebar.selectbox(
             "Select a Forecast Horizon",
-            list(range(0, 24 * 60, 30)),
+            list(range(0, 36 * 60, 30)),
             index=3,
         )
     if selected_forecast_type == "t0":

From 2b2da5bb00d813e786271dcb3661332f0567dfef Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 1 Dec 2025 17:25:23 +0000
Subject: [PATCH 55/60] add option for strict forecast filtering

---
 src/dataplatform/forecast/main.py  |  2 ++
 src/dataplatform/forecast/plot.py  | 12 +++++++++---
 src/dataplatform/forecast/setup.py |  8 ++++++++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 27ee535..0a6125f 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -46,6 +46,7 @@ async def async_dp_forecast_page() -> None:
         selected_forecast_horizon = setup_page_dict["selected_forecast_horizon"]
         selected_t0s = setup_page_dict["selected_t0s"]
         units = setup_page_dict["units"]
+        strict_horizon_filtering = setup_page_dict["strict_horizon_filtering"]
 
         ### 1. Get all the data ###
         all_data_dict = await get_all_data(
@@ -96,6 +97,7 @@ async def async_dp_forecast_page() -> None:
             selected_forecast_horizon=selected_forecast_horizon,
             selected_t0s=selected_t0s,
             show_probabilistic=show_probabilistic,
+            strict_horizon_filtering=strict_horizon_filtering,
         )
         st.plotly_chart(fig)
 
diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
index bd89138..ebaa66c 100644
--- a/src/dataplatform/forecast/plot.py
+++ b/src/dataplatform/forecast/plot.py
@@ -72,6 +72,7 @@ def plot_forecast_time_series(
     selected_forecast_horizon: int,
     selected_t0s: list[datetime],
     show_probabilistic: bool = True,
+    strict_horizon_filtering: bool = False,
 ) -> go.Figure:
     """Plot forecast time series.
 
@@ -88,9 +89,14 @@ def plot_forecast_time_series(
         ]
     elif selected_forecast_type == "Horizon":
         # Choose horizon forecast
-        current_forecast_df = all_forecast_data_df[
-            all_forecast_data_df["horizon_mins"] >= selected_forecast_horizon
-        ]
+        if strict_horizon_filtering:
+            current_forecast_df = all_forecast_data_df[
+                all_forecast_data_df["horizon_mins"] == selected_forecast_horizon
+            ]
+        else:
+            current_forecast_df = all_forecast_data_df[
+                all_forecast_data_df["horizon_mins"] >= selected_forecast_horizon
+            ]
         current_forecast_df = current_forecast_df.loc[
             current_forecast_df.groupby(["target_timestamp_utc", "forecaster_name"])[
                 "horizon_mins"
diff --git a/src/dataplatform/forecast/setup.py b/src/dataplatform/forecast/setup.py
index e6b8e8a..a46db86 100644
--- a/src/dataplatform/forecast/setup.py
+++ b/src/dataplatform/forecast/setup.py
@@ -101,6 +101,7 @@ async def setup_page(client: dp.DataPlatformDataServiceStub) -> dict:
     )
 
     selected_forecast_horizon = None
+    strict_horizon_filtering = False
     selected_t0s = None
     if selected_forecast_type == "Horizon":
         selected_forecast_horizon = st.sidebar.selectbox(
@@ -108,6 +109,12 @@ async def setup_page(client: dp.DataPlatformDataServiceStub) -> dict:
             list(range(0, 36 * 60, 30)),
             index=3,
         )
+        strict_horizon_filtering = st.sidebar.checkbox(
+            "Strict Horizon Filtering",
+            value=False,
+            help="Only show forecasts that exactly match the selected horizon, "
+            "if not, we use any forecast horizon greater or equal than",
+        )
     if selected_forecast_type == "t0":
         # make datetimes every 30 minutes from start_date to end_date
         all_t0s = (
@@ -140,4 +147,5 @@ async def setup_page(client: dp.DataPlatformDataServiceStub) -> dict:
         "selected_forecast_horizon": selected_forecast_horizon,
         "selected_t0s": selected_t0s,
         "units": units,
+        "strict_horizon_filtering": strict_horizon_filtering,
     }

From b1bee0b78ba86cd409fce85546df04fba5883a25 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 1 Dec 2025 17:26:30 +0000
Subject: [PATCH 56/60] tidy

---
 src/dataplatform/forecast/data.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index 33f653c..34c3df5 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -211,9 +211,6 @@ async def get_all_data(
         how="inner",
         suffixes=("_forecast", "_observation"),
     )
-    merged_df["effective_capacity_watts_observation"] = merged_df[
-        "effective_capacity_watts_observation"
-    ]
 
     # error and absolute error
     merged_df["error"] = merged_df["p50_watts"] - merged_df["value_watts"]

From 50e0ae225bf8a3c0bfa556d914382e20e3d67660 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 1 Dec 2025 17:47:11 +0000
Subject: [PATCH 57/60] PR comments, use agg better

---
 src/dataplatform/forecast/main.py | 31 +++++++++++++------------------
 src/dataplatform/forecast/plot.py | 10 +++-------
 2 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 0a6125f..2f642a0 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -263,26 +263,21 @@ def make_summary_data_metric_vs_horizon_minutes(
     # mean absolute error by horizonMins and forecasterFullName
     summary_df = (
         merged_df.groupby(["horizon_mins", "forecaster_name"])
-        .agg({"absolute_error": "mean"})
+        .agg(
+            {
+                "absolute_error": ["mean", "std", "count"],
+                "error": "mean",
+            },
+        )
         .reset_index()
     )
-    summary_df["std"] = (
-        merged_df.groupby(["horizon_mins", "forecaster_name"])
-        .agg({"absolute_error": "std"})
-        .reset_index()["absolute_error"]
-    )
-    summary_df["count"] = (
-        merged_df.groupby(["horizon_mins", "forecaster_name"])
-        .agg({"absolute_error": "count"})
-        .reset_index()["absolute_error"]
-    )
-    summary_df["sem"] = summary_df["std"] / (summary_df["count"] ** 0.5)
 
-    # ME
-    summary_df["ME"] = (
-        merged_df.groupby(["horizon_mins", "forecaster_name"])
-        .agg({"error": "mean"})
-        .reset_index()["error"]
+    summary_df.columns = ["_".join(col).strip() for col in summary_df.columns.values]
+    summary_df.columns = [col[:-1] if col.endswith("_") else col for col in summary_df.columns]
+
+    # calculate sem of MAE
+    summary_df["sem"] = summary_df["absolute_error_std"] / (
+        summary_df["absolute_error_count"] ** 0.5
     )
 
     # TODO more metrics
@@ -294,7 +289,7 @@ def make_summary_data_metric_vs_horizon_minutes(
     )
 
     # rename absolute_error to MAE
-    summary_df = summary_df.rename(columns={"absolute_error": "MAE"})
+    summary_df = summary_df.rename(columns={"absolute_error_mean": "MAE", "error_mean": "ME"})
     summary_df["NMAE (by capacity)"] = (
         summary_df["MAE"] / summary_df["effective_capacity_watts_observation"]
     )
diff --git a/src/dataplatform/forecast/plot.py b/src/dataplatform/forecast/plot.py
index ebaa66c..ff53bc4 100644
--- a/src/dataplatform/forecast/plot.py
+++ b/src/dataplatform/forecast/plot.py
@@ -241,16 +241,12 @@ def plot_forecast_metric_per_day(
     # group by forecaster name and date
     daily_metrics_df = (
         daily_plots_df.groupby(["date_utc", "forecaster_name"])
-        .agg({"absolute_error": "mean"})
+        .agg({"absolute_error": "mean", "error": "mean"})
         .reset_index()
-    ).rename(columns={"absolute_error": "MAE"})
-    # ME
-    daily_metrics_df["ME"] = (
-        daily_plots_df.groupby(["date_utc", "forecaster_name"])
-        .agg({"error": "mean"})
-        .reset_index()["error"]
     )
 
+    daily_metrics_df = daily_metrics_df.rename(columns={"absolute_error": "MAE", "error": "ME"})
+
     fig3 = go.Figure()
     for i, forecaster_name in enumerate(forecaster_names):
         name_and_version = f"{forecaster_name}"

From 5232456ca0f650d1b57d52d87a7465d2617e1980 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 1 Dec 2025 18:09:43 +0000
Subject: [PATCH 58/60] use p10_fraction, rather than p10

---
 src/dataplatform/forecast/data.py | 5 +++--
 src/dataplatform/forecast/main.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index 34c3df5..fa45943 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -41,8 +41,9 @@ async def get_forecast_data(
     all_data_df["p50_watts"] = all_data_df["p50_fraction"] * all_data_df["effective_capacity_watts"]
 
     for col in ["p10", "p25", "p75", "p90"]:
-        if col in all_data_df.columns:
-            all_data_df[f"{col}_watts"] = all_data_df[col] * all_data_df["effective_capacity_watts"]
+        col_fraction = f"{col}_fraction"
+        if col_fraction in all_data_df.columns:
+            all_data_df[f"{col}_watts"] = all_data_df[col_fraction] * all_data_df["effective_capacity_watts"]
 
     return all_data_df
 
diff --git a/src/dataplatform/forecast/main.py b/src/dataplatform/forecast/main.py
index 2f642a0..0602268 100644
--- a/src/dataplatform/forecast/main.py
+++ b/src/dataplatform/forecast/main.py
@@ -68,7 +68,7 @@ async def async_dp_forecast_page() -> None:
             in `{forecast_seconds:.2f}` seconds. \
             Fetched `{len(all_observations_df)}` rows of observation data \
             in `{observation_seconds:.2f}` seconds. \
-            We cache data for 5 minutses to speed up repeated requests.",
+            We cache data for 5 minutes to speed up repeated requests.",
         )
 
         # add download button

From 612ebbf0b915f728d5225580e99527eee811d045 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 1 Dec 2025 21:05:37 +0000
Subject: [PATCH 59/60] add _fraction to column from other_statistics_fractions
 column

---
 src/dataplatform/forecast/data.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index fa45943..bdf66b1 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -89,10 +89,14 @@ async def get_forecast_data_one_forecaster(
     if len(all_data_df) == 0:
         return None
 
-    # get plevels into columns
+    # get plevels into columns and rename them with a '_fraction' suffix
+    columns_before_expand = set(all_data_df.columns)
     all_data_df = all_data_df.pipe(
         lambda df: df.join(pd.json_normalize(df["other_statistics_fractions"])),
     ).drop("other_statistics_fractions", axis=1)
+    new_columns = set(all_data_df.columns) - columns_before_expand
+    if len(new_columns) > 0:
+        all_data_df = all_data_df.rename(columns={col: f"{col}_fraction" for col in new_columns})
 
     # create column forecaster_name, its forecaster_fullname with version removed
     all_data_df["forecaster_name"] = all_data_df["forecaster_fullname"].apply(

From 0726b1044dde215e57331c250ab0c5f44f1ed3f1 Mon Sep 17 00:00:00 2001
From: Peter Dudfield <peter.dudfield@hotmail.com>
Date: Mon, 1 Dec 2025 21:06:07 +0000
Subject: [PATCH 60/60] lint

---
 src/dataplatform/forecast/data.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/dataplatform/forecast/data.py b/src/dataplatform/forecast/data.py
index bdf66b1..d4c3d46 100644
--- a/src/dataplatform/forecast/data.py
+++ b/src/dataplatform/forecast/data.py
@@ -43,7 +43,9 @@ async def get_forecast_data(
     for col in ["p10", "p25", "p75", "p90"]:
         col_fraction = f"{col}_fraction"
         if col_fraction in all_data_df.columns:
-            all_data_df[f"{col}_watts"] = all_data_df[col_fraction] * all_data_df["effective_capacity_watts"]
+            all_data_df[f"{col}_watts"] = (
+                all_data_df[col_fraction] * all_data_df["effective_capacity_watts"]
+            )
 
     return all_data_df