@@ -41,6 +41,7 @@ def set_kwargs(self):
4141 model_kwargs ["uppper_quantile" ] = uppper_quantile
4242 return model_kwargs
4343
44+
def preprocess(self, df, series_id):
    """Intentional no-op: the arguments are accepted and ignored.

    Args:
        df: Input data; unused.
        series_id: Series identifier; unused.

    Returns:
        None.
    """
    return None
4647
@@ -53,54 +54,70 @@ def preprocess(self, df, series_id):
5354 err_msg = "lightgbm is not installed, please install it with 'pip install lightgbm'" ,
5455 )
5556 def _train_model (self , data_train , data_test , model_kwargs ):
57+ import lightgbm as lgb
58+ from mlforecast import MLForecast
59+ from mlforecast .lag_transforms import ExpandingMean , RollingMean
60+ from mlforecast .target_transforms import Differences
61+
def set_model_config(freq):
    """Build the MLForecast feature configuration for a data frequency.

    Reads ``data_train`` and ``model_kwargs`` from the enclosing scope.
    User-supplied ``lags``, ``RollingMean`` and ``Differences`` entries in
    ``model_kwargs`` take precedence over the defaults derived here.

    Args:
        freq: Frequency alias as returned by ``pd.infer_freq``; may be
            ``None`` when no frequency could be inferred.

    Returns:
        dict: Keyword arguments for ``MLForecast`` with ``lags`` and
        ``lag_transforms``, plus ``target_transforms`` when a differencing
        order is available.
    """
    seasonal_map = {
        "H": 24,
        "D": 7,
        "W": 52,
        "M": 12,
        "Q": 4,
    }
    default_period = 7

    # pd.infer_freq can return None; calling .upper() on it would raise
    # AttributeError, so fall back to the default seasonal period instead.
    # NOTE(review): anchored aliases such as "W-SUN" do not match the keys
    # above and also fall back to the default - confirm this is intended.
    if freq:
        sp = seasonal_map.get(freq.upper(), default_period)
    else:
        sp = default_period

    # The shortest series bounds how far back lags/differences may reach.
    series_lengths = data_train.groupby(ForecastOutputColumns.SERIES).size()
    min_len = series_lengths.min()
    max_allowed = min_len - sp

    default_lags = [lag for lag in (1, sp, 2 * sp) if lag <= max_allowed]
    lags = model_kwargs.get("lags", default_lags)

    roll = model_kwargs.get("RollingMean", 2 * sp)

    default_diff = sp if sp <= max_allowed else None
    diff = model_kwargs.get("Differences", default_diff)

    config = {
        "lags": lags,
        "lag_transforms": {
            1: [ExpandingMean()],
            sp: [RollingMean(window_size=roll, min_samples=1)],
        },
    }
    # Only difference the target when an order is available; building
    # Differences([None]) for short series would fail downstream.
    if diff is not None:
        config["target_transforms"] = [Differences([diff])]
    return config
92+
5693 try :
57- import lightgbm as lgb
58- from mlforecast import MLForecast
59- from mlforecast .lag_transforms import ExpandingMean , RollingMean
60- from mlforecast .target_transforms import Differences
6194
6295 lgb_params = {
6396 "verbosity" : model_kwargs .get ("verbosity" , - 1 ),
6497 "num_leaves" : model_kwargs .get ("num_leaves" , 512 ),
6598 }
66- additional_data_params = {}
67- if len (self .datasets .get_additional_data_column_names ()) > 0 :
68- additional_data_params = {
69- "target_transforms" : [
70- Differences ([model_kwargs .get ("Differences" , 12 )])
71- ],
72- "lags" : model_kwargs .get ("lags" , [1 , 6 , 12 ]),
73- "lag_transforms" : (
74- {
75- 1 : [ExpandingMean ()],
76- 12 : [
77- RollingMean (
78- window_size = model_kwargs .get ("RollingMean" , 24 ),
79- min_samples = 1 ,
80- )
81- ],
82- }
83- ),
84- }
99+
100+ data_freq = pd .infer_freq (data_train [self .date_col ].drop_duplicates ()) \
101+ or pd .infer_freq (data_train [self .date_col ].drop_duplicates ()[- 5 :])
102+
103+ additional_data_params = set_model_config (data_freq )
85104
86105 fcst = MLForecast (
87106 models = {
88107 "forecast" : lgb .LGBMRegressor (** lgb_params ),
89- # "p" + str(int(model_kwargs["uppper_quantile"] * 100))
90108 "upper" : lgb .LGBMRegressor (
91109 ** lgb_params ,
92110 objective = "quantile" ,
93111 alpha = model_kwargs ["uppper_quantile" ],
94112 ),
95- # "p" + str(int(model_kwargs["lower_quantile"] * 100))
96113 "lower" : lgb .LGBMRegressor (
97114 ** lgb_params ,
98115 objective = "quantile" ,
99116 alpha = model_kwargs ["lower_quantile" ],
100117 ),
101118 },
102- freq = pd . infer_freq ( data_train [ self . date_col ]. drop_duplicates ())
103- or pd . infer_freq ( data_train [ self . date_col ]. drop_duplicates ()[ - 5 :]) ,
119+ freq = data_freq ,
120+ date_features = [ 'year' , 'month' , 'day' , 'dayofweek' , 'dayofyear' ] ,
104121 ** additional_data_params ,
105122 )
106123
@@ -158,6 +175,7 @@ def _train_model(self, data_train, data_test, model_kwargs):
158175 self .model_parameters [s_id ] = {
159176 "framework" : SupportedModels .LGBForecast ,
160177 ** lgb_params ,
178+ ** fcst .models_ ['forecast' ].get_params (),
161179 }
162180
163181 logger .debug ("===========Done===========" )
@@ -191,48 +209,21 @@ def _generate_report(self):
191209 Generates the report for the model
192210 """
193211 import report_creator as rc
194- from utilsforecast .plotting import plot_series
195212
196213 logging .getLogger ("report_creator" ).setLevel (logging .WARNING )
197214
198- # Section 1: Forecast Overview
199- sec1_text = rc .Block (
200- rc .Heading ("Forecast Overview" , level = 2 ),
201- rc .Text (
202- "These plots show your forecast in the context of historical data."
203- ),
204- )
205- sec_1 = _select_plot_list (
206- lambda s_id : plot_series (
207- self .datasets .get_all_data_long (include_horizon = False ),
208- pd .concat (
209- [self .fitted_values , self .outputs ], axis = 0 , ignore_index = True
210- ),
211- id_col = ForecastOutputColumns .SERIES ,
212- time_col = self .spec .datetime_column .name ,
213- target_col = self .original_target_column ,
214- seed = 42 ,
215- ids = [s_id ],
216- ),
217- self .datasets .list_series_ids (),
218- )
219-
220215 # Section 2: LGBForecast Model Parameters
221216 sec2_text = rc .Block (
222217 rc .Heading ("LGBForecast Model Parameters" , level = 2 ),
223218 rc .Text ("These are the parameters used for the LGBForecast model." ),
224219 )
225220
226- blocks = [
227- rc .Html (
228- str (s_id [1 ]),
229- label = s_id [0 ],
230- )
231- for _ , s_id in enumerate (self .model_parameters .items ())
232- ]
233- sec_2 = rc .Select (blocks = blocks )
221+ k , v = next (iter (self .model_parameters .items ()))
222+ sec_2 = rc .Html (
223+ pd .DataFrame (list (v .items ())).to_html (index = False , header = False ),
224+ )
234225
235- all_sections = [sec1_text , sec_1 , sec2_text , sec_2 ]
226+ all_sections = [sec2_text , sec_2 ]
236227 model_description = rc .Text (
237228 "LGBForecast uses mlforecast framework to perform time series forecasting using machine learning models"
238229 "with the option to scale to massive amounts of data using remote clusters."
0 commit comments