@@ -284,6 +284,19 @@ def _get_main_dictionary(self):
284
284
self ._assert_is_fitted ()
285
285
return self .model_ .get_dictionary (self .model_main_dictionary_name_ )
286
286
287
def _read_model_from_dictionary_file(self, model_dictionary_file_path):
    """Reads a dictionary file and keeps only the model dictionaries

    The file is loaded with ``kh.read_dictionary_file`` and every dictionary
    whose name does not start with ``self._khiops_model_prefix`` is removed
    from the loaded object before it is returned.

    This filtering is necessary for the regression case because Khiops generates
    a baseline model which has to be removed for the sklearn predictor.

    Parameters
    ----------
    model_dictionary_file_path : str
        Path of the dictionary file to read.

    Returns
    -------
    The object returned by ``kh.read_dictionary_file``, containing only the
    dictionaries whose name starts with the model prefix.
    """
    domain = kh.read_dictionary_file(model_dictionary_file_path)

    # The concrete estimator must have set its model prefix beforehand
    assert self._khiops_model_prefix is not None
    model_prefix = self._khiops_model_prefix

    # Collect the names to discard first: removing entries while iterating
    # directly over `domain.dictionaries` would alter the collection being
    # traversed
    discarded_names = [
        kdic.name
        for kdic in domain.dictionaries
        if not kdic.name.startswith(model_prefix)
    ]
    for discarded_name in discarded_names:
        domain.remove_dictionary(discarded_name)

    return domain
299
+
287
300
def export_report_file (self , report_file_path ):
288
301
"""Exports the model report to a JSON file
289
302
@@ -309,7 +322,7 @@ def export_dictionary_file(self, dictionary_file_path):
309
322
310
323
def _import_model (self , kdic_path ):
311
324
"""Sets model instance attribute by importing model from ``.kdic``"""
312
- self .model_ = kh . read_dictionary_file (kdic_path )
325
+ self .model_ = self . _read_model_from_dictionary_file (kdic_path )
313
326
314
327
def _get_output_dir (self , fallback_dir ):
315
328
if self .output_dir :
@@ -808,7 +821,7 @@ def _fit_train_model(self, ds, computation_dir, **kwargs):
808
821
809
822
# Update the `model_` attribute of the coclustering estimator to the
810
823
# new coclustering model
811
- self .model_ = kh . read_dictionary_file (
824
+ self .model_ = self . _read_model_from_dictionary_file (
812
825
fs .get_child_path (
813
826
output_dir , f"{ self .model_main_dictionary_name_ } _deployed.kdic"
814
827
)
@@ -1021,7 +1034,7 @@ def _simplify(
1021
1034
1022
1035
# Set the `model_` attribute of the new coclustering estimator to
1023
1036
# the new coclustering model
1024
- simplified_cc .model_ = kh . read_dictionary_file (
1037
+ simplified_cc .model_ = self . _read_model_from_dictionary_file (
1025
1038
fs .get_child_path (
1026
1039
output_dir , f"{ self .model_main_dictionary_name_ } _deployed.kdic"
1027
1040
)
@@ -1205,6 +1218,7 @@ def __init__(
1205
1218
self .construction_rules = construction_rules
1206
1219
self ._original_target_dtype = None
1207
1220
self ._predicted_target_meta_data_tag = None
1221
+ self ._khiops_baseline_model_prefix = None
1208
1222
1209
1223
def __sklearn_tags__ (self ):
1210
1224
# If we don't implement this trivial method it's not found by the sklearn. This
@@ -1295,7 +1309,7 @@ def _fit_train_model(self, ds, computation_dir, **kwargs):
1295
1309
return
1296
1310
1297
1311
# Save the model domain object and report
1298
- self .model_ = kh . read_dictionary_file (model_kdic_file_path )
1312
+ self .model_ = self . _read_model_from_dictionary_file (model_kdic_file_path )
1299
1313
self .model_report_ = kh .read_analysis_results_file (report_file_path )
1300
1314
1301
1315
@abstractmethod
@@ -1384,15 +1398,24 @@ def _fit_training_post_process(self, ds):
1384
1398
self .model_main_dictionary_name_ = self .model_ .dictionaries [0 ].name
1385
1399
else :
1386
1400
for dictionary in self .model_ .dictionaries :
1387
- assert dictionary .name .startswith (self ._khiops_model_prefix ), (
1401
+
1402
+ # The baseline model is mandatory for regression;
1403
+ # absent for classification and encoding
1404
+ assert dictionary .name .startswith (
1405
+ self ._khiops_model_prefix
1406
+ ) or dictionary .name .startswith (self ._khiops_baseline_model_prefix ), (
1388
1407
f"Dictionary '{ dictionary .name } ' "
1389
- f"does not have prefix '{ self ._khiops_model_prefix } '"
1390
- )
1391
- initial_dictionary_name = dictionary .name .replace (
1392
- self ._khiops_model_prefix , "" , 1
1408
+ f"does not have prefix '{ self ._khiops_model_prefix } ' "
1409
+ f"or '{ self ._khiops_baseline_model_prefix } '."
1393
1410
)
1394
- if initial_dictionary_name == ds .main_table .name :
1395
- self .model_main_dictionary_name_ = dictionary .name
1411
+
1412
+ # Skip baseline model
1413
+ if dictionary .name .startswith (self ._khiops_model_prefix ):
1414
+ initial_dictionary_name = dictionary .name .replace (
1415
+ self ._khiops_model_prefix , "" , 1
1416
+ )
1417
+ if initial_dictionary_name == ds .main_table .name :
1418
+ self .model_main_dictionary_name_ = dictionary .name
1396
1419
if self .model_main_dictionary_name_ is None :
1397
1420
raise ValueError ("No model dictionary after Khiops call" )
1398
1421
@@ -2185,6 +2208,7 @@ def __init__(
2185
2208
auto_sort = auto_sort ,
2186
2209
)
2187
2210
self ._khiops_model_prefix = "SNB_"
2211
+ self ._khiops_baseline_model_prefix = "B_"
2188
2212
self ._predicted_target_meta_data_tag = "Mean"
2189
2213
self ._predicted_target_name_prefix = "M"
2190
2214
self ._original_target_dtype = np .float64
@@ -2286,6 +2310,9 @@ def predict(self, X):
2286
2310
- str (a path for the file containing the array) if X is a dataset spec
2287
2311
containing file-path tables.
2288
2312
"""
2313
+ assert (
2314
+ self ._khiops_baseline_model_prefix is not None
2315
+ ), "Baseline model prefix is not set (mandatory for regression)"
2289
2316
# Call the parent's method
2290
2317
y_pred = super ().predict (X )
2291
2318
0 commit comments