@@ -59,7 +59,7 @@ def load_model(path: Path) -> BaseEstimator:
59
59
def split_data (
60
60
* data : Union [np .ndarray , pd .DataFrame , sparse ._csr .csr_matrix , List [Number ]],
61
61
split_size : float = 0.2 ,
62
- random_state : Optional [int ] = 42 ,
62
+ random_state : Optional [int ] = None ,
63
63
shuffle : bool = True ,
64
64
) -> List [Union [np .ndarray , pd .DataFrame , sparse ._csr .csr_matrix , List [Number ]]]:
65
65
"""
@@ -75,7 +75,7 @@ def split_data(
75
75
split_size: The proportion of the second part of the split. Typically this is the size of test/validation
76
76
part. The first part will be the complementary proportion. For example, if split_size = 0.2, the first part
77
77
will have 80% of the data and the second part 20% of the data. Defaults to 0.2.
78
- random_state: Seed for random number generation. Defaults to 42 .
78
+ random_state: Seed for random number generation. Defaults to None.
79
79
shuffle: If data is shuffled before splitting. Defaults to True.
80
80
81
81
Returns:
@@ -100,8 +100,6 @@ def test_model(
100
100
"""
101
101
Test and score a trained model.
102
102
103
- TODO: Implement for Keras models.
104
-
105
103
Args:
106
104
X_test: Test data.
107
105
y_test: Target labels for test data.
@@ -157,7 +155,8 @@ def _train_and_validate_sklearn_model(
157
155
metrics : Sequence [Literal ["mse" , "rmse" , "mae" , "r2" , "accuracy" , "precision" , "recall" , "f1" ]],
158
156
split_size : float = 0.2 ,
159
157
cv_folds : int = 5 ,
160
- random_state : Optional [int ] = 42 ,
158
+ shuffle : bool = True ,
159
+ random_state : Optional [int ] = None ,
161
160
) -> Tuple [BaseEstimator , dict ]:
162
161
"""
163
162
Train and validate Sklearn model.
@@ -187,7 +186,7 @@ def _train_and_validate_sklearn_model(
187
186
# Approach 2: Validation with splitting data once
188
187
elif validation_method == SPLIT :
189
188
X_train , X_valid , y_train , y_valid = split_data (
190
- X , y , split_size = split_size , random_state = random_state , shuffle = True
189
+ X , y , split_size = split_size , random_state = random_state , shuffle = shuffle
191
190
)
192
191
model .fit (X_train , y_train )
193
192
y_pred = model .predict (X_valid )
@@ -199,7 +198,7 @@ def _train_and_validate_sklearn_model(
199
198
200
199
# Approach 3: Cross-validation
201
200
elif validation_method in [KFOLD_CV , SKFOLD_CV , LOO_CV ]:
202
- cv = _get_cross_validator (validation_method , cv_folds , random_state )
201
+ cv = _get_cross_validator (validation_method , cv_folds , shuffle , random_state )
203
202
204
203
# Initialize output metrics dictionary
205
204
out_metrics = {}
@@ -284,13 +283,13 @@ def _score_model(
284
283
285
284
@beartype
286
285
def _get_cross_validator (
287
- cv : str , folds : int , random_state : Optional [int ]
286
+ cv : str , folds : int , shuffle : bool , random_state : Optional [int ]
288
287
) -> Union [KFold , StratifiedKFold , LeaveOneOut ]:
289
288
"""Create and return a Sklearn cross-validator based on given parameter values."""
290
289
if cv == KFOLD_CV :
291
- cross_validator = KFold (n_splits = folds , shuffle = True , random_state = random_state )
290
+ cross_validator = KFold (n_splits = folds , shuffle = shuffle , random_state = random_state )
292
291
elif cv == SKFOLD_CV :
293
- cross_validator = StratifiedKFold (n_splits = folds , shuffle = True , random_state = random_state )
292
+ cross_validator = StratifiedKFold (n_splits = folds , shuffle = shuffle , random_state = random_state )
294
293
elif cv == LOO_CV :
295
294
cross_validator = LeaveOneOut ()
296
295
else :
0 commit comments