7
7
from sklearn .ensemble import GradientBoostingClassifier , GradientBoostingRegressor
8
8
9
9
from eis_toolkit import exceptions
10
- from eis_toolkit .prediction .model_utils import _train_and_evaluate_sklearn_model
10
+ from eis_toolkit .prediction .model_utils import _train_and_validate_sklearn_model
11
11
12
12
13
13
@beartype
14
14
def gradient_boosting_classifier_train (
15
15
X : Union [np .ndarray , pd .DataFrame ],
16
16
y : Union [np .ndarray , pd .Series ],
17
- test_method : Literal ["simple_split " , "kfold_cv" , "skfold_cv" , "loo_cv" , "none" ] = "simple_split " ,
17
+ validation_method : Literal ["split " , "kfold_cv" , "skfold_cv" , "loo_cv" , "none" ] = "split " ,
18
18
metrics : Sequence [Literal ["accuracy" , "precision" , "recall" , "f1" , "auc" ]] = ["accuracy" ],
19
- simple_split_size : float = 0.2 ,
19
+ split_size : float = 0.2 ,
20
20
cv_folds : int = 5 ,
21
21
loss : Literal ["log_loss" , "exponential" ] = "log_loss" ,
22
22
learning_rate : Number = 0.1 ,
@@ -31,22 +31,22 @@ def gradient_boosting_classifier_train(
31
31
Train and optionally validate a Gradient Boosting classifier model using Sklearn.
32
32
33
33
Various options and configurations for model performance evaluation are available. No validation,
34
- simple train-test and cross-validation can be chosen. If validation is performed, metric(s) to
35
- calculate can be defined and validation process configured (cross-validation method, number of folds ,
36
- size of the simple train-test split). Depending on the details of the validation process, the output
37
- metrics dictionary can be empty, one-dimensional or nested.
34
+ a split into training and validation parts, or cross-validation can be chosen. If validation is performed,
35
+ metric(s) to calculate can be defined and validation process configured (cross-validation method,
36
+ number of folds, size of the split). Depending on the details of the validation process,
37
+ the output metrics dictionary can be empty, one-dimensional or nested.
38
38
39
39
Args:
40
40
X: Training data.
41
41
y: Target labels.
42
- test_method: Test / validation method to use. "simple_split " divides data into two parts, "kfold_cv"
42
+ validation_method: Validation method to use. "split " divides data into two parts, "kfold_cv"
43
43
performs k-fold cross-validation, "skfold_cv" performs stratified k-fold cross-validation,
44
- "loo_cv" performs leave-one-out cross-validation and "none" will not test / validate model at all
44
+ "loo_cv" performs leave-one-out cross-validation and "none" will not validate the model at all
45
45
(in this case, all X and y will be used solely for training).
46
46
metrics: Metrics to use for scoring the model. Defaults to "accuracy".
47
- simple_split_size : Fraction of the dataset to be used as test data (rest is used for training).
48
- Used only when test_method is "simple_split ". Defaults to 0.2.
49
- cv_folds: Number of folds used in cross-validation. Used only when test_method is "kfold_cv"
47
+ split_size: Fraction of the dataset to be used as validation data (the rest is used for training).
48
+ Used only when validation_method is "split ". Defaults to 0.2.
49
+ cv_folds: Number of folds used in cross-validation. Used only when validation_method is "kfold_cv"
50
50
or "skfold_cv". Defaults to 5.
51
51
loss: The loss function to be optimized. Defaults to "log_loss" (same as in logistic regression).
52
52
learning_rate: Shrinks the contribution of each tree. Values must be >= 0. Defaults to 0.1.
@@ -92,13 +92,13 @@ def gradient_boosting_classifier_train(
92
92
** kwargs ,
93
93
)
94
94
95
- model , metrics = _train_and_evaluate_sklearn_model (
95
+ model , metrics = _train_and_validate_sklearn_model (
96
96
X = X ,
97
97
y = y ,
98
98
model = model ,
99
- test_method = test_method ,
99
+ validation_method = validation_method ,
100
100
metrics = metrics ,
101
- simple_split_size = simple_split_size ,
101
+ split_size = split_size ,
102
102
cv_folds = cv_folds ,
103
103
random_state = random_state ,
104
104
)
@@ -110,9 +110,9 @@ def gradient_boosting_classifier_train(
110
110
def gradient_boosting_regressor_train (
111
111
X : Union [np .ndarray , pd .DataFrame ],
112
112
y : Union [np .ndarray , pd .Series ],
113
- test_method : Literal ["simple_split " , "kfold_cv" , "skfold_cv" , "loo_cv" , "none" ] = "simple_split " ,
113
+ validation_method : Literal ["split " , "kfold_cv" , "skfold_cv" , "loo_cv" , "none" ] = "split " ,
114
114
metrics : Sequence [Literal ["mse" , "rmse" , "mae" , "r2" ]] = ["mse" ],
115
- simple_split_size : float = 0.2 ,
115
+ split_size : float = 0.2 ,
116
116
cv_folds : int = 5 ,
117
117
loss : Literal ["squared_error" , "absolute_error" , "huber" , "quantile" ] = "squared_error" ,
118
118
learning_rate : Number = 0.1 ,
@@ -127,22 +127,22 @@ def gradient_boosting_regressor_train(
127
127
Train and optionally validate a Gradient Boosting regressor model using Sklearn.
128
128
129
129
Various options and configurations for model performance evaluation are available. No validation,
130
- simple train-test and cross-validation can be chosen. If validation is performed, metric(s) to
131
- calculate can be defined and validation process configured (cross-validation method, number of folds ,
132
- size of the simple train-test split). Depending on the details of the validation process, the output
133
- metrics dictionary can be empty, one-dimensional or nested.
130
+ a split into training and validation parts, or cross-validation can be chosen. If validation is performed,
131
+ metric(s) to calculate can be defined and validation process configured (cross-validation method,
132
+ number of folds, size of the split). Depending on the details of the validation process,
133
+ the output metrics dictionary can be empty, one-dimensional or nested.
134
134
135
135
Args:
136
136
X: Training data.
137
137
y: Target labels.
138
- test_method: Test / validation method to use. "simple_split " divides data into two parts, "kfold_cv"
138
+ validation_method: Validation method to use. "split " divides data into two parts, "kfold_cv"
139
139
performs k-fold cross-validation, "skfold_cv" performs stratified k-fold cross-validation,
140
- "loo_cv" performs leave-one-out cross-validation and "none" will not test / validate model at all
140
+ "loo_cv" performs leave-one-out cross-validation and "none" will not validate the model at all
141
141
(in this case, all X and y will be used solely for training).
142
142
metrics: Metrics to use for scoring the model. Defaults to "mse".
143
- simple_split_size : Fraction of the dataset to be used as test data (rest is used for training).
144
- Used only when test_method is "simple_split ". Defaults to 0.2.
145
- cv_folds: Number of folds used in cross-validation. Used only when test_method is "kfold_cv"
143
+ split_size: Fraction of the dataset to be used as validation data (the rest is used for training).
144
+ Used only when validation_method is "split ". Defaults to 0.2.
145
+ cv_folds: Number of folds used in cross-validation. Used only when validation_method is "kfold_cv"
146
146
or "skfold_cv". Defaults to 5.
147
147
loss: The loss function to be optimized. Defaults to "squared_error".
148
148
learning_rate: Shrinks the contribution of each tree. Values must be > 0. Defaults to 0.1.
@@ -188,13 +188,13 @@ def gradient_boosting_regressor_train(
188
188
** kwargs ,
189
189
)
190
190
191
- model , metrics = _train_and_evaluate_sklearn_model (
191
+ model , metrics = _train_and_validate_sklearn_model (
192
192
X = X ,
193
193
y = y ,
194
194
model = model ,
195
- test_method = test_method ,
195
+ validation_method = validation_method ,
196
196
metrics = metrics ,
197
- simple_split_size = simple_split_size ,
197
+ split_size = split_size ,
198
198
cv_folds = cv_folds ,
199
199
random_state = random_state ,
200
200
)
0 commit comments