2
2
import pandas as pd
3
3
from beartype import beartype
4
4
from beartype .typing import Tuple , Union
5
- from sklearn .base import BaseEstimator
5
+ from sklearn .base import BaseEstimator , is_classifier
6
6
from tensorflow import keras
7
7
8
+ from eis_toolkit .exceptions import InvalidModelTypeException
9
+
8
10
9
11
@beartype
def predict_classifier(
    data: Union[np.ndarray, pd.DataFrame],
    model: Union[BaseEstimator, keras.Model],
    classification_threshold: float = 0.5,
    include_probabilities: bool = True,
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """
    Predict with a trained classifier model.

    Only works for binary classification currently.

    Args:
        data: Data used to make predictions.
        model: Trained classifier. Can be any machine learning classifier trained with
            EIS Toolkit (Sklearn and Keras models).
        classification_threshold: Threshold for classifying based on probabilities. Defaults to 0.5.
        include_probabilities: If the probability array should be returned too. Defaults to True.

    Returns:
        Predicted labels and optionally predicted probabilities as one-dimensional float32 arrays
        by a classifier model. Labels are 1.0 where the probability is greater than or equal to
        the classification threshold and 0.0 otherwise.

    Raises:
        InvalidModelTypeException: Input model is not a classifier model.
    """
    if isinstance(model, keras.Model):
        # squeeze() drops the trailing unit axis of the model output, but it also collapses a
        # single-sample prediction to a 0-d array; atleast_1d restores the promised 1-D shape.
        probabilities = np.atleast_1d(model.predict(data).squeeze())
    elif isinstance(model, BaseEstimator):
        if not is_classifier(model):
            raise InvalidModelTypeException(f"Expected a classifier model: {type(model)}.")
        # Column 1 of predict_proba holds the probability of the second (positive) class.
        probabilities = model.predict_proba(data)[:, 1]
    else:
        raise InvalidModelTypeException(f"Model type not recognized: {type(model)}.")

    # Threshold once for both model families so labels share the same dtype (float32)
    # regardless of whether the model is a Keras or a Sklearn classifier.
    labels = (probabilities >= classification_threshold).astype(np.float32)

    if include_probabilities:
        return labels, probabilities.astype(np.float32)
    return labels
39
54
40
55
41
56
@beartype
@@ -44,7 +59,7 @@ def predict_regressor(
44
59
model : Union [BaseEstimator , keras .Model ],
45
60
) -> np .ndarray :
46
61
"""
47
- Predict with a trained model.
62
+ Predict with a trained regressor model.
48
63
49
64
Args:
50
65
data: Data used to make predictions.
@@ -53,6 +68,11 @@ def predict_regressor(
53
68
54
69
Returns:
55
70
Regression model prediction array.
71
+
72
+ Raises:
73
+ InvalidModelTypeException: Input model is not a regressor model.
56
74
"""
75
+ if is_classifier (model ):
76
+ raise InvalidModelTypeException (f"Expected a regressor model: { type (model )} ." )
57
77
result = model .predict (data )
58
78
return result
0 commit comments