-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
Copy pathliblinear_svc_preprocessor.py
121 lines (106 loc) · 3.98 KB
/
liblinear_svc_preprocessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause
from ConfigSpace.hyperparameters import (
CategoricalHyperparameter,
Constant,
UniformFloatHyperparameter,
)
from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
from autosklearn.util.common import check_for_bool, check_none
class LibLinear_Preprocessor(AutoSklearnPreprocessingAlgorithm):
# Liblinear is not deterministic as it uses a RNG inside
def __init__(
self,
penalty,
loss,
dual,
tol,
C,
multi_class,
fit_intercept,
intercept_scaling,
class_weight=None,
random_state=None,
):
self.penalty = penalty
self.loss = loss
self.dual = dual
self.tol = tol
self.C = C
self.multi_class = multi_class
self.fit_intercept = fit_intercept
self.intercept_scaling = intercept_scaling
self.class_weight = class_weight
self.random_state = random_state
self.preprocessor = None
def fit(self, X, Y):
import sklearn.svm
from sklearn.feature_selection import SelectFromModel
self.C = float(self.C)
self.tol = float(self.tol)
self.dual = check_for_bool(self.dual)
self.fit_intercept = check_for_bool(self.fit_intercept)
self.intercept_scaling = float(self.intercept_scaling)
if check_none(self.class_weight):
self.class_weight = None
estimator = sklearn.svm.LinearSVC(
penalty=self.penalty,
loss=self.loss,
dual=self.dual,
tol=self.tol,
C=self.C,
class_weight=self.class_weight,
fit_intercept=self.fit_intercept,
intercept_scaling=self.intercept_scaling,
multi_class=self.multi_class,
random_state=self.random_state,
)
estimator.fit(X, Y)
self.preprocessor = SelectFromModel(
estimator=estimator, threshold="mean", prefit=True
)
return self
def transform(self, X):
if self.preprocessor is None:
raise NotImplementedError()
return self.preprocessor.transform(X)
@staticmethod
def get_properties(dataset_properties=None):
return {
"shortname": "LinearSVC Preprocessor",
"name": "Liblinear Support Vector Classification Preprocessing",
"handles_regression": False,
"handles_classification": True,
"handles_multiclass": True,
"handles_multilabel": False,
"handles_multioutput": False,
"is_deterministic": False,
"input": (SPARSE, DENSE, UNSIGNED_DATA),
"output": (INPUT,),
}
@staticmethod
def get_hyperparameter_search_space(dataset_properties=None):
cs = ConfigurationSpace()
penalty = Constant("penalty", "l1")
loss = CategoricalHyperparameter(
"loss", ["hinge", "squared_hinge"], default_value="squared_hinge"
)
dual = Constant("dual", "False")
# This is set ad-hoc
tol = UniformFloatHyperparameter(
"tol", 1e-5, 1e-1, default_value=1e-4, log=True
)
C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default_value=1.0)
multi_class = Constant("multi_class", "ovr")
# These are set ad-hoc
fit_intercept = Constant("fit_intercept", "True")
intercept_scaling = Constant("intercept_scaling", 1)
cs.add_hyperparameters(
[penalty, loss, dual, tol, C, multi_class, fit_intercept, intercept_scaling]
)
penalty_and_loss = ForbiddenAndConjunction(
ForbiddenEqualsClause(penalty, "l1"), ForbiddenEqualsClause(loss, "hinge")
)
cs.add_forbidden_clause(penalty_and_loss)
return cs