Skip to content

Commit 6b76c61

Browse files
committed
Release
1 parent 6c6a581 commit 6b76c61

File tree

4 files changed

+32
-12
lines changed

4 files changed

+32
-12
lines changed

lineartree/_classes.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def _parallel_binning_fit(split_feat, _self, X, y,
152152
weights=weights[mask], **largs_right)
153153
wloss_right = loss_right * (weights[mask].sum() / weights.sum())
154154

155-
total_loss = wloss_left + wloss_right
155+
total_loss = round(wloss_left + wloss_right, 5)
156156

157157
# store if best
158158
if total_loss < loss:
@@ -214,15 +214,16 @@ class _LinearTree(BaseEstimator):
214214
"""
215215
def __init__(self, base_estimator, *, criterion, max_depth,
216216
min_samples_split, min_samples_leaf, max_bins,
217-
categorical_features, split_features,
218-
linear_features, n_jobs):
217+
min_impurity_decrease, categorical_features,
218+
split_features, linear_features, n_jobs):
219219

220220
self.base_estimator = base_estimator
221221
self.criterion = criterion
222222
self.max_depth = max_depth
223223
self.min_samples_split = min_samples_split
224224
self.min_samples_leaf = min_samples_leaf
225225
self.max_bins = max_bins
226+
self.min_impurity_decrease = min_impurity_decrease
226227
self.categorical_features = categorical_features
227228
self.split_features = split_features
228229
self.linear_features = linear_features
@@ -295,7 +296,7 @@ def _split(self, X, y, bins,
295296

296297
# select best results
297298
_id_best = np.argmin(_losses)
298-
if _losses[_id_best] < loss:
299+
if loss - _losses[_id_best] > self.min_impurity_decrease:
299300
split_t = split_t[_id_best]
300301
split_col = split_col[_id_best]
301302
left_node = left_node[_id_best]
@@ -362,6 +363,7 @@ def _grow(self, X, y, weights=None):
362363
loss = CRITERIA[self.criterion](
363364
model, X[:, self._linear_features], y,
364365
weights=weights, **largs)
366+
loss = round(loss, 5)
365367

366368
self._nodes[''] = Node(
367369
id=0,
@@ -651,8 +653,8 @@ def summary(self, feature_names=None, only_leaves=False, max_depth=None):
651653

652654
summary[N.id] = {
653655
'col': feature_names[Cl.threshold[-1][0]],
654-
'th': round(Cl.threshold[-1][-1], 4),
655-
'loss': round(Cl.w_loss + Cr.w_loss, 4),
656+
'th': round(Cl.threshold[-1][-1], 5),
657+
'loss': round(Cl.w_loss + Cr.w_loss, 5),
656658
'samples': Cl.n_samples + Cr.n_samples,
657659
'children': (Cl.id, Cr.id),
658660
'models': (Cl.model, Cr.model)
@@ -664,7 +666,7 @@ def summary(self, feature_names=None, only_leaves=False, max_depth=None):
664666
continue
665667

666668
summary[L.id] = {
667-
'loss': round(L.loss, 4),
669+
'loss': round(L.loss, 5),
668670
'samples': L.n_samples,
669671
'models': L.model
670672
}

lineartree/lineartree.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ class LinearTreeRegressor(_LinearTree, RegressorMixin):
5959
``max_bins`` bins. Must be lower than 120 and larger than 10.
6060
A higher value implies a higher training time.
6161
62+
min_impurity_decrease : float, default=0.0
63+
A node will be split if this split induces a decrease of the impurity
64+
strictly greater than this value.
65+
6266
categorical_features : int or array-like of int, default=None
6367
Indicates the categorical features.
6468
All categorical indices must be in `[0, n_features)`.
@@ -119,15 +123,16 @@ class LinearTreeRegressor(_LinearTree, RegressorMixin):
119123
"""
120124
def __init__(self, base_estimator, *, criterion='mse', max_depth=5,
121125
min_samples_split=6, min_samples_leaf=0.1, max_bins=25,
122-
categorical_features=None, split_features=None,
123-
linear_features=None, n_jobs=None):
126+
min_impurity_decrease=0.0, categorical_features=None,
127+
split_features=None, linear_features=None, n_jobs=None):
124128

125129
self.base_estimator = base_estimator
126130
self.criterion = criterion
127131
self.max_depth = max_depth
128132
self.min_samples_split = min_samples_split
129133
self.min_samples_leaf = min_samples_leaf
130134
self.max_bins = max_bins
135+
self.min_impurity_decrease = min_impurity_decrease
131136
self.categorical_features = categorical_features
132137
self.split_features = split_features
133138
self.linear_features = linear_features
@@ -281,6 +286,10 @@ class LinearTreeClassifier(_LinearTree, ClassifierMixin):
281286
``max_bins`` bins. Must be lower than 120 and larger than 10.
282287
A higher value implies a higher training time.
283288
289+
min_impurity_decrease : float, default=0.0
290+
A node will be split if this split induces a decrease of the impurity
291+
strictly greater than this value.
292+
284293
categorical_features : int or array-like of int, default=None
285294
Indicates the categorical features.
286295
All categorical indices must be in `[0, n_features)`.
@@ -341,15 +350,16 @@ class LinearTreeClassifier(_LinearTree, ClassifierMixin):
341350
"""
342351
def __init__(self, base_estimator, *, criterion='hamming', max_depth=5,
343352
min_samples_split=6, min_samples_leaf=0.1, max_bins=25,
344-
categorical_features=None, split_features=None,
345-
linear_features=None, n_jobs=None):
353+
min_impurity_decrease=0.0, categorical_features=None,
354+
split_features=None, linear_features=None, n_jobs=None):
346355

347356
self.base_estimator = base_estimator
348357
self.criterion = criterion
349358
self.max_depth = max_depth
350359
self.min_samples_split = min_samples_split
351360
self.min_samples_leaf = min_samples_leaf
352361
self.max_bins = max_bins
362+
self.min_impurity_decrease = min_impurity_decrease
353363
self.categorical_features = categorical_features
354364
self.split_features = split_features
355365
self.linear_features = linear_features

notebooks/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ class lineartree.LinearTreeRegressor(base_estimator, *, criterion = 'mse', max_d
4343

4444
The maximum number of bins to use to search the optimal split in each feature. Features with a small number of unique values may use fewer than ``max_bins`` bins. Must be lower than 120 and larger than 10.
4545
A higher value implies a higher training time.
46+
47+
- ```min_impurity_decrease : float, default=0.0```
48+
49+
A node will be split if this split induces a decrease of the impurity strictly greater than this value.
4650

4751
- ```categorical_features : int or array-like of int, default=None```
4852

@@ -279,6 +283,10 @@ class lineartree.LinearTreeClassifier(base_estimator, *, criterion = 'hamming',
279283

280284
The maximum number of bins to use to search the optimal split in each feature. Features with a small number of unique values may use fewer than ``max_bins`` bins. Must be lower than 120 and larger than 10.
281285
A higher value implies a higher training time.
286+
287+
- ```min_impurity_decrease : float, default=0.0```
288+
289+
A node will be split if this split induces a decrease of the impurity strictly greater than this value.
282290

283291
- ```categorical_features : int or array-like of int, default=None```
284292

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
HERE = pathlib.Path(__file__).parent
55

6-
VERSION = '0.3.3'
6+
VERSION = '0.3.4'
77
PACKAGE_NAME = 'linear-tree'
88
AUTHOR = 'Marco Cerliani'
99
AUTHOR_EMAIL = '[email protected]'

0 commit comments

Comments
 (0)