Skip to content

Commit 58f4a11

Browse files
authored
DOC add test for numpydoc validation and documented param/attributes (#869)
1 parent 56eefdf commit 58f4a11

35 files changed

+774
-76
lines changed

build_tools/azure/install.cmd

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ IF "%PYTHON_ARCH%"=="64" (
1111
call deactivate
1212
@rem Clean up any left-over from a previous build
1313
conda remove --all -q -y -n %VIRTUALENV%
14-
conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython wheel joblib git
14+
conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython wheel joblib git -c conda-forge
1515

1616
call activate %VIRTUALENV%
1717

build_tools/azure/install.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ UNAMESTR=`uname`
77

88
make_conda() {
99
TO_INSTALL="$@"
10-
conda create -n $VIRTUALENV --yes $TO_INSTALL
10+
conda create -n $VIRTUALENV --yes $TO_INSTALL -c conda-forge
1111
source activate $VIRTUALENV
1212
}
1313

@@ -65,7 +65,7 @@ if [[ "$DISTRIB" == "conda" ]]; then
6565
fi
6666

6767
if [[ -n "$TO_INSTALL" ]]; then
68-
conda install --yes $TO_INSTALL
68+
conda install --yes $TO_INSTALL -c conda-forge
6969
fi
7070

7171
if [[ -n "$KERAS_VERSION" ]]; then

imblearn/base.py

+30-1
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,24 @@ def _identity(X, y):
140140
return X, y
141141

142142

143+
def is_sampler(estimator):
    """Return True if the given estimator is a sampler, False otherwise.

    An estimator is considered a sampler when its ``_estimator_type``
    attribute equals ``"sampler"``. Objects that do not define
    ``_estimator_type`` are reported as not being samplers instead of
    raising ``AttributeError``.

    Parameters
    ----------
    estimator : object
        Estimator to test.

    Returns
    -------
    is_sampler : bool
        True if estimator is a sampler, otherwise False.
    """
    # ``getattr`` with a default keeps the check safe for arbitrary objects
    # that do not expose ``_estimator_type`` at all.
    return getattr(estimator, "_estimator_type", None) == "sampler"
159+
160+
143161
class FunctionSampler(BaseSampler):
144162
"""Construct a sampler from calling an arbitrary callable.
145163
@@ -166,9 +184,20 @@ class FunctionSampler(BaseSampler):
166184
167185
.. versionadded:: 0.6
168186
187+
Attributes
188+
----------
189+
sampling_strategy_ : dict
190+
Dictionary containing the information to sample the dataset. The keys
191+
correspond to the class labels from which to sample and the values
192+
are the number of samples to sample.
193+
194+
n_features_in_ : int
195+
Number of features in the input dataset.
196+
197+
.. versionadded:: 0.9
198+
169199
See Also
170200
--------
171-
172201
sklearn.preprocessing.FunctionTransformer : Stateless transformer.
173202
174203
Notes

imblearn/combine/_smote_enn.py

+19
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,25 @@ class SMOTEENN(BaseSampler):
4949
5050
{n_jobs}
5151
52+
Attributes
53+
----------
54+
sampling_strategy_ : dict
55+
Dictionary containing the information to sample the dataset. The keys
56+
correspond to the class labels from which to sample and the values
57+
are the number of samples to sample.
58+
59+
smote_ : sampler object
60+
The validated :class:`~imblearn.over_sampling.SMOTE` instance.
61+
62+
enn_ : sampler object
63+
The validated :class:`~imblearn.under_sampling.EditedNearestNeighbours`
64+
instance.
65+
66+
n_features_in_ : int
67+
Number of features in the input dataset.
68+
69+
.. versionadded:: 0.9
70+
5271
See Also
5372
--------
5473
SMOTETomek : Over-sample using SMOTE followed by under-sampling removing

imblearn/combine/_smote_tomek.py

+18
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,24 @@ class SMOTETomek(BaseSampler):
4949
5050
{n_jobs}
5151
52+
Attributes
53+
----------
54+
sampling_strategy_ : dict
55+
Dictionary containing the information to sample the dataset. The keys
56+
correspond to the class labels from which to sample and the values
57+
are the number of samples to sample.
58+
59+
smote_ : sampler object
60+
The validated :class:`~imblearn.over_sampling.SMOTE` instance.
61+
62+
tomek_ : sampler object
63+
The validated :class:`~imblearn.under_sampling.TomekLinks` instance.
64+
65+
n_features_in_ : int
66+
Number of features in the input dataset.
67+
68+
.. versionadded:: 0.9
69+
5270
See Also
5371
--------
5472
SMOTEENN : Over-sample using SMOTE followed by under-sampling using Edited

imblearn/datasets/_imbalance.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616
def make_imbalance(
1717
X, y, *, sampling_strategy=None, random_state=None, verbose=False, **kwargs
1818
):
19-
"""Turns a dataset into an imbalanced dataset with a specific sampling
20-
strategy.
19+
"""Turn a dataset into an imbalanced dataset with a specific sampling strategy.
2120
2221
A simple toy dataset to visualize clustering and classification
2322
algorithms.
@@ -52,7 +51,7 @@ def make_imbalance(
5251
verbose : bool, default=False
5352
Show information regarding the sampling.
5453
55-
kwargs : dict
54+
**kwargs : dict
5655
Dictionary of additional keyword arguments to pass to
5756
``sampling_strategy``.
5857
@@ -62,7 +61,7 @@ def make_imbalance(
6261
The array containing the imbalanced data.
6362
6463
y_resampled : ndarray of shape (n_samples_new,)
65-
The corresponding label of `X_resampled`
64+
The corresponding label of `X_resampled`.
6665
6766
Notes
6867
-----

imblearn/datasets/_zenodo.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
.. [1] Ding, Zejin, "Diversified Ensemble Classifiers for Highly
3939
Imbalanced Data Learning and their Application in Bioinformatics."
4040
Dissertation, Georgia State University, (2011).
41-
4241
"""
4342

4443
# Author: Guillaume Lemaitre
@@ -147,12 +146,12 @@ def fetch_datasets(
147146
The order is defined by ``filter_data``. Each Bunch object ---
148147
referred to as dataset --- has the following attributes:
149148
150-
dataset.data : ndarray of shape (n_samples, n_features)
149+
dataset.data : ndarray of shape (n_samples, n_features)
151150
152-
dataset.target : ndarray of shape (n_samples,)
151+
dataset.target : ndarray of shape (n_samples,)
153152
154-
dataset.DESCR : str
155-
Description of the each dataset.
153+
dataset.DESCR : str
154+
Description of each dataset.
156155
157156
Notes
158157
-----

imblearn/ensemble/_bagging.py

+8
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,9 @@ class BalancedBaggingClassifier(BaggingClassifier):
111111
estimators_ : list of estimators
112112
The collection of fitted base estimators.
113113
114+
sampler_ : sampler object
115+
The validated sampler created from the `sampler` parameter.
116+
114117
estimators_samples_ : list of ndarray
115118
The subset of drawn samples (i.e., the in-bag samples) for each base
116119
estimator. Each subset is defined by a boolean mask.
@@ -133,6 +136,11 @@ class BalancedBaggingClassifier(BaggingClassifier):
133136
was never left out during the bootstrap. In this case,
134137
``oob_decision_function_`` might contain NaN.
135138
139+
n_features_in_ : int
140+
Number of features in the input dataset.
141+
142+
.. versionadded:: 0.9
143+
136144
See Also
137145
--------
138146
BalancedRandomForestClassifier : Random forest applying random-under

imblearn/ensemble/_easy_ensemble.py

+11
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,23 @@ class EasyEnsembleClassifier(BaggingClassifier):
7373
estimators_ : list of estimators
7474
The collection of fitted base estimators.
7575
76+
estimators_samples_ : list of arrays
77+
The subset of drawn samples for each base estimator.
78+
79+
estimators_features_ : list of arrays
80+
The subset of drawn features for each base estimator.
81+
7682
classes_ : array, shape (n_classes,)
7783
The class labels.
7884
7985
n_classes_ : int or list
8086
The number of classes.
8187
88+
n_features_in_ : int
89+
Number of features in the input dataset.
90+
91+
.. versionadded:: 0.9
92+
8293
See Also
8394
--------
8495
BalancedBaggingClassifier : Bagging classifier for which each base

imblearn/ensemble/_forest.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -230,10 +230,17 @@ class BalancedRandomForestClassifier(RandomForestClassifier):
230230
231231
Attributes
232232
----------
233-
estimators_ : list of DecisionTreeClassifier
233+
base_estimator_ : :class:`~sklearn.tree.DecisionTreeClassifier` instance
234+
The child estimator template used to create the collection of fitted
235+
sub-estimators.
236+
237+
estimators_ : list of :class:`~sklearn.tree.DecisionTreeClassifier`
234238
The collection of fitted sub-estimators.
235239
236-
samplers_ : list of RandomUnderSampler
240+
base_sampler_ : :class:`~imblearn.under_sampling.RandomUnderSampler`
241+
The base sampler used to construct the subsequent list of samplers.
242+
243+
samplers_ : list of :class:`~imblearn.under_sampling.RandomUnderSampler`
237244
The collection of fitted samplers.
238245
239246
pipelines_ : list of Pipeline.
@@ -250,6 +257,11 @@ class labels (multi-output problem).
250257
n_features_ : int
251258
The number of features when ``fit`` is performed.
252259
260+
n_features_in_ : int
261+
Number of features in the input dataset.
262+
263+
.. versionadded:: 0.9
264+
253265
n_outputs_ : int
254266
The number of outputs when ``fit`` is performed.
255267
@@ -628,7 +640,7 @@ def _set_oob_score(self, X, y):
628640
@property
629641
def n_features_(self):
630642
"""Number of features when fitting the estimator."""
631-
return getattr(self.n_features_in_, self._n_features)
643+
return getattr(self.n_features_in_, "n_features_", self._n_features)
632644

633645
def _more_tags(self):
634646
return {

imblearn/ensemble/_weight_boosting.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,10 @@ class RUSBoostClassifier(AdaBoostClassifier):
6868
estimators_ : list of classifiers
6969
The collection of fitted sub-estimators.
7070
71-
samplers_ : list of RandomUnderSampler
71+
base_sampler_ : :class:`~imblearn.under_sampling.RandomUnderSampler`
72+
The base sampler used to generate the subsequent samplers.
73+
74+
samplers_ : list of :class:`~imblearn.under_sampling.RandomUnderSampler`
7275
The collection of fitted samplers.
7376
7477
pipelines_ : list of Pipeline
@@ -90,6 +93,11 @@ class RUSBoostClassifier(AdaBoostClassifier):
9093
feature_importances_ : ndarray of shape (n_features,)
9194
The feature importances if supported by the ``base_estimator``.
9295
96+
n_features_in_ : int
97+
Number of features in the input dataset.
98+
99+
.. versionadded:: 0.9
100+
93101
See Also
94102
--------
95103
BalancedBaggingClassifier : Bagging classifier for which each base

imblearn/exceptions.py

+18
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,24 @@
88

99

1010
def raise_isinstance_error(variable_name, possible_type, variable):
11+
"""Raise consistent error message for isinstance() function.
12+
13+
Parameters
14+
----------
15+
variable_name : str
16+
The name of the variable.
17+
18+
possible_type : type
19+
The possible type of the variable.
20+
21+
variable : object
22+
The variable to check.
23+
24+
Raises
25+
------
26+
ValueError
27+
If the instance is not of the possible type.
28+
"""
1129
raise ValueError(
1230
f"{variable_name} has to be one of {possible_type}. "
1331
f"Got {type(variable)} instead."

imblearn/keras/_generator.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ def import_keras():
1515

1616
def import_from_keras():
1717
try:
18-
import keras
18+
import keras # noqa
1919

20-
return (keras.utils.Sequence,), True
20+
return (keras.utils.data_utils.Sequence,), True
2121
except ImportError:
2222
return tuple(), False
2323

@@ -33,7 +33,10 @@ def import_from_tensforflow():
3333
ParentClassTensorflow, has_keras_tf = import_from_tensforflow()
3434
has_keras = has_keras_k or has_keras_tf
3535
if has_keras:
36-
ParentClass = ParentClassKeras + ParentClassTensorflow
36+
if has_keras_tf:
37+
ParentClass = ParentClassTensorflow
38+
else:
39+
ParentClass = ParentClassKeras
3740
else:
3841
ParentClass = (object,)
3942
return ParentClass, has_keras

0 commit comments

Comments
 (0)