Skip to content

Commit eaad6a3

Browse files
authored
Merge pull request #48 from guitarmind/master
Fixed "Tuple Index Out of range error", unit test and example notebook
2 parents 47f6cd4 + badc33e commit eaad6a3

File tree

3 files changed: +52 additions, -38 deletions

boruta/boruta_py.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -333,7 +333,7 @@ def _fit(self, X, y):
333 333           imp_history_rejected = imp_history[1:, not_selected] * -1
334 334
335 335           # update rank for not_selected features
336     -         if not_selected.shape[0] > 0 and not_selected.shape[1] > 0:
    336 +         if not_selected.shape[0] > 0:
337 337               # calculate ranks in each iteration, then median of ranks across feats
338 338               iter_ranks = self._nanrankdata(imp_history_rejected, axis=1)
339 339               rank_medians = np.nanmedian(iter_ranks, axis=0)

boruta/examples/Madalon_Data_Set.ipynb

+50 -36
Original file line number | Diff line number | Diff line change
@@ -30,9 +30,7 @@
3030
{
3131
"cell_type": "code",
3232
"execution_count": 2,
33-
"metadata": {
34-
"collapsed": false
35-
},
33+
"metadata": {},
3634
"outputs": [],
3735
"source": [
3836
"import pandas as pd\n",
@@ -53,21 +51,18 @@
5351
" # URLS for dataset via UCI\n",
5452
" train_data_url='https://archive.ics.uci.edu/ml/machine-learning-databases/madelon/MADELON/madelon_train.data'\n",
5553
" train_label_url='https://archive.ics.uci.edu/ml/machine-learning-databases/madelon/MADELON/madelon_train.labels'\n",
56-
" \n",
57-
" \n",
54+
"\n",
5855
" X_data = pd.read_csv(train_data_url, sep=\" \", header=None)\n",
5956
" y_data = pd.read_csv(train_label_url, sep=\" \", header=None)\n",
60-
" data = X_data.ix[:,0:499]\n",
61-
" data['target'] = y_data[0] \n",
57+
" data = X_data.loc[:, :499]\n",
58+
" data['target'] = y_data[0]\n",
6259
" return data"
6360
]
6461
},
6562
{
6663
"cell_type": "code",
6764
"execution_count": 4,
68-
"metadata": {
69-
"collapsed": false
70-
},
65+
"metadata": {},
7166
"outputs": [],
7267
"source": [
7368
"data = load_data()"
@@ -76,9 +71,7 @@
7671
{
7772
"cell_type": "code",
7873
"execution_count": 5,
79-
"metadata": {
80-
"collapsed": false
81-
},
74+
"metadata": {},
8275
"outputs": [
8376
{
8477
"data": {
@@ -267,13 +260,11 @@
267260
{
268261
"cell_type": "code",
269262
"execution_count": 6,
270-
"metadata": {
271-
"collapsed": false
272-
},
263+
"metadata": {},
273264
"outputs": [],
274265
"source": [
275-
"y=data.pop('target')\n",
276-
"X=data.copy()"
266+
"y = data.pop('target')\n",
267+
"X = data.copy().values"
277268
]
278269
},
279270
{
@@ -293,9 +284,9 @@
293284
},
294285
"outputs": [],
295286
"source": [
296-
"rf = RandomForestClassifier(n_jobs=-1, class_weight='auto', max_depth=7)\n",
297-
"# define Boruta feature selection method\n",
298-
"feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2)"
287+
"rf = RandomForestClassifier(n_jobs=-1, class_weight=None, max_depth=7, random_state=0)\n",
288+
"# Define Boruta feature selection method\n",
289+
"feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=0)"
299290
]
300291
},
301292
{
@@ -308,12 +299,10 @@
308299
{
309300
"cell_type": "code",
310301
"execution_count": null,
311-
"metadata": {
312-
"collapsed": false
313-
},
302+
"metadata": {},
314303
"outputs": [],
315304
"source": [
316-
"feat_selector.fit(X,y)"
305+
"feat_selector.fit(X, y)"
317306
]
318307
},
319308
{
@@ -328,15 +317,13 @@
328317
{
329318
"cell_type": "code",
330319
"execution_count": null,
331-
"metadata": {
332-
"collapsed": false
333-
},
320+
"metadata": {},
334321
"outputs": [],
335322
"source": [
336-
"# check selected features\n",
323+
"# Check selected features\n",
337324
"print(feat_selector.support_)\n",
338-
"#select the chosen features from our dataframe.\n",
339-
"selected = X.ix[:,feat_selector.support_]\n",
325+
"# Select the chosen features from our dataframe.\n",
326+
"selected = X[:, feat_selector.support_]\n",
340327
"print (\"\")\n",
341328
"print (\"Selected Feature Matrix Shape\")\n",
342329
"print (selected.shape)"
@@ -352,9 +339,7 @@
352339
{
353340
"cell_type": "code",
354341
"execution_count": null,
355-
"metadata": {
356-
"collapsed": false
357-
},
342+
"metadata": {},
358343
"outputs": [],
359344
"source": [
360345
"feat_selector.ranking_"
@@ -386,9 +371,38 @@
386371
"name": "python",
387372
"nbconvert_exporter": "python",
388373
"pygments_lexer": "ipython3",
389-
"version": "3.5.1"
374+
"version": "3.6.5"
375+
},
376+
"varInspector": {
377+
"cols": {
378+
"lenName": 16,
379+
"lenType": 16,
380+
"lenVar": 40
381+
},
382+
"kernels_config": {
383+
"python": {
384+
"delete_cmd_postfix": "",
385+
"delete_cmd_prefix": "del ",
386+
"library": "var_list.py",
387+
"varRefreshCmd": "print(var_dic_list())"
388+
},
389+
"r": {
390+
"delete_cmd_postfix": ") ",
391+
"delete_cmd_prefix": "rm(",
392+
"library": "var_list.r",
393+
"varRefreshCmd": "cat(var_dic_list()) "
394+
}
395+
},
396+
"types_to_exclude": [
397+
"module",
398+
"function",
399+
"builtin_function_or_method",
400+
"instance",
401+
"_Feature"
402+
],
403+
"window_display": false
390404
}
391405
},
392406
"nbformat": 4,
393-
"nbformat_minor": 0
407+
"nbformat_minor": 1
394408
}

boruta/test/unit_tests.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ def test_if_boruta_extracts_relevant_features(self):
40 40           bt.fit(X, y)
41 41
42 42           # make sure that only all the relevant features are returned
43    -         self.assertItemsEqual(range(5), list(np.where(bt.support_)[0]))
   43 +         self.assertListEqual(list(range(5)), list(np.where(bt.support_)[0]))
44 44
45 45
46 46   if __name__ == '__main__':

0 commit comments

Comments
 (0)