|
30 | 30 | {
|
31 | 31 | "cell_type": "code",
|
32 | 32 | "execution_count": 2,
|
33 |
| - "metadata": { |
34 |
| - "collapsed": false |
35 |
| - }, |
| 33 | + "metadata": {}, |
36 | 34 | "outputs": [],
|
37 | 35 | "source": [
|
38 | 36 | "import pandas as pd\n",
|
|
53 | 51 | " # URLS for dataset via UCI\n",
|
54 | 52 | " train_data_url='https://archive.ics.uci.edu/ml/machine-learning-databases/madelon/MADELON/madelon_train.data'\n",
|
55 | 53 | " train_label_url='https://archive.ics.uci.edu/ml/machine-learning-databases/madelon/MADELON/madelon_train.labels'\n",
|
56 |
| - " \n", |
57 |
| - " \n", |
| 54 | + "\n", |
58 | 55 | " X_data = pd.read_csv(train_data_url, sep=\" \", header=None)\n",
|
59 | 56 | " y_data = pd.read_csv(train_label_url, sep=\" \", header=None)\n",
|
60 |
| - " data = X_data.ix[:,0:499]\n", |
61 |
| - " data['target'] = y_data[0] \n", |
| 57 | + " data = X_data.loc[:, :499]\n", |
| 58 | + " data['target'] = y_data[0]\n", |
62 | 59 | " return data"
|
63 | 60 | ]
|
64 | 61 | },
|
65 | 62 | {
|
66 | 63 | "cell_type": "code",
|
67 | 64 | "execution_count": 4,
|
68 |
| - "metadata": { |
69 |
| - "collapsed": false |
70 |
| - }, |
| 65 | + "metadata": {}, |
71 | 66 | "outputs": [],
|
72 | 67 | "source": [
|
73 | 68 | "data = load_data()"
|
|
76 | 71 | {
|
77 | 72 | "cell_type": "code",
|
78 | 73 | "execution_count": 5,
|
79 |
| - "metadata": { |
80 |
| - "collapsed": false |
81 |
| - }, |
| 74 | + "metadata": {}, |
82 | 75 | "outputs": [
|
83 | 76 | {
|
84 | 77 | "data": {
|
|
267 | 260 | {
|
268 | 261 | "cell_type": "code",
|
269 | 262 | "execution_count": 6,
|
270 |
| - "metadata": { |
271 |
| - "collapsed": false |
272 |
| - }, |
| 263 | + "metadata": {}, |
273 | 264 | "outputs": [],
|
274 | 265 | "source": [
|
275 |
| - "y=data.pop('target')\n", |
276 |
| - "X=data.copy()" |
| 266 | + "y = data.pop('target')\n", |
| 267 | + "X = data.copy().values" |
277 | 268 | ]
|
278 | 269 | },
|
279 | 270 | {
|
|
293 | 284 | },
|
294 | 285 | "outputs": [],
|
295 | 286 | "source": [
|
296 |
| - "rf = RandomForestClassifier(n_jobs=-1, class_weight='auto', max_depth=7)\n", |
297 |
| - "# define Boruta feature selection method\n", |
298 |
| - "feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2)" |
| 287 | + "rf = RandomForestClassifier(n_jobs=-1, class_weight=None, max_depth=7, random_state=0)\n", |
| 288 | + "# Define Boruta feature selection method\n", |
| 289 | + "feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=0)" |
299 | 290 | ]
|
300 | 291 | },
|
301 | 292 | {
|
|
308 | 299 | {
|
309 | 300 | "cell_type": "code",
|
310 | 301 | "execution_count": null,
|
311 |
| - "metadata": { |
312 |
| - "collapsed": false |
313 |
| - }, |
| 302 | + "metadata": {}, |
314 | 303 | "outputs": [],
|
315 | 304 | "source": [
|
316 |
| - "feat_selector.fit(X,y)" |
| 305 | + "feat_selector.fit(X, y)" |
317 | 306 | ]
|
318 | 307 | },
|
319 | 308 | {
|
|
328 | 317 | {
|
329 | 318 | "cell_type": "code",
|
330 | 319 | "execution_count": null,
|
331 |
| - "metadata": { |
332 |
| - "collapsed": false |
333 |
| - }, |
| 320 | + "metadata": {}, |
334 | 321 | "outputs": [],
|
335 | 322 | "source": [
|
336 |
| - "# check selected features\n", |
| 323 | + "# Check selected features\n", |
337 | 324 | "print(feat_selector.support_)\n",
|
338 |
| - "#select the chosen features from our dataframe.\n", |
339 |
| - "selected = X.ix[:,feat_selector.support_]\n", |
| 325 | + "# Select the chosen features from our dataframe.\n", |
| 326 | + "selected = X[:, feat_selector.support_]\n", |
340 | 327 | "print (\"\")\n",
|
341 | 328 | "print (\"Selected Feature Matrix Shape\")\n",
|
342 | 329 | "print (selected.shape)"
|
|
352 | 339 | {
|
353 | 340 | "cell_type": "code",
|
354 | 341 | "execution_count": null,
|
355 |
| - "metadata": { |
356 |
| - "collapsed": false |
357 |
| - }, |
| 342 | + "metadata": {}, |
358 | 343 | "outputs": [],
|
359 | 344 | "source": [
|
360 | 345 | "feat_selector.ranking_"
|
|
386 | 371 | "name": "python",
|
387 | 372 | "nbconvert_exporter": "python",
|
388 | 373 | "pygments_lexer": "ipython3",
|
389 |
| - "version": "3.5.1" |
| 374 | + "version": "3.6.5" |
| 375 | + }, |
| 376 | + "varInspector": { |
| 377 | + "cols": { |
| 378 | + "lenName": 16, |
| 379 | + "lenType": 16, |
| 380 | + "lenVar": 40 |
| 381 | + }, |
| 382 | + "kernels_config": { |
| 383 | + "python": { |
| 384 | + "delete_cmd_postfix": "", |
| 385 | + "delete_cmd_prefix": "del ", |
| 386 | + "library": "var_list.py", |
| 387 | + "varRefreshCmd": "print(var_dic_list())" |
| 388 | + }, |
| 389 | + "r": { |
| 390 | + "delete_cmd_postfix": ") ", |
| 391 | + "delete_cmd_prefix": "rm(", |
| 392 | + "library": "var_list.r", |
| 393 | + "varRefreshCmd": "cat(var_dic_list()) " |
| 394 | + } |
| 395 | + }, |
| 396 | + "types_to_exclude": [ |
| 397 | + "module", |
| 398 | + "function", |
| 399 | + "builtin_function_or_method", |
| 400 | + "instance", |
| 401 | + "_Feature" |
| 402 | + ], |
| 403 | + "window_display": false |
390 | 404 | }
|
391 | 405 | },
|
392 | 406 | "nbformat": 4,
|
393 |
| - "nbformat_minor": 0 |
| 407 | + "nbformat_minor": 1 |
394 | 408 | }
|
0 commit comments