|
3 | 3 | "nbformat_minor": 0, |
4 | 4 | "metadata": { |
5 | 5 | "colab": { |
6 | | - "name": "Lesson16-Python For Data Science-CaseStudies.ipynb", |
| 6 | + "name": "Lesson15-Python For Data Science-CaseStudies.ipynb", |
7 | 7 | "version": "0.3.2", |
8 | 8 | "provenance": [], |
9 | 9 | "collapsed_sections": [ |
|
35 | 35 | "colab_type": "text" |
36 | 36 | }, |
37 | 37 | "source": [ |
38 | | - "<a href=\"https://colab.research.google.com/github/paiml/python_for_datascience/blob/master/Lesson16_Python_For_Data_Science_CaseStudies.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" |
| 38 | + "<a href=\"https://colab.research.google.com/github/paiml/python_for_datascience/blob/master/Lesson15_Python_For_Data_Science_CaseStudies.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" |
39 | 39 | ] |
40 | 40 | }, |
41 | 41 | { |
|
48 | 48 | "# Lesson 15: Case Studies" |
49 | 49 | ] |
50 | 50 | }, |
| 51 | + { |
| 52 | + "metadata": { |
| 53 | + "id": "c_Id55m6Jsbu", |
| 54 | + "colab_type": "text" |
| 55 | + }, |
| 56 | + "cell_type": "markdown", |
| 57 | + "source": [ |
| 58 | + "## Pragmatic AI Labs\n", |
| 59 | + "\n" |
| 60 | + ] |
| 61 | + }, |
| 62 | + { |
| 63 | + "metadata": { |
| 64 | + "id": "e5p96AqpSDZa", |
| 65 | + "colab_type": "text" |
| 66 | + }, |
| 67 | + "cell_type": "markdown", |
| 68 | + "source": [ |
| 69 | + "\n", |
| 70 | + "\n", |
| 71 | + "This notebook was produced by [Pragmatic AI Labs](https://paiml.com/). You can continue learning about these topics by:\n", |
| 72 | + "\n", |
| 73 | + "* Buying a copy of [Pragmatic AI: An Introduction to Cloud-Based Machine Learning](http://www.informit.com/store/pragmatic-ai-an-introduction-to-cloud-based-machine-9780134863917)\n", |
| 74 | + "* Reading an online copy of [Pragmatic AI: An Introduction to Cloud-Based Machine Learning](https://www.safaribooksonline.com/library/view/pragmatic-ai-an/9780134863924/)\n", |
| 75 | + "* Watching video [Essential Machine Learning and AI with Python and Jupyter Notebook-Video-SafariOnline](https://www.safaribooksonline.com/videos/essential-machine-learning/9780135261118) on Safari Books Online.\n", |
| 76 | + "* Watching video [AWS Certified Machine Learning-Specialty](https://learning.oreilly.com/videos/aws-certified-machine/9780135556597)\n", |
| 77 | + "* Purchasing video [Essential Machine Learning and AI with Python and Jupyter Notebook- Purchase Video](http://www.informit.com/store/essential-machine-learning-and-ai-with-python-and-jupyter-9780135261095)\n", |
| 78 | + "* Viewing more content at [noahgift.com](https://noahgift.com/)\n" |
| 79 | + ] |
| 80 | + }, |
| 81 | + { |
| 82 | + "metadata": { |
| 83 | + "id": "pBTeTbnRKG_k", |
| 84 | + "colab_type": "code", |
| 85 | + "colab": {} |
| 86 | + }, |
| 87 | + "cell_type": "code", |
| 88 | + "source": [ |
| 89 | + "" |
| 90 | + ], |
| 91 | + "execution_count": 0, |
| 92 | + "outputs": [] |
| 93 | + }, |
51 | 94 | { |
52 | 95 | "metadata": { |
53 | 96 | "id": "NvoiEwiAWrWy", |
|
84 | 127 | "metadata": { |
85 | 128 | "id": "Q3FrtesdfyV9", |
86 | 129 | "colab_type": "code", |
| 130 | + "outputId": "1db2fd0d-8904-489e-ac1f-70bc70c9704a", |
87 | 131 | "colab": { |
88 | 132 | "base_uri": "https://localhost:8080/", |
89 | 133 | "height": 925 |
90 | | - }, |
91 | | - "outputId": "1db2fd0d-8904-489e-ac1f-70bc70c9704a" |
| 134 | + } |
92 | 135 | }, |
93 | 136 | "cell_type": "code", |
94 | 137 | "source": [ |
|
219 | 262 | "metadata": { |
220 | 263 | "id": "aIbXYrxU8ySd", |
221 | 264 | "colab_type": "code", |
| 265 | + "outputId": "241c61f9-ad81-4c4d-82dd-42bef0502fdf", |
222 | 266 | "colab": { |
223 | 267 | "base_uri": "https://localhost:8080/", |
224 | 268 | "height": 407 |
225 | | - }, |
226 | | - "outputId": "241c61f9-ad81-4c4d-82dd-42bef0502fdf" |
| 269 | + } |
227 | 270 | }, |
228 | 271 | "cell_type": "code", |
229 | 272 | "source": [ |
230 | 273 | "!wget https://raw.githubusercontent.com/uchidalab/book-dataset/master/Task1/book30-listing-train.csv\n", |
231 | 274 | "!wget https://raw.githubusercontent.com/noahgift/recommendations/master/model_definition.yaml" |
232 | 275 | ], |
233 | | - "execution_count": 38, |
| 276 | + "execution_count": 0, |
234 | 277 | "outputs": [ |
235 | 278 | { |
236 | 279 | "output_type": "stream", |
|
276 | 319 | "metadata": { |
277 | 320 | "id": "Ef8dbaV4tHrz", |
278 | 321 | "colab_type": "code", |
| 322 | + "outputId": "e7bbaff9-edcf-43df-f142-f8e5e916338f", |
279 | 323 | "colab": { |
280 | 324 | "base_uri": "https://localhost:8080/", |
281 | 325 | "height": 197 |
282 | | - }, |
283 | | - "outputId": "e7bbaff9-edcf-43df-f142-f8e5e916338f" |
| 326 | + } |
284 | 327 | }, |
285 | 328 | "cell_type": "code", |
286 | 329 | "source": [ |
|
289 | 332 | "df = df.drop(\"Unnamed: 0\", axis=1)\n", |
290 | 333 | "df.head()" |
291 | 334 | ], |
292 | | - "execution_count": 39, |
| 335 | + "execution_count": 0, |
293 | 336 | "outputs": [ |
294 | 337 | { |
295 | 338 | "output_type": "execute_result", |
|
451 | 494 | "metadata": { |
452 | 495 | "id": "KVYJIiwHuhiT", |
453 | 496 | "colab_type": "code", |
| 497 | + "outputId": "c5a39f2f-e99f-4514-8bb8-ebe7c4eee1b1", |
454 | 498 | "colab": { |
455 | 499 | "base_uri": "https://localhost:8080/", |
456 | 500 | "height": 70 |
457 | | - }, |
458 | | - "outputId": "c5a39f2f-e99f-4514-8bb8-ebe7c4eee1b1" |
| 501 | + } |
459 | 502 | }, |
460 | 503 | "cell_type": "code", |
461 | 504 | "source": [ |
462 | 505 | "df.columns" |
463 | 506 | ], |
464 | | - "execution_count": 40, |
| 507 | + "execution_count": 0, |
465 | 508 | "outputs": [ |
466 | 509 | { |
467 | 510 | "output_type": "execute_result", |
|
493 | 536 | "metadata": { |
494 | 537 | "id": "-kaJsKyruyAl", |
495 | 538 | "colab_type": "code", |
| 539 | + "outputId": "f0e37663-1297-49c0-e000-ec0af6665d43", |
496 | 540 | "colab": { |
497 | 541 | "base_uri": "https://localhost:8080/", |
498 | 542 | "height": 34 |
499 | | - }, |
500 | | - "outputId": "f0e37663-1297-49c0-e000-ec0af6665d43" |
| 543 | + } |
501 | 544 | }, |
502 | 545 | "cell_type": "code", |
503 | 546 | "source": [ |
504 | 547 | "df.shape" |
505 | 548 | ], |
506 | | - "execution_count": 41, |
| 549 | + "execution_count": 0, |
507 | 550 | "outputs": [ |
508 | 551 | { |
509 | 552 | "output_type": "execute_result", |
|
533 | 576 | "metadata": { |
534 | 577 | "id": "bldBWuL2Nwmh", |
535 | 578 | "colab_type": "code", |
| 579 | + "outputId": "1c6b54c6-35ec-4fa8-b1a5-dd9bcd42b013", |
536 | 580 | "colab": { |
537 | 581 | "base_uri": "https://localhost:8080/", |
538 | 582 | "height": 214 |
539 | | - }, |
540 | | - "outputId": "1c6b54c6-35ec-4fa8-b1a5-dd9bcd42b013" |
| 583 | + } |
541 | 584 | }, |
542 | 585 | "cell_type": "code", |
543 | 586 | "source": [ |
544 | 587 | "!head book30-listing-train-with-headers.csv" |
545 | 588 | ], |
546 | | - "execution_count": 42, |
| 589 | + "execution_count": 0, |
547 | 590 | "outputs": [ |
548 | 591 | { |
549 | 592 | "output_type": "stream", |
|
567 | 610 | "metadata": { |
568 | 611 | "id": "ous6EqC8Nocg", |
569 | 612 | "colab_type": "code", |
| 613 | + "outputId": "398add9c-ef76-47e2-9fbb-219c8ff1af53", |
570 | 614 | "colab": { |
571 | 615 | "base_uri": "https://localhost:8080/", |
572 | 616 | "height": 212 |
573 | | - }, |
574 | | - "outputId": "398add9c-ef76-47e2-9fbb-219c8ff1af53" |
| 617 | + } |
575 | 618 | }, |
576 | 619 | "cell_type": "code", |
577 | 620 | "source": [ |
578 | 621 | "!cat model_definition.yaml" |
579 | 622 | ], |
580 | | - "execution_count": 43, |
| 623 | + "execution_count": 0, |
581 | 624 | "outputs": [ |
582 | 625 | { |
583 | 626 | "output_type": "stream", |
|
602 | 645 | "metadata": { |
603 | 646 | "id": "WpVA2fyXLRoK", |
604 | 647 | "colab_type": "code", |
| 648 | + "outputId": "abfc3b5b-6f59-469f-cb7b-aff5e28cb8ca", |
605 | 649 | "colab": { |
606 | 650 | "base_uri": "https://localhost:8080/", |
607 | 651 | "height": 20338 |
608 | | - }, |
609 | | - "outputId": "abfc3b5b-6f59-469f-cb7b-aff5e28cb8ca" |
| 652 | + } |
610 | 653 | }, |
611 | 654 | "cell_type": "code", |
612 | 655 | "source": [ |
613 | 656 | "!ludwig experiment --data_csv book30-listing-train-with-headers.csv --model_definition_file model_definition.yaml\n" |
614 | 657 | ], |
615 | | - "execution_count": 3, |
| 658 | + "execution_count": 0, |
616 | 659 | "outputs": [ |
617 | 660 | { |
618 | 661 | "output_type": "stream", |
|
1856 | 1899 | "\n", |
1857 | 1900 | "shap.initjs()" |
1858 | 1901 | ], |
1859 | | - "execution_count": 45, |
| 1902 | + "execution_count": 0, |
1860 | 1903 | "outputs": [ |
1861 | 1904 | { |
1862 | 1905 | "output_type": "display_data", |
|
1934 | 1977 | "metadata": { |
1935 | 1978 | "id": "rKBTIdWgS7Vx", |
1936 | 1979 | "colab_type": "code", |
| 1980 | + "outputId": "73a92dc0-1557-47fd-c546-6f7ff38854af", |
1937 | 1981 | "colab": { |
1938 | 1982 | "base_uri": "https://localhost:8080/", |
1939 | 1983 | "height": 34 |
1940 | | - }, |
1941 | | - "outputId": "73a92dc0-1557-47fd-c546-6f7ff38854af" |
| 1984 | + } |
1942 | 1985 | }, |
1943 | 1986 | "cell_type": "code", |
1944 | 1987 | "source": [ |
1945 | 1988 | "X_train.shape, y_train.shape" |
1946 | 1989 | ], |
1947 | | - "execution_count": 47, |
| 1990 | + "execution_count": 0, |
1948 | 1991 | "outputs": [ |
1949 | 1992 | { |
1950 | 1993 | "output_type": "execute_result", |
|
1985 | 2028 | "knn = sklearn.neighbors.KNeighborsClassifier()\n", |
1986 | 2029 | "knn.fit(X_train, y_train)" |
1987 | 2030 | ], |
1988 | | - "execution_count": 48, |
| 2031 | + "execution_count": 0, |
1989 | 2032 | "outputs": [ |
1990 | 2033 | { |
1991 | 2034 | "output_type": "execute_result", |
|
2034 | 2077 | "shap.initjs()\n", |
2035 | 2078 | "shap.force_plot(explainer.expected_value, shap_values_single, X_display.iloc[0,:])" |
2036 | 2079 | ], |
2037 | | - "execution_count": 49, |
| 2080 | + "execution_count": 0, |
2038 | 2081 | "outputs": [ |
2039 | 2082 | { |
2040 | 2083 | "output_type": "stream", |
|
2142 | 2185 | "source": [ |
2143 | 2186 | " !pip install -q scikit-surprise" |
2144 | 2187 | ], |
2145 | | - "execution_count": 8, |
| 2188 | + "execution_count": 0, |
2146 | 2189 | "outputs": [ |
2147 | 2190 | { |
2148 | 2191 | "output_type": "stream", |
|
2180 | 2223 | "# Run 5-fold cross-validation and print results.\n", |
2181 | 2224 | "cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)" |
2182 | 2225 | ], |
2183 | | - "execution_count": 33, |
| 2226 | + "execution_count": 0, |
2184 | 2227 | "outputs": [ |
2185 | 2228 | { |
2186 | 2229 | "output_type": "stream", |
|
2361 | 2404 | "algo.fit(trainset)\n", |
2362 | 2405 | "\n" |
2363 | 2406 | ], |
2364 | | - "execution_count": 51, |
| 2407 | + "execution_count": 0, |
2365 | 2408 | "outputs": [ |
2366 | 2409 | { |
2367 | 2410 | "output_type": "stream", |
|
2427 | 2470 | "for movie in toy_story_neighbors:\n", |
2428 | 2471 | " print(movie)\n" |
2429 | 2472 | ], |
2430 | | - "execution_count": 52, |
| 2473 | + "execution_count": 0, |
2431 | 2474 | "outputs": [ |
2432 | 2475 | { |
2433 | 2476 | "output_type": "stream", |
|
0 commit comments