jjrunner · clemfeb · Oct 27, 2020
diff --git a/BootcampStats.ipynb b/BootcampStats.ipynb
@@ -11,256 +11,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Respondent</th>\n",
-       "      <th>Professional</th>\n",
-       "      <th>ProgramHobby</th>\n",
-       "      <th>Country</th>\n",
-       "      <th>University</th>\n",
-       "      <th>EmploymentStatus</th>\n",
-       "      <th>FormalEducation</th>\n",
-       "      <th>MajorUndergrad</th>\n",
-       "      <th>HomeRemote</th>\n",
-       "      <th>CompanySize</th>\n",
-       "      <th>...</th>\n",
-       "      <th>StackOverflowMakeMoney</th>\n",
-       "      <th>Gender</th>\n",
-       "      <th>HighestEducationParents</th>\n",
-       "      <th>Race</th>\n",
-       "      <th>SurveyLong</th>\n",
-       "      <th>QuestionsInteresting</th>\n",
-       "      <th>QuestionsConfusing</th>\n",
-       "      <th>InterestedAnswers</th>\n",
-       "      <th>Salary</th>\n",
-       "      <th>ExpectedSalary</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Student</td>\n",
-       "      <td>Yes, both</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Not employed, and not looking for work</td>\n",
-       "      <td>Secondary school</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>...</td>\n",
-       "      <td>Strongly disagree</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>High school</td>\n",
-       "      <td>White or of European descent</td>\n",
-       "      <td>Strongly disagree</td>\n",
-       "      <td>Strongly agree</td>\n",
-       "      <td>Disagree</td>\n",
-       "      <td>Strongly agree</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Student</td>\n",
-       "      <td>Yes, both</td>\n",
-       "      <td>United Kingdom</td>\n",
-       "      <td>Yes, full-time</td>\n",
-       "      <td>Employed part-time</td>\n",
-       "      <td>Some college/university study without earning ...</td>\n",
-       "      <td>Computer science or software engineering</td>\n",
-       "      <td>More than half, but not all, the time</td>\n",
-       "      <td>20 to 99 employees</td>\n",
-       "      <td>...</td>\n",
-       "      <td>Strongly disagree</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>A master's degree</td>\n",
-       "      <td>White or of European descent</td>\n",
-       "      <td>Somewhat agree</td>\n",
-       "      <td>Somewhat agree</td>\n",
-       "      <td>Disagree</td>\n",
-       "      <td>Strongly agree</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>37500.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>Professional developer</td>\n",
-       "      <td>Yes, both</td>\n",
-       "      <td>United Kingdom</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Employed full-time</td>\n",
-       "      <td>Bachelor's degree</td>\n",
-       "      <td>Computer science or software engineering</td>\n",
-       "      <td>Less than half the time, but at least one day ...</td>\n",
-       "      <td>10,000 or more employees</td>\n",
-       "      <td>...</td>\n",
-       "      <td>Disagree</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>A professional degree</td>\n",
-       "      <td>White or of European descent</td>\n",
-       "      <td>Somewhat agree</td>\n",
-       "      <td>Agree</td>\n",
-       "      <td>Disagree</td>\n",
-       "      <td>Agree</td>\n",
-       "      <td>113750.0</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>Professional non-developer who sometimes write...</td>\n",
-       "      <td>Yes, both</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Employed full-time</td>\n",
-       "      <td>Doctoral degree</td>\n",
-       "      <td>A non-computer-focused engineering discipline</td>\n",
-       "      <td>Less than half the time, but at least one day ...</td>\n",
-       "      <td>10,000 or more employees</td>\n",
-       "      <td>...</td>\n",
-       "      <td>Disagree</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>A doctoral degree</td>\n",
-       "      <td>White or of European descent</td>\n",
-       "      <td>Agree</td>\n",
-       "      <td>Agree</td>\n",
-       "      <td>Somewhat agree</td>\n",
-       "      <td>Strongly agree</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>5</td>\n",
-       "      <td>Professional developer</td>\n",
-       "      <td>Yes, I program as a hobby</td>\n",
-       "      <td>Switzerland</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Employed full-time</td>\n",
-       "      <td>Master's degree</td>\n",
-       "      <td>Computer science or software engineering</td>\n",
-       "      <td>Never</td>\n",
-       "      <td>10 to 19 employees</td>\n",
-       "      <td>...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows × 154 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   Respondent                                       Professional  \\\n",
-       "0           1                                            Student   \n",
-       "1           2                                            Student   \n",
-       "2           3                             Professional developer   \n",
-       "3           4  Professional non-developer who sometimes write...   \n",
-       "4           5                             Professional developer   \n",
-       "\n",
-       "                ProgramHobby         Country      University  \\\n",
-       "0                  Yes, both   United States              No   \n",
-       "1                  Yes, both  United Kingdom  Yes, full-time   \n",
-       "2                  Yes, both  United Kingdom              No   \n",
-       "3                  Yes, both   United States              No   \n",
-       "4  Yes, I program as a hobby     Switzerland              No   \n",
-       "\n",
-       "                         EmploymentStatus  \\\n",
-       "0  Not employed, and not looking for work   \n",
-       "1                      Employed part-time   \n",
-       "2                      Employed full-time   \n",
-       "3                      Employed full-time   \n",
-       "4                      Employed full-time   \n",
-       "\n",
-       "                                     FormalEducation  \\\n",
-       "0                                   Secondary school   \n",
-       "1  Some college/university study without earning ...   \n",
-       "2                                  Bachelor's degree   \n",
-       "3                                    Doctoral degree   \n",
-       "4                                    Master's degree   \n",
-       "\n",
-       "                                  MajorUndergrad  \\\n",
-       "0                                            NaN   \n",
-       "1       Computer science or software engineering   \n",
-       "2       Computer science or software engineering   \n",
-       "3  A non-computer-focused engineering discipline   \n",
-       "4       Computer science or software engineering   \n",
-       "\n",
-       "                                          HomeRemote  \\\n",
-       "0                                                NaN   \n",
-       "1              More than half, but not all, the time   \n",
-       "2  Less than half the time, but at least one day ...   \n",
-       "3  Less than half the time, but at least one day ...   \n",
-       "4                                              Never   \n",
-       "\n",
-       "                CompanySize       ...       StackOverflowMakeMoney Gender  \\\n",
-       "0                       NaN       ...            Strongly disagree   Male   \n",
-       "1        20 to 99 employees       ...            Strongly disagree   Male   \n",
-       "2  10,000 or more employees       ...                     Disagree   Male   \n",
-       "3  10,000 or more employees       ...                     Disagree   Male   \n",
-       "4        10 to 19 employees       ...                          NaN    NaN   \n",
-       "\n",
-       "  HighestEducationParents                          Race         SurveyLong  \\\n",
-       "0             High school  White or of European descent  Strongly disagree   \n",
-       "1       A master's degree  White or of European descent     Somewhat agree   \n",
-       "2   A professional degree  White or of European descent     Somewhat agree   \n",
-       "3       A doctoral degree  White or of European descent              Agree   \n",
-       "4                     NaN                           NaN                NaN   \n",
-       "\n",
-       "  QuestionsInteresting QuestionsConfusing InterestedAnswers    Salary  \\\n",
-       "0       Strongly agree           Disagree    Strongly agree       NaN   \n",
-       "1       Somewhat agree           Disagree    Strongly agree       NaN   \n",
-       "2                Agree           Disagree             Agree  113750.0   \n",
-       "3                Agree     Somewhat agree    Strongly agree       NaN   \n",
-       "4                  NaN                NaN               NaN       NaN   \n",
-       "\n",
-       "   ExpectedSalary  \n",
-       "0             NaN  \n",
-       "1         37500.0  \n",
-       "2             NaN  \n",
-       "3             NaN  \n",
-       "4             NaN  \n",
-       "\n",
-       "[5 rows x 154 columns]"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "error",
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] File ./survey_results_public.csv does not exist: './survey_results_public.csv'",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-3-4155ff657f88>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'matplotlib'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'inline'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'./survey_results_public.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      7\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[1;32m    674\u001b[0m         )\n\u001b[1;32m    675\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 676\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    677\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    678\u001b[0m     \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m    446\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    447\u001b[0m     \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 448\u001b[0;31m     \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp_or_buf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    449\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    450\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m    878\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"has_index_names\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"has_index_names\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    879\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 880\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    881\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    882\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m   1112\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"c\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1113\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"c\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1114\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1115\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1116\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"python\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m   1889\u001b[0m         \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"usecols\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0musecols\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1890\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1891\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1892\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munnamed_cols\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munnamed_cols\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1893\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] File ./survey_results_public.csv does not exist: './survey_results_public.csv'"
+     ]
     }
    ],
    "source": [
@@ -293,7 +63,7 @@
     "#In this case, we want to look at bootcamp data\n",
     "#First - let's just look at how many people took a bootcamp in the dataset\n",
     "\n",
-    "bootcamp_df = df[df['TimeAfterBootcamp'].isnull()==False]\n",
+    "bootcamp_df = df[df['TimeAfterBootcamp'].isnull()==False] #Sample change only\n",
     "not_bootcamp_df = df[df['TimeAfterBootcamp'].isnull()==True] \n",
     "bootcamp_df.shape"
    ]
@@ -750,9 +520,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.1"
+   "version": "3.7.6-final"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}