treehouse-projects
diff --git a/‎.gitignore‎
Lines changed: 13 additions & 0 deletions b/‎.gitignore‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎s1v4/BodyMeasures.csv‎
Lines changed: 9279 additions & 0 deletions b/‎s1v4/BodyMeasures.csv‎
Lines changed: 9279 additions & 0 deletions
diff --git a/‎s1v4/Demographics.csv‎
Lines changed: 10587 additions & 0 deletions b/‎s1v4/Demographics.csv‎
Lines changed: 10587 additions & 0 deletions
diff --git a/‎s1v4/Occupation.csv‎
Lines changed: 7750 additions & 0 deletions b/‎s1v4/Occupation.csv‎
Lines changed: 7750 additions & 0 deletions
diff --git a/‎s1v4/Stage1-Video4.ipynb‎
Lines changed: 151 additions & 0 deletions b/‎s1v4/Stage1-Video4.ipynb‎
Lines changed: 151 additions & 0 deletions
diff --git a/‎s2v1/BodyMeasures.csv‎
Lines changed: 9283 additions & 0 deletions b/‎s2v1/BodyMeasures.csv‎
Lines changed: 9283 additions & 0 deletions
diff --git a/‎s2v1/Demographics.csv‎
Lines changed: 10587 additions & 0 deletions b/‎s2v1/Demographics.csv‎
Lines changed: 10587 additions & 0 deletions
diff --git a/‎s2v1/Occupation.csv‎
Lines changed: 7750 additions & 0 deletions b/‎s2v1/Occupation.csv‎
Lines changed: 7750 additions & 0 deletions
@@ -0,0 +1,13 @@
+
+# Created by https://www.gitignore.io/api/jupyternotebook
+# Edit at https://www.gitignore.io/?templates=jupyternotebook
+
+### JupyterNotebook ###
+.ipynb_checkpoints
+*/.ipynb_checkpoints/*
+
+# Remove previous ipynb_checkpoints
+#   git rm -r .ipynb_checkpoints/
+#
+
+# End of https://www.gitignore.io/api/jupyternotebook
@@ -0,0 +1,151 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Import the pandas library and load in the data files\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Load the three CSV files (relative paths) into one DataFrame each\n",
+    "demo, bmx, ocp = [\n",
+    "    pd.read_csv(csv_name)\n",
+    "    for csv_name in ('Demographics.csv', 'BodyMeasures.csv', 'Occupation.csv')\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Describe the numeric columns in the demographics DataFrame\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "demo.describe()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Display the first few rows of the demographics DataFrame\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "demo.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Select the first five columns by name\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# All rows (:) restricted to the listed column labels; head() previews the result\n",
+    "demo.loc[\n",
+    "    :,\n",
+    "    ['SEQN', 'SDDSRVYR', 'RIDSTATR', 'RIDEXMON', 'RIAGENDR'],\n",
+    "].head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Select the first five columns by numeric location\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# iloc slices exclude the end position, so 0:5 covers positions 0-4:\n",
+    "# five rows (same preview size as .head()) and the first five columns\n",
+    "demo.iloc[0:5, 0:5]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Merge the demographics and body measures DataFrames\n",
+    "---\n",
+    "\n",
+    "* Match the values in the SEQN columns between the DataFrames\n",
+    "* Do an inner join (keep only the data for participants listed in both files)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Inner join on SEQN: only rows whose SEQN appears in both demo and bmx are kept\n",
+    "dataset = demo.merge(bmx, on='SEQN', how='inner')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Save the joined dataset to a new file\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
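+    "# Writing is disabled by default; uncomment the next line to save the merged data to MyDataset.csv\n",
+    "# dataset.to_csv('MyDataset.csv', index=False)"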
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}