diff --git a/notebook/1 . EDA STUDENT PERFORMANCE .ipynb b/notebook/1 . EDA STUDENT PERFORMANCE .ipynb
index d2df5aa2..e037b25c 100644
--- a/notebook/1 . EDA STUDENT PERFORMANCE .ipynb
+++ b/notebook/1 . EDA STUDENT PERFORMANCE .ipynb
@@ -1412,6 +1412,26 @@
     "#### BIVARIATE ANALYSIS ( Is gender has any impact on student's performance ? ) "
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "526d49f9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.preprocessing import LabelEncoder\n",
+    "\n",
+    "# Create an instance of LabelEncoder\n",
+    "label_encoder = LabelEncoder()\n",
+    "\n",
+    "# Fit and transform the 'gender' column; codes follow sorted label order (presumably female -> 0, male -> 1)\n",
+    "df['gender_encoded'] = label_encoder.fit_transform(df['gender'])\n",
+    "\n",
+    "# Sanity check: True would mean at least one encoded value is not numeric\n",
+    "non_numeric_data = pd.to_numeric(df['gender_encoded'], errors='coerce').isna().any()\n",
+    "non_numeric_data"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -1419,7 +1439,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "gender_group = df.groupby('gender').mean()\n",
+    "gender_group = df.groupby('gender_encoded').mean(numeric_only=True)\n",
     "gender_group"
    ]
   },
@@ -1430,24 +1450,24 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plt.figure(figsize=(10, 8))\n",
-    "\n",
-    "X = ['Total Average','Math Average']\n",
-    "\n",
-    "\n",
-    "female_scores = [gender_group['average'][0], gender_group['math score'][0]]\n",
-    "male_scores = [gender_group['average'][1], gender_group['math score'][1]]\n",
-    "\n",
-    "X_axis = np.arange(len(X))\n",
-    " \n",
-    "plt.bar(X_axis - 0.2, male_scores, 0.4, label = 'Male')\n",
-    "plt.bar(X_axis + 0.2, female_scores, 0.4, label = 'Female')\n",
-    " \n",
-    "plt.xticks(X_axis, X)\n",
-    "plt.ylabel(\"Marks\")\n",
-    "plt.title(\"Total average v/s Math average marks of both the genders\", fontweight='bold')\n",
-    "plt.legend()\n",
-    "plt.show()"
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "\n",
+    "plt.figure(figsize=(10, 8))\n",
+    "X = ['Total Average', 'Math Average']\n",
+    "\n",
+    "# Use the per-gender means from gender_group (index 0 = female, 1 = male), not single rows of df\n",
+    "female_scores = [gender_group.loc[0, 'average'], gender_group.loc[0, 'math_score']]\n",
+    "male_scores = [gender_group.loc[1, 'average'], gender_group.loc[1, 'math_score']]\n",
+    "\n",
+    "X_axis = np.arange(len(X))\n",
+    "plt.bar(X_axis - 0.2, male_scores, 0.4, label='Male')\n",
+    "plt.bar(X_axis + 0.2, female_scores, 0.4, label='Female')\n",
+    "plt.xticks(X_axis, X)\n",
+    "plt.ylabel(\"Marks\")\n",
+    "plt.title(\"Total average v/s Math average marks of both genders\", fontweight='bold')\n",
+    "plt.legend()\n",
+    "plt.show()"
    ]
   },
   {