You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
print("Those were a few density plots for each column of the data.\n")

# Earlier approach, kept for reference only:
#
# data1 = data.copy()
# columns1 = np.array(data1.columns)
# for rows in range(data1.shape[0]):
#     for cols in columns1[1:6]:
#         if (data1[cols][rows] == 0):
#             data1[cols][rows] = np.median(data1[cols])
#
# The snippet above replaces every 0 in a column with that column's overall
# median, regardless of whether a patient is diabetic or not. It therefore
# ignores the possibility that diabetic patients have different feature
# values (for the chosen columns - 2, 3, 4, and 5) than non-diabetic
# patients. So the medians of the diabetic and non-diabetic groups are taken
# separately, and the zeroes are imputed with the median of the matching
# group.

# Maps each column name to [non-diabetic median, diabetic median]; both
# medians are computed over the non-zero entries of that column only.
imputation_matrix = {}
for i in columns:
    # Element-wise masks must be combined with the bitwise `&`. The logical
    # `and` raises "ValueError: The truth value of a Series is ambiguous".
    nonzero = data[i] != 0
    imputation_matrix[i] = [
        data.loc[(data['Diabetic'] == 0) & nonzero, i].median(),
        data.loc[(data['Diabetic'] == 1) & nonzero, i].median(),
    ]

# NOTE(review): indentation was lost in this copy of the file; the pauses
# below are assumed to sit outside the loop - confirm against the original.
time.sleep(1)
# print(f"The medians of the Plasma_Glucose_Conc. column for non-diabetic persons is {imputation_matrix['Plasma_Glucose_Conc.'][0]} and diabetic persons is {imputation_matrix['Plasma_Glucose_Conc.'][1]}")
time.sleep(1)
print(" Median values to be imputed for each column's zeroes")
0 commit comments