#!/usr/bin/env python
# coding: utf-8

# # Import all dependencies

# In[1]:


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import sleep
import os
import pyeeg
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)
# get_ipython().run_line_magic('matplotlib', 'notebook')
import joblib
import pickle

# # Load files from folders

# In[2]:


# collect the path of every file in each set

dirB = "./Datasets/setB/"
tempB = []
for file in os.listdir(dirB):
    fl = dirB + file
    tempB.append(fl)
tempB = sorted(tempB)  # class 1, label +1 (healthy)

dirC = "./Datasets/setC/"
tempC = []
for file in os.listdir(dirC):
    fl = dirC + file
    tempC.append(fl)
tempC = sorted(tempC)  # class 2, label 0 (inter-ictal)

dirE = "./Datasets/setE/"
tempE = []
for file in os.listdir(dirE):
    fl = dirE + file
    tempE.append(fl)
tempE = sorted(tempE)  # class 3, label -1 (seizure)
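
# The three listing loops above share one pattern; an equivalent one-liner,
# shown for set B (illustrative only; the result matches tempB built above):
tempB_alt = sorted(dirB + f for f in os.listdir(dirB))
assert tempB_alt == tempB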


# # Create small tables

# In[3]:


# read every file into a one-column DataFrame; all three sets share the
# same column names (A0..A99) so a single header list can index each table
tb = []
st = 'A'
for i in range(len(tempB)):
    x = pd.read_table(tempB[i], header=None)
    x.columns = [st + str(i)]
    tb.append(x)

tc = []
st = 'A'
for i in range(len(tempC)):
    x = pd.read_table(tempC[i], header=None)
    x.columns = [st + str(i)]
    tc.append(x)

te = []
st = 'A'
for i in range(len(tempE)):
    x = pd.read_table(tempE[i], header=None)
    x.columns = [st + str(i)]
    te.append(x)


# # Make big table

# In[4]:


def table(tables):
    # column-wise concatenation of the per-file frames into one table
    big_table = pd.concat(tables, axis=1)
    return big_table


# In[5]:


bigB = table(tb)
bigC = table(tc)
bigE = table(te)
head = list(bigB.columns.values)


# In[6]:


len(bigB.columns)


# In[7]:


bigB.head(10)


# # Create a sub-matrix for each set

# In[8]:


# build a NumPy matrix from the DataFrame columns
def create_mat(mat):
    matx = np.zeros((len(mat), len(head)))
    for i in range(len(head)):
        matx[:, i] = mat[head[i]]
        sleep(0.01)
    return matx
# each column matx[:, i] holds one channel's full recording
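
# The column-copy loop is equivalent to a direct DataFrame conversion; a
# one-line sketch (assumes a pandas version with DataFrame.to_numpy):
matB_alt = np.nan_to_num(bigB[head].to_numpy())  # same values as np.nan_to_num(create_mat(bigB))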


# In[9]:


matB = create_mat(bigB)  # healthy data
matC = create_mat(bigC)  # inter-ictal (transition between healthy and seizure)
matE = create_mat(bigE)  # ictal (seizure) data

matB = np.nan_to_num(matB)  # matB[:, 0] is channel 0, matB[:, 1] is channel 1, and so on
matC = np.nan_to_num(matC)
matE = np.nan_to_num(matE)


# 4097 data points per channel
# the sampling rate is 173.61 Hz, so 4097 / 173.61 = 23.59 s of raw data per channel
# there are 100 channels in total
# the plot below shows the raw data of one channel over the 23.59 s
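
# Quick sanity check of the numbers above (assumes the standard 4097-sample segments):
n_samples, n_channels = matB.shape
print(n_samples, n_channels)             # expected: 4097 100
print(round(n_samples / 173.61, 2))      # expected: 23.6 seconds per channel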


# # Visualize the data points

# In[10]:


# plot one full channel per class (matB[:, 0], not matB[0], which would be
# a single time sample across all 100 channels)
hl, = plt.plot(matB[:, 0], label='healthy')
trans, = plt.plot(matC[:, 0], label='inter-ictal')
seizure, = plt.plot(matE[:, 0], label='seizure')
plt.legend(handles=[hl, trans, seizure])
plt.savefig("fig1.png")


# # <--------- Create a number of features ------------>

# # (DFA, HFD, SVD_Entropy, Fisher_Information, PFD)

# In[11]:


help(pyeeg.dfa), help(pyeeg.hfd), help(pyeeg.svd_entropy), help(pyeeg.fisher_info), help(pyeeg.pfd)


# In[12]:


# source: https://www.hindawi.com/journals/cin/2011/406391/
def features(mat):
    Kmax = 5
    Tau = 4
    DE = 10
    # M, R, Band and Fs are not used by the five features computed below;
    # they are kept from the reference parameter set
    M = 10
    R = 0.3
    Band = np.arange(1, 86)
    Fs = 173
    DFA = pyeeg.dfa(mat)
    HFD = pyeeg.hfd(mat, Kmax)
    SVD_Entropy = pyeeg.svd_entropy(mat, Tau, DE)
    Fisher_Information = pyeeg.fisher_info(mat, Tau, DE)
    PFD = pyeeg.pfd(mat)
    sleep(0.01)

    return (DFA, HFD, SVD_Entropy, Fisher_Information, PFD)
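
# Example usage: the five features for one healthy channel (illustrative)
print(features(matB[:, 0]))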


# # Compute the features of datasets

# In[13]:


'''
This is a three-class classification problem:
  class 1: healthy     +1
  class 2: transition   0
  class 3: disease     -1
'''


# In[14]:


# create features of class I
f1_B = np.zeros((100, 1))
f2_B = np.zeros((100, 1))
f3_B = np.zeros((100, 1))
f4_B = np.zeros((100, 1))
f5_B = np.zeros((100, 1))
cl_B = np.ones((100, 1))  # healthy: +1


for i in range(100):
    [f1_B[i, 0], f2_B[i, 0], f3_B[i, 0], f4_B[i, 0], f5_B[i, 0]] = features(matB[:, i])



# In[15]:


# create features of class II
f1_C = np.zeros((100, 1))
f2_C = np.zeros((100, 1))
f3_C = np.zeros((100, 1))
f4_C = np.zeros((100, 1))
f5_C = np.zeros((100, 1))
cl_C = np.zeros((100, 1))  # transition: 0

for i in range(100):
    [f1_C[i, 0], f2_C[i, 0], f3_C[i, 0], f4_C[i, 0], f5_C[i, 0]] = features(matC[:, i])



# In[16]:


# create features of class III
f1_E = np.zeros((100, 1))
f2_E = np.zeros((100, 1))
f3_E = np.zeros((100, 1))
f4_E = np.zeros((100, 1))
f5_E = np.zeros((100, 1))
cl_E = np.negative(np.ones((100, 1)))  # seizure: -1

for i in range(100):
    [f1_E[i, 0], f2_E[i, 0], f3_E[i, 0], f4_E[i, 0], f5_E[i, 0]] = features(matE[:, i])

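# The three per-class blocks above repeat one pattern; an equivalent helper
# is sketched here (feature_matrix is a hypothetical name, not used below):
def feature_matrix(mat, label):
    rows = [list(features(mat[:, i])) + [label] for i in range(mat.shape[1])]
    return np.asarray(rows)
# e.g. MftB in the next cell could equivalently be built as feature_matrix(matB, 1)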


# # Create feature and class matrices for each class

# In[17]:


MftB = np.concatenate([f1_B, f2_B, f3_B, f4_B, f5_B, cl_B], axis=1)
MftC = np.concatenate([f1_C, f2_C, f3_C, f4_C, f5_C, cl_C], axis=1)
MftE = np.concatenate([f1_E, f2_E, f3_E, f4_E, f5_E, cl_E], axis=1)


# In[18]:


FCM_B = pd.DataFrame(MftB, columns=['f1', 'f2', 'f3', 'f4', 'f5', 'class'])
FCM_C = pd.DataFrame(MftC, columns=['f1', 'f2', 'f3', 'f4', 'f5', 'class'])
FCM_E = pd.DataFrame(MftE, columns=['f1', 'f2', 'f3', 'f4', 'f5', 'class'])
FCM_B.head(4)


# In[19]:


TotalDataset = pd.concat([FCM_B, FCM_C, FCM_E], ignore_index=True)
visDat = TotalDataset.copy(deep=True)
visDat['class'] = visDat['class'].map({1: 'healthy', 0: 'transition', -1: 'seizure'})


# In[20]:


import seaborn as sbn
sbn.set(style="whitegrid", palette="muted")


# In[21]:


visDat.head(5)


# In[22]:


sbn.pairplot(visDat, hue='class', palette="husl")
plt.savefig("fig2.png")


# In[23]:


plt.plot(visDat['f1'], '--o')


# # Split arrays or matrices into random train and test subsets

# In[24]:


from sklearn.model_selection import train_test_split


# In[25]:


X = TotalDataset[['f1', 'f2', 'f3', 'f4', 'f5']]
y = TotalDataset['class']  # 1-D label vector (avoids sklearn's column-vector warning)
X = np.asarray(X)
y = np.asarray(y)


# In[26]:


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
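
# With only 100 samples per class, a stratified split keeps the class balance
# identical in train and test; a sketch, stored under new names so the split
# above is left untouched (stratify is a standard train_test_split argument):
Xtr_s, Xte_s, ytr_s, yte_s = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y)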


# # Apply machine learning

# In[27]:


from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB


# In[28]:


names = ["Nearest Neighbors", "Linear SVM", "Gaussian Process",
         "Decision Tree", "Random Forest", "AdaBoost",
         "Naive Bayes"]


# In[29]:


classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier(),
    GaussianNB()]
| 365 | + |
| 366 | + |
| 367 | +# In[37]: |
| 368 | + |
| 369 | + |
| 370 | +clf_score=[] |
| 371 | +with warnings.catch_warnings(): |
| 372 | + warnings.simplefilter("ignore") |
| 373 | + for name, clf in zip(names, classifiers): |
| 374 | + clf.fit(X_train, y_train) |
| 375 | + if(name=="Gaussian Process"): |
| 376 | + gaussian = pickle.dumps(clf) |
| 377 | + score = clf.score(X_test, y_test) |
| 378 | + clf_score.append([score,name]) |
| 379 | + |
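# Accuracy on a single 33% hold-out is noisy with so few test samples; a
# 5-fold cross-validation sketch (sklearn's cross_val_score) gives a steadier
# estimate for one of the models:
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(KNeighborsClassifier(3), X, y, cv=5)
print(cv_scores.mean(), cv_scores.std())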


# In[31]:


clf_score


# In[42]:


# persist the fitted Gaussian Process model
joblib.dump(gaussian, 'eegmodel.sav')

# equivalent alternative with the pickle module:
# pickle.dump(gaussian, open('eegmodel.sav', 'wb'))
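
# To reuse the persisted model later (mirror of the dump above):
loaded = joblib.load('eegmodel.sav')
print(loaded.score(X_test, y_test))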