@@ -14,29 +14,21 @@ class KNN():
14
14
def __init__ (self , k = 5 ):
15
15
self .k = k
16
16
17
- def _vote (self , neighbors ):
17
+ def _vote (self , neighbor_labels ):
18
18
""" Return the most common class among the neighbor samples """
19
- counts = np .bincount (neighbors [:, 1 ] .astype ('int' ))
19
+ counts = np .bincount (neighbor_labels .astype ('int' ))
20
20
return counts .argmax ()
21
21
22
22
def predict(self, X_test, X_train, y_train):
    """Classify every test sample by a majority vote of its nearest neighbors.

    Parameters
    ----------
    X_test : ndarray
        Samples to classify; iterated along the first axis, one sample per row.
    X_train : iterable
        Training samples, in the same order as ``y_train``.
    y_train : array-like
        Class labels of the training samples.

    Returns
    -------
    ndarray
        Array of length ``X_test.shape[0]`` holding the predicted label of
        each test sample (allocated with ``np.empty``, i.e. default float dtype).

    Notes
    -----
    Distances come from ``euclidean_distance``, a helper defined outside
    this method (presumably the Euclidean metric -- confirm against its
    definition).
    """
    # Convert once, outside the loop, so labels can be gathered by index arrays.
    train_labels = np.asarray(y_train)
    y_pred = np.empty(X_test.shape[0])
    # Determine the class of each sample
    for i, test_sample in enumerate(X_test):
        # Distance from the test sample to every training sample
        distances = [
            euclidean_distance(test_sample, train_sample)
            for train_sample in X_train
        ]
        # Indices of the K training samples closest to the test sample
        nearest_idx = np.argsort(distances)[:self.k]
        # Label the sample with the most common class among those neighbors.
        # A distinct index name keeps the outer loop variable `i` intact.
        y_pred[i] = self._vote(train_labels[nearest_idx])

    return y_pred
42
34
0 commit comments