Merge pull request #1140 from serengil/feat-task-2403-eye-coordinates-in-biology

serengil · web-flow · commit 329606ffe83d · 2024-03-24T16:59:48.000Z
Feat task 2403 eye coordinates in biology
diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py
@@ -476,7 +476,9 @@ def extract_faces(
 
         - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
             - keys 'x', 'y', 'w', 'h' with int values
-            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values
+            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values. The left and right
+                eyes are the person's own left and right eyes (from the person's point of
+                view), not the observer's.
 
         - "confidence" (float): The confidence score associated with the detected face.
     """
diff --git a/deepface/__init__.py b/deepface/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.89"
+__version__ = "0.0.90"
diff --git a/deepface/detectors/DetectorWrapper.py b/deepface/detectors/DetectorWrapper.py
@@ -76,7 +76,9 @@ def detect_faces(
 
         - img (np.ndarray): The detected face as a NumPy array.
 
-        - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+        - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h,
+            left_eye and right_eye. The left and right eyes are the person's own left and
+            right eyes (from the person's point of view), not the observer's.
 
         - confidence (float): The confidence score associated with the detected face.
     """
@@ -123,13 +125,11 @@ def detect_faces(
                 img=img, left_eye=left_eye, right_eye=right_eye
             )
             rotated_x1, rotated_y1, rotated_x2, rotated_y2 = rotate_facial_area(
-                facial_area=(x, y, x + w, y + h),
-                angle=angle,
-                size=(img.shape[0], img.shape[1])
+                facial_area=(x, y, x + w, y + h), angle=angle, size=(img.shape[0], img.shape[1])
             )
             detected_face = aligned_img[
-                int(rotated_y1) : int(rotated_y2),
-                int(rotated_x1) : int(rotated_x2)]
+                int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2)
+            ]
 
         result = DetectedFace(
             img=detected_face,
@@ -143,9 +143,7 @@ def detect_faces(
 
 
 def rotate_facial_area(
-    facial_area: Tuple[int, int, int, int],
-    angle: float,
-    size: Tuple[int, int]
+    facial_area: Tuple[int, int, int, int], angle: float, size: Tuple[int, int]
 ) -> Tuple[int, int, int, int]:
     """
     Rotate the facial area around its center.
diff --git a/deepface/detectors/Dlib.py b/deepface/detectors/Dlib.py
@@ -88,11 +88,11 @@ def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
 
                 shape = self.model["sp"](img, detection)
 
-                left_eye = (
+                right_eye = (
                     int((shape.part(2).x + shape.part(3).x) // 2),
                     int((shape.part(2).y + shape.part(3).y) // 2),
                 )
-                right_eye = (
+                left_eye = (
                     int((shape.part(0).x + shape.part(1).x) // 2),
                     int((shape.part(0).y + shape.part(1).y) // 2),
                 )
diff --git a/deepface/detectors/FastMtCnn.py b/deepface/detectors/FastMtCnn.py
@@ -34,8 +34,8 @@ def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
         ):
             for regions, confidence, eyes in zip(*detections):
                 x, y, w, h = xyxy_to_xywh(regions)
-                left_eye = eyes[0]
-                right_eye = eyes[1]
+                right_eye = eyes[0]
+                left_eye = eyes[1]
 
                 left_eye = tuple(int(i) for i in left_eye)
                 right_eye = tuple(int(i) for i in right_eye)
diff --git a/deepface/detectors/MediaPipe.py b/deepface/detectors/MediaPipe.py
@@ -61,8 +61,8 @@ def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
             y = int(bounding_box.ymin * img_height)
             h = int(bounding_box.height * img_height)
 
-            left_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
-            right_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
+            right_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
+            left_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
             # nose = (int(landmarks[2].x * img_width), int(landmarks[2].y * img_height))
             # mouth = (int(landmarks[3].x * img_width), int(landmarks[3].y * img_height))
             # right_ear = (int(landmarks[4].x * img_width), int(landmarks[4].y * img_height))
diff --git a/deepface/detectors/MtCnn.py b/deepface/detectors/MtCnn.py
@@ -35,8 +35,10 @@ def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
             for current_detection in detections:
                 x, y, w, h = current_detection["box"]
                 confidence = current_detection["confidence"]
-                left_eye = current_detection["keypoints"]["left_eye"]
-                right_eye = current_detection["keypoints"]["right_eye"]
+                # mtcnn detector assigns left eye with respect to the observer
+                # but we are setting it with respect to the person itself
+                left_eye = current_detection["keypoints"]["right_eye"]
+                right_eye = current_detection["keypoints"]["left_eye"]
 
                 facial_area = FacialAreaRegion(
                     x=x,
diff --git a/deepface/detectors/OpenCv.py b/deepface/detectors/OpenCv.py
@@ -112,15 +112,18 @@ def find_eyes(self, img: np.ndarray) -> tuple:
             eye_2 = eyes[1]
 
             if eye_1[0] < eye_2[0]:
-                left_eye = eye_1
-                right_eye = eye_2
-            else:
-                left_eye = eye_2
                 right_eye = eye_1
+                left_eye = eye_2
+            else:
+                right_eye = eye_2
+                left_eye = eye_1
 
             # -----------------------
             # find center of eyes
-            left_eye = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2)))
+            left_eye = (
+                int(left_eye[0] + (left_eye[2] / 2)),
+                int(left_eye[1] + (left_eye[3] / 2)),
+            )
             right_eye = (
                 int(right_eye[0] + (right_eye[2] / 2)),
                 int(right_eye[1] + (right_eye[3] / 2)),
diff --git a/deepface/detectors/RetinaFace.py b/deepface/detectors/RetinaFace.py
@@ -34,9 +34,9 @@ def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
             x = detection[0]
             w = detection[2] - x
 
-            # notice that these must be inverse for retinaface
-            left_eye = identity["landmarks"]["right_eye"]
-            right_eye = identity["landmarks"]["left_eye"]
+            # retinaface sets left and right eyes with respect to the person
+            left_eye = identity["landmarks"]["left_eye"]
+            right_eye = identity["landmarks"]["right_eye"]
 
             # eyes are list of float, need to cast them tuple of int
             left_eye = tuple(int(i) for i in left_eye)
diff --git a/deepface/detectors/Yolo.py b/deepface/detectors/Yolo.py
@@ -81,10 +81,10 @@ def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
             x, y, w, h = result.boxes.xywh.tolist()[0]
             confidence = result.boxes.conf.tolist()[0]
 
-            # left_eye_conf = result.keypoints.conf[0][0]
-            # right_eye_conf = result.keypoints.conf[0][1]
-            left_eye = result.keypoints.xy[0][0].tolist()
-            right_eye = result.keypoints.xy[0][1].tolist()
+            # right_eye_conf = result.keypoints.conf[0][0]
+            # left_eye_conf = result.keypoints.conf[0][1]
+            right_eye = result.keypoints.xy[0][0].tolist()
+            left_eye = result.keypoints.xy[0][1].tolist()
 
             # eyes are list of float, need to cast them tuple of int
             left_eye = tuple(int(i) for i in left_eye)
diff --git a/deepface/detectors/YuNet.py b/deepface/detectors/YuNet.py
@@ -99,7 +99,7 @@ def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
             {x, y}_{re, le, nt, rcm, lcm} stands for the coordinates of right eye,
             left eye, nose tip, the right corner and left corner of the mouth respectively.
             """
-            (x, y, w, h, x_re, y_re, x_le, y_le) = list(map(int, face[:8]))
+            (x, y, w, h, x_le, y_le, x_re, y_re) = list(map(int, face[:8]))
 
             # YuNet returns negative coordinates if it thinks part of the detected face
             # is outside the frame.
diff --git a/deepface/models/Detector.py b/deepface/models/Detector.py
@@ -20,7 +20,9 @@ def detect_faces(self, img: np.ndarray) -> List["FacialAreaRegion"]:
                 where each object contains:
 
             - facial_area (FacialAreaRegion): The facial area region represented
-                as x, y, w, h, left_eye and right_eye
+                as x, y, w, h, left_eye and right_eye. The left and right eyes are
+                the person's own left and right eyes (from the person's point of
+                view), not the observer's.
         """
         pass
 
@@ -44,6 +46,21 @@ def __init__(
         right_eye: Optional[Tuple[int, int]] = None,
         confidence: Optional[float] = None,
     ):
+        """
+        Initialize a facial area region object.
+
+        Args:
+            x (int): The x-coordinate of the top-left corner of the bounding box.
+            y (int): The y-coordinate of the top-left corner of the bounding box.
+            w (int): The width of the bounding box.
+            h (int): The height of the bounding box.
+            left_eye (tuple): The coordinates (x, y) of the left eye with respect to
+                the person rather than the observer. Default is None.
+            right_eye (tuple): The coordinates (x, y) of the right eye with respect to
+                the person rather than the observer. Default is None.
+            confidence (float, optional): Confidence score associated with the face detection.
+                Default is None.
+        """
         self.x = x
         self.y = y
         self.w = w
@@ -59,6 +76,14 @@ class DetectedFace:
     confidence: float
 
     def __init__(self, img: np.ndarray, facial_area: FacialAreaRegion, confidence: float):
+        """
+        Initialize detected face object.
+
+        Args:
+            img (np.ndarray): detected face image as numpy array
+            facial_area (FacialAreaRegion): detected face's metadata (e.g. bounding box)
+            confidence (float): confidence score for face detection
+        """
         self.img = img
         self.facial_area = facial_area
         self.confidence = confidence
diff --git a/deepface/modules/detection.py b/deepface/modules/detection.py
@@ -68,7 +68,9 @@ def extract_faces(
 
         - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
             - keys 'x', 'y', 'w', 'h' with int values
-            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values
+            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values.
+                The left and right eyes are the person's own left and right eyes (from the
+                person's point of view), not the observer's.
 
         - "confidence" (float): The confidence score associated with the detected face.
     """
@@ -201,8 +203,8 @@ def align_face(
     Align a given image horizontally with respect to its left and right eye locations
     Args:
         img (np.ndarray): pre-loaded image with detected face
-        left_eye (list or tuple): coordinates of left eye with respect to the you
-        right_eye(list or tuple): coordinates of right eye with respect to the you
+        left_eye (list or tuple): coordinates of left eye with respect to the person itself
+        right_eye (list or tuple): coordinates of right eye with respect to the person itself
     Returns:
         img (np.ndarray): aligned facial image
     """
@@ -214,6 +216,6 @@ def align_face(
     if img.shape[0] == 0 or img.shape[1] == 0:
         return img, 0
 
-    angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0])))
+    angle = float(np.degrees(np.arctan2(left_eye[1] - right_eye[1], left_eye[0] - right_eye[0])))
     img = np.array(Image.fromarray(img).rotate(angle))
     return img, angle
diff --git a/package_info.json b/package_info.json
@@ -1,3 +1,3 @@
 {
-    "version": "0.0.89"
+    "version": "0.0.90"
 }
diff --git a/tests/test_extract_faces.py b/tests/test_extract_faces.py
@@ -19,6 +19,12 @@ def test_different_detectors():
             assert "y" in img_obj["facial_area"].keys()
             assert "w" in img_obj["facial_area"].keys()
             assert "h" in img_obj["facial_area"].keys()
+            # the left eye must be set with respect to the person, not the observer
+            assert "left_eye" in img_obj["facial_area"].keys()
+            assert "right_eye" in img_obj["facial_area"].keys()
+            right_eye = img_obj["facial_area"]["right_eye"]
+            left_eye = img_obj["facial_area"]["left_eye"]
+            assert left_eye[0] > right_eye[0]
             assert "confidence" in img_obj.keys()
 
             img = img_obj["face"]
diff --git a/tests/visual-test.py b/tests/visual-test.py
@@ -57,7 +57,7 @@
     logger.info(df)
 
 
-expand_areas = [0, 25]
+expand_areas = [0]
 img_paths = ["dataset/img11.jpg", "dataset/img11_reflection.jpg"]
 for expand_area in expand_areas:
     for img_path in img_paths:
@@ -86,6 +86,15 @@
                     assert isinstance(face_obj["facial_area"]["right_eye"][0], int)
                     assert isinstance(face_obj["facial_area"]["right_eye"][1], int)
 
+                # left eye is really the left eye of the person
+                if (
+                    face_obj["facial_area"]["left_eye"] is not None
+                    and face_obj["facial_area"]["right_eye"] is not None
+                ):
+                    re_x = face_obj["facial_area"]["right_eye"][0]
+                    le_x = face_obj["facial_area"]["left_eye"][0]
+                    assert re_x < le_x, "right eye must be the right eye of the person"
+
                 assert isinstance(face_obj["confidence"], float)
                 assert face_obj["confidence"] <= 1
 

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.0.89"`
	`1`	`+__version__ = "0.0.90"`
Original file line number	Diff line number	Diff line change
`@@ -88,11 +88,11 @@ def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:`
`88`	`88`
`89`	`89`	`shape = self.model["sp"](img, detection)`
`90`	`90`
`91`		`- left_eye = (`
	`91`	`+ right_eye = (`
`92`	`92`	`int((shape.part(2).x + shape.part(3).x) // 2),`
`93`	`93`	`int((shape.part(2).y + shape.part(3).y) // 2),`
`94`	`94`	`)`
`95`		`- right_eye = (`
	`95`	`+ left_eye = (`
`96`	`96`	`int((shape.part(0).x + shape.part(1).x) // 2),`
`97`	`97`	`int((shape.part(0).y + shape.part(1).y) // 2),`
`98`	`98`	`)`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`	`1`	`{`
`2`		`- "version": "0.0.89"`
	`2`	`+ "version": "0.0.90"`
`3`	`3`	`}`