Commit 1078be9
resize functionality moved to represent module
We were handling resizing in extract_faces. With this commit, we moved it to the representation module to provide separation of concerns.
1 parent 42ee298 · commit 1078be9

9 files changed: +152 -171 lines
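
For callers of the public API, the visible change is that DeepFace.extract_faces no longer accepts target_size and now returns each face at its detected size rather than padded to a fixed shape. A minimal before/after sketch ("img.jpg" is a placeholder path):

from deepface import DeepFace

# before this commit: extract_faces resized and padded every face itself
# faces = DeepFace.extract_faces(img_path="img.jpg", target_size=(224, 224))

# after this commit: each face comes back at its native crop size
faces = DeepFace.extract_faces(img_path="img.jpg", detector_backend="opencv")
for face_obj in faces:
    print(face_obj["face"].shape, face_obj["confidence"])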

deepface/DeepFace.py (+1 -7)

@@ -2,7 +2,7 @@
 import os
 import warnings
 import logging
-from typing import Any, Dict, List, Tuple, Union, Optional
+from typing import Any, Dict, List, Union, Optional
 
 # this has to be set before importing tensorflow
 os.environ["TF_USE_LEGACY_KERAS"] = "1"
@@ -439,7 +439,6 @@ def stream(
 
 def extract_faces(
     img_path: Union[str, np.ndarray],
-    target_size: Optional[Tuple[int, int]] = (224, 224),
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
@@ -453,9 +452,6 @@ def extract_faces(
         img_path (str or np.ndarray): Path to the first image. Accepts exact image path
             as a string, numpy array (BGR), or base64 encoded images.
 
-        target_size (tuple): final shape of facial image. black pixels will be
-            added to resize the image (default is (224, 224)).
-
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
             'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
@@ -485,13 +481,11 @@ def extract_faces(
 
     return detection.extract_faces(
         img_path=img_path,
-        target_size=target_size,
         detector_backend=detector_backend,
         enforce_detection=enforce_detection,
         align=align,
         expand_percentage=expand_percentage,
         grayscale=grayscale,
-        human_readable=True,
     )
 
 
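Since human_readable=True was the hard-coded behavior here, dropping the flag does not change what DeepFace.extract_faces returns: a 3D RGB array per face, now normalized to [0, 1] by the detection module. A quick sketch of that output contract ("img.jpg" is a placeholder):

from deepface import DeepFace

faces = DeepFace.extract_faces(img_path="img.jpg")
face = faces[0]["face"]
assert face.ndim == 3 and face.shape[2] == 3  # height x width x RGB, no batch axis
assert 0.0 <= face.min() and face.max() <= 1.0  # normalized in detection.extract_faces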

deepface/modules/demography.py (+64 -57)

@@ -6,7 +6,7 @@
 from tqdm import tqdm
 
 # project dependencies
-from deepface.modules import modeling, detection
+from deepface.modules import modeling, detection, preprocessing
 from deepface.extendedmodels import Gender, Race, Emotion
 
 
@@ -118,7 +118,6 @@ def analyze(
 
     img_objs = detection.extract_faces(
         img_path=img_path,
-        target_size=(224, 224),
         detector_backend=detector_backend,
         grayscale=False,
         enforce_detection=enforce_detection,
@@ -130,60 +129,68 @@
         img_content = img_obj["face"]
         img_region = img_obj["facial_area"]
         img_confidence = img_obj["confidence"]
-        if img_content.shape[0] > 0 and img_content.shape[1] > 0:
-            obj = {}
-            # facial attribute analysis
-            pbar = tqdm(
-                range(0, len(actions)),
-                desc="Finding actions",
-                disable=silent if len(actions) > 1 else True,
-            )
-            for index in pbar:
-                action = actions[index]
-                pbar.set_description(f"Action: {action}")
-
-                if action == "emotion":
-                    emotion_predictions = modeling.build_model("Emotion").predict(img_content)
-                    sum_of_predictions = emotion_predictions.sum()
-
-                    obj["emotion"] = {}
-                    for i, emotion_label in enumerate(Emotion.labels):
-                        emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
-                        obj["emotion"][emotion_label] = emotion_prediction
-
-                    obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)]
-
-                elif action == "age":
-                    apparent_age = modeling.build_model("Age").predict(img_content)
-                    # int cast is for exception - object of type 'float32' is not JSON serializable
-                    obj["age"] = int(apparent_age)
-
-                elif action == "gender":
-                    gender_predictions = modeling.build_model("Gender").predict(img_content)
-                    obj["gender"] = {}
-                    for i, gender_label in enumerate(Gender.labels):
-                        gender_prediction = 100 * gender_predictions[i]
-                        obj["gender"][gender_label] = gender_prediction
-
-                    obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)]
-
-                elif action == "race":
-                    race_predictions = modeling.build_model("Race").predict(img_content)
-                    sum_of_predictions = race_predictions.sum()
-
-                    obj["race"] = {}
-                    for i, race_label in enumerate(Race.labels):
-                        race_prediction = 100 * race_predictions[i] / sum_of_predictions
-                        obj["race"][race_label] = race_prediction
-
-                    obj["dominant_race"] = Race.labels[np.argmax(race_predictions)]
-
-            # -----------------------------
-            # mention facial areas
-            obj["region"] = img_region
-            # include image confidence
-            obj["face_confidence"] = img_confidence
-
-            resp_objects.append(obj)
+        if img_content.shape[0] == 0 or img_content.shape[1] == 0:
+            continue
+
+        # rgb to bgr
+        img_content = img_content[:, :, ::-1]
+
+        # resize input image
+        img_content = preprocessing.resize_image(img=img_content, target_size=(224, 224))
+
+        obj = {}
+        # facial attribute analysis
+        pbar = tqdm(
+            range(0, len(actions)),
+            desc="Finding actions",
+            disable=silent if len(actions) > 1 else True,
+        )
+        for index in pbar:
+            action = actions[index]
+            pbar.set_description(f"Action: {action}")
+
+            if action == "emotion":
+                emotion_predictions = modeling.build_model("Emotion").predict(img_content)
+                sum_of_predictions = emotion_predictions.sum()
+
+                obj["emotion"] = {}
+                for i, emotion_label in enumerate(Emotion.labels):
+                    emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
+                    obj["emotion"][emotion_label] = emotion_prediction
+
+                obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)]
+
+            elif action == "age":
+                apparent_age = modeling.build_model("Age").predict(img_content)
+                # int cast is for exception - object of type 'float32' is not JSON serializable
+                obj["age"] = int(apparent_age)
+
+            elif action == "gender":
+                gender_predictions = modeling.build_model("Gender").predict(img_content)
+                obj["gender"] = {}
+                for i, gender_label in enumerate(Gender.labels):
+                    gender_prediction = 100 * gender_predictions[i]
+                    obj["gender"][gender_label] = gender_prediction
+
+                obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)]
+
+            elif action == "race":
+                race_predictions = modeling.build_model("Race").predict(img_content)
+                sum_of_predictions = race_predictions.sum()
+
+                obj["race"] = {}
+                for i, race_label in enumerate(Race.labels):
+                    race_prediction = 100 * race_predictions[i] / sum_of_predictions
+                    obj["race"][race_label] = race_prediction
+
+                obj["dominant_race"] = Race.labels[np.argmax(race_predictions)]
+
+        # -----------------------------
+        # mention facial areas
+        obj["region"] = img_region
+        # include image confidence
+        obj["face_confidence"] = img_confidence
+
+        resp_objects.append(obj)
 
     return resp_objects
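
analyze() now owns the model-specific preprocessing that used to live in extract_faces: it flips the returned RGB face back to BGR and resizes it to the 224x224 input the attribute models share. A standalone sketch of that per-face pipeline, with a random array standing in for a detected face crop:

import numpy as np
from deepface.modules import preprocessing

face = np.random.rand(150, 120, 3)  # stand-in for one "face" from detection.extract_faces

bgr_face = face[:, :, ::-1]  # extract_faces returns RGB; the demography models expect BGR
model_input = preprocessing.resize_image(img=bgr_face, target_size=(224, 224))
print(model_input.shape)  # (1, 224, 224, 3): padded, batched, ready for model.predict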

deepface/modules/detection.py (+4 -66)

@@ -1,5 +1,5 @@
 # built-in dependencies
-from typing import Any, Dict, List, Tuple, Union, Optional
+from typing import Any, Dict, List, Tuple, Union
 
 # 3rd part dependencies
 import numpy as np
@@ -10,30 +10,20 @@
 from deepface.modules import preprocessing
 from deepface.models.Detector import DetectedFace, FacialAreaRegion
 from deepface.detectors import DetectorWrapper
-from deepface.commons import package_utils
 from deepface.commons.logger import Logger
 
 logger = Logger(module="deepface/modules/detection.py")
 
 # pylint: disable=no-else-raise
 
 
-tf_major_version = package_utils.get_tf_major_version()
-if tf_major_version == 1:
-    from keras.preprocessing import image
-elif tf_major_version == 2:
-    from tensorflow.keras.preprocessing import image
-
-
 def extract_faces(
     img_path: Union[str, np.ndarray],
-    target_size: Optional[Tuple[int, int]] = (224, 224),
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
     expand_percentage: int = 0,
     grayscale: bool = False,
-    human_readable=False,
 ) -> List[Dict[str, Any]]:
     """
     Extract faces from a given image
@@ -42,9 +32,6 @@ def extract_faces(
         img_path (str or np.ndarray): Path to the first image. Accepts exact image path
             as a string, numpy array (BGR), or base64 encoded images.
 
-        target_size (tuple): final shape of facial image. black pixels will be
-            added to resize the image.
-
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
             'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv)
@@ -58,13 +45,10 @@ def extract_faces(
         grayscale (boolean): Flag to convert the image to grayscale before
             processing (default is False).
 
-        human_readable (bool): Flag to make the image human readable. 3D RGB for human readable
-            or 4D BGR for ML models (default is False).
-
     Returns:
         results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
 
-        - "face" (np.ndarray): The detected face as a NumPy array.
+        - "face" (np.ndarray): The detected face as a NumPy array in RGB format.
 
         - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
            - keys 'x', 'y', 'w', 'h' with int values
@@ -122,57 +106,11 @@
         if grayscale is True:
             current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
 
-        # resize and padding
-        if target_size is not None:
-            factor_0 = target_size[0] / current_img.shape[0]
-            factor_1 = target_size[1] / current_img.shape[1]
-            factor = min(factor_0, factor_1)
-
-            dsize = (
-                int(current_img.shape[1] * factor),
-                int(current_img.shape[0] * factor),
-            )
-            current_img = cv2.resize(current_img, dsize)
-
-            diff_0 = target_size[0] - current_img.shape[0]
-            diff_1 = target_size[1] - current_img.shape[1]
-            if grayscale is False:
-                # Put the base image in the middle of the padded image
-                current_img = np.pad(
-                    current_img,
-                    (
-                        (diff_0 // 2, diff_0 - diff_0 // 2),
-                        (diff_1 // 2, diff_1 - diff_1 // 2),
-                        (0, 0),
-                    ),
-                    "constant",
-                )
-            else:
-                current_img = np.pad(
-                    current_img,
-                    (
-                        (diff_0 // 2, diff_0 - diff_0 // 2),
-                        (diff_1 // 2, diff_1 - diff_1 // 2),
-                    ),
-                    "constant",
-                )
-
-            # double check: if target image is not still the same size with target.
-            if current_img.shape[0:2] != target_size:
-                current_img = cv2.resize(current_img, target_size)
-
-        # normalizing the image pixels
-        # what this line doing? must?
-        img_pixels = image.img_to_array(current_img)
-        img_pixels = np.expand_dims(img_pixels, axis=0)
-        img_pixels /= 255  # normalize input in [0, 1]
-        # discard expanded dimension
-        if human_readable is True and len(img_pixels.shape) == 4:
-            img_pixels = img_pixels[0]
+        current_img = current_img / 255  # normalize input in [0, 1]
 
         resp_objs.append(
             {
-                "face": img_pixels[:, :, ::-1] if human_readable is True else img_pixels,
+                "face": current_img[:, :, ::-1],
                 "facial_area": {
                     "x": int(current_region.x),
                     "y": int(current_region.y),
deepface/modules/preprocessing.py (+59 -2)

@@ -11,6 +11,16 @@
 import requests
 from PIL import Image
 
+# project dependencies
+from deepface.commons import package_utils
+
+
+tf_major_version = package_utils.get_tf_major_version()
+if tf_major_version == 1:
+    from keras.preprocessing import image
+elif tf_major_version == 2:
+    from tensorflow.keras.preprocessing import image
+
 
 def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
     """
@@ -66,8 +76,8 @@ def load_image_from_web(url: str) -> np.ndarray:
     response = requests.get(url, stream=True, timeout=60)
     response.raise_for_status()
     image_array = np.asarray(bytearray(response.raw.read()), dtype=np.uint8)
-    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
-    return image
+    img = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
+    return img
 
 
 def load_base64(uri: str) -> np.ndarray:
@@ -157,3 +167,50 @@ def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
         raise ValueError(f"unimplemented normalization type - {normalization}")
 
     return img
+
+
+def resize_image(img: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
+    """
+    Resize an image to expected size of a ml model with adding black pixels.
+    Args:
+        img (np.ndarray): pre-loaded image as numpy array
+        target_size (tuple): input shape of ml model
+    Returns:
+        img (np.ndarray): resized input image
+    """
+    factor_0 = target_size[0] / img.shape[0]
+    factor_1 = target_size[1] / img.shape[1]
+    factor = min(factor_0, factor_1)
+
+    dsize = (
+        int(img.shape[1] * factor),
+        int(img.shape[0] * factor),
+    )
+    img = cv2.resize(img, dsize)
+
+    diff_0 = target_size[0] - img.shape[0]
+    diff_1 = target_size[1] - img.shape[1]
+
+    # Put the base image in the middle of the padded image
+    img = np.pad(
+        img,
+        (
+            (diff_0 // 2, diff_0 - diff_0 // 2),
+            (diff_1 // 2, diff_1 - diff_1 // 2),
+            (0, 0),
+        ),
+        "constant",
+    )
+
+    # double check: if target image is not still the same size with target.
+    if img.shape[0:2] != target_size:
+        img = cv2.resize(img, target_size)
+
+    # make it 4-dimensional how ML models expect
+    img = image.img_to_array(img)
+    img = np.expand_dims(img, axis=0)
+
+    if img.max() > 1:
+        img = (img.astype(np.float32) / 255.0).astype(np.float32)
+
+    return img
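
The relocated helper keeps the old resize-then-pad semantics: scale by the limiting factor, center the result in black padding, add a batch axis, and rescale to [0, 1] when the input is not already normalized. A small usage sketch with a synthetic image:

import numpy as np
from deepface.modules import preprocessing

wide = np.full((100, 200, 3), 255, dtype=np.uint8)  # white image, wider than tall
resized = preprocessing.resize_image(img=wide, target_size=(224, 224))

print(resized.shape)     # (1, 224, 224, 3)
print(resized.max())     # 1.0 because uint8 input gets divided by 255
print(resized[0, 0, 0])  # [0. 0. 0.]: black padding above the centered image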
