Skip to content

Commit 88814e6

Browse files
authored
Merge pull request #967 from serengil/feat-task-2301-vgg-normalization-layer
vgg normalization layer bug for gpu users
2 parents 3265be2 + 5ffa7bf commit 88814e6

File tree

5 files changed

+84
-141
lines changed

5 files changed

+84
-141
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
[![PyPI Downloads](https://static.pepy.tech/personalized-badge/deepface?period=total&units=international_system&left_color=grey&right_color=blue&left_text=pypi%20downloads)](https://pepy.tech/project/deepface)
66
[![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/deepface?color=green&label=conda%20downloads)](https://anaconda.org/conda-forge/deepface)
7-
[![Stars](https://img.shields.io/github/stars/serengil/deepface?color=yellow&style=flat)](https://github.com/serengil/deepface/stargazers)
7+
[![Stars](https://img.shields.io/github/stars/serengil/deepface?color=yellow&style=flat&label=%E2%AD%90%20stars)](https://github.com/serengil/deepface/stargazers)
88
[![License](http://img.shields.io/:license-MIT-green.svg?style=flat)](https://github.com/serengil/deepface/blob/master/LICENSE)
99
[![Tests](https://github.com/serengil/deepface/actions/workflows/tests.yml/badge.svg)](https://github.com/serengil/deepface/actions/workflows/tests.yml)
1010

deepface/DeepFace.py

+64-124
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def build_model(model_name: str) -> Any:
4545
VGG-Face, Facenet, OpenFace, DeepFace, DeepID for face recognition
4646
Age, Gender, Emotion, Race for facial attributes
4747
Returns:
48-
built model with corresponding class
48+
built_model
4949
"""
5050
return modeling.build_model(model_name=model_name)
5151

@@ -62,57 +62,37 @@ def verify(
6262
) -> Dict[str, Any]:
6363
"""
6464
Verify if an image pair represents the same person or different persons.
65-
66-
The verification function converts facial images to vectors and calculates the similarity
67-
between those vectors. Vectors of images of the same person should exhibit higher similarity
68-
(or lower distance) than vectors of images of different persons.
69-
7065
Args:
7166
img1_path (str or np.ndarray): Path to the first image. Accepts exact image path
7267
as a string, numpy array (BGR), or base64 encoded images.
73-
7468
img2_path (str or np.ndarray): Path to the second image. Accepts exact image path
7569
as a string, numpy array (BGR), or base64 encoded images.
76-
7770
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
7871
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face).
79-
8072
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
8173
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv)
82-
8374
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
8475
'euclidean', 'euclidean_l2' (default is cosine).
85-
8676
enforce_detection (boolean): If no face is detected in an image, raise an exception.
8777
Set to False to avoid the exception for low-resolution images (default is True).
88-
8978
align (bool): Flag to enable face alignment (default is True).
90-
9179
normalization (string): Normalize the input image before feeding it to the model.
9280
Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)
93-
9481
Returns:
95-
result (dict): A dictionary containing verification results.
96-
82+
result (dict): A dictionary containing verification results with following keys.
9783
- 'verified' (bool): Indicates whether the images represent the same person (True)
9884
or different persons (False).
99-
10085
- 'distance' (float): The distance measure between the face vectors.
10186
A lower distance indicates higher similarity.
102-
10387
- 'max_threshold_to_verify' (float): The maximum threshold used for verification.
10488
If the distance is below this threshold, the images are considered a match.
105-
10689
- 'model' (str): The chosen face recognition model.
107-
10890
- 'similarity_metric' (str): The chosen similarity metric for measuring distances.
109-
11091
- 'facial_areas' (dict): Rectangular regions of interest for faces in both images.
11192
- 'img1': {'x': int, 'y': int, 'w': int, 'h': int}
11293
Region of interest for the first image.
11394
- 'img2': {'x': int, 'y': int, 'w': int, 'h': int}
11495
Region of interest for the second image.
115-
11696
- 'time' (float): Time taken for the verification process in seconds.
11797
"""
11898

@@ -138,77 +118,59 @@ def analyze(
138118
) -> List[Dict[str, Any]]:
139119
"""
140120
Analyze facial attributes such as age, gender, emotion, and race in the provided image.
141-
142121
Args:
143122
img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
144123
or a base64 encoded image. If the source image contains multiple faces, the result will
145124
include information for each detected face.
146-
147125
actions (tuple): Attributes to analyze. The default is ('age', 'gender', 'emotion', 'race').
148126
You can exclude some of these attributes from the analysis if needed.
149-
150127
enforce_detection (boolean): If no face is detected in an image, raise an exception.
151128
Set to False to avoid the exception for low-resolution images (default is True).
152-
153129
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
154130
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
155-
156131
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
157132
'euclidean', 'euclidean_l2' (default is cosine).
158-
159133
align (boolean): Perform alignment based on the eye positions (default is True).
160-
161134
silent (boolean): Suppress or allow some log messages for a quieter analysis process
162135
(default is False).
163-
164136
Returns:
165137
results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary represents
166-
the analysis results for a detected face.
167-
168-
Each dictionary in the list contains the following keys:
169-
170-
- 'region' (dict): Represents the rectangular region of the detected face in the image.
171-
- 'x': x-coordinate of the top-left corner of the face.
172-
- 'y': y-coordinate of the top-left corner of the face.
173-
- 'w': Width of the detected face region.
174-
- 'h': Height of the detected face region.
175-
176-
- 'age' (float): Estimated age of the detected face.
177-
178-
- 'face_confidence' (float): Confidence score for the detected face.
179-
Indicates the reliability of the face detection.
180-
181-
- 'dominant_gender' (str): The dominant gender in the detected face.
182-
Either "Man" or "Woman."
183-
184-
- 'gender' (dict): Confidence scores for each gender category.
185-
- 'Man': Confidence score for the male gender.
186-
- 'Woman': Confidence score for the female gender.
187-
188-
- 'dominant_emotion' (str): The dominant emotion in the detected face.
189-
Possible values include "sad," "angry," "surprise," "fear," "happy,"
190-
"disgust," and "neutral."
191-
192-
- 'emotion' (dict): Confidence scores for each emotion category.
193-
- 'sad': Confidence score for sadness.
194-
- 'angry': Confidence score for anger.
195-
- 'surprise': Confidence score for surprise.
196-
- 'fear': Confidence score for fear.
197-
- 'happy': Confidence score for happiness.
198-
- 'disgust': Confidence score for disgust.
199-
- 'neutral': Confidence score for neutrality.
200-
201-
- 'dominant_race' (str): The dominant race in the detected face.
202-
Possible values include "indian," "asian," "latino hispanic,"
203-
"black," "middle eastern," and "white."
204-
205-
- 'race' (dict): Confidence scores for each race category.
206-
- 'indian': Confidence score for Indian ethnicity.
207-
- 'asian': Confidence score for Asian ethnicity.
208-
- 'latino hispanic': Confidence score for Latino/Hispanic ethnicity.
209-
- 'black': Confidence score for Black ethnicity.
210-
- 'middle eastern': Confidence score for Middle Eastern ethnicity.
211-
- 'white': Confidence score for White ethnicity.
138+
the analysis results for a detected face. Each dictionary in the list contains the
139+
following keys:
140+
- 'region' (dict): Represents the rectangular region of the detected face in the image.
141+
- 'x': x-coordinate of the top-left corner of the face.
142+
- 'y': y-coordinate of the top-left corner of the face.
143+
- 'w': Width of the detected face region.
144+
- 'h': Height of the detected face region.
145+
- 'age' (float): Estimated age of the detected face.
146+
- 'face_confidence' (float): Confidence score for the detected face.
147+
Indicates the reliability of the face detection.
148+
- 'dominant_gender' (str): The dominant gender in the detected face.
149+
Either "Man" or "Woman."
150+
- 'gender' (dict): Confidence scores for each gender category.
151+
- 'Man': Confidence score for the male gender.
152+
- 'Woman': Confidence score for the female gender.
153+
- 'dominant_emotion' (str): The dominant emotion in the detected face.
154+
Possible values include "sad," "angry," "surprise," "fear," "happy,"
155+
"disgust," and "neutral."
156+
- 'emotion' (dict): Confidence scores for each emotion category.
157+
- 'sad': Confidence score for sadness.
158+
- 'angry': Confidence score for anger.
159+
- 'surprise': Confidence score for surprise.
160+
- 'fear': Confidence score for fear.
161+
- 'happy': Confidence score for happiness.
162+
- 'disgust': Confidence score for disgust.
163+
- 'neutral': Confidence score for neutrality.
164+
- 'dominant_race' (str): The dominant race in the detected face.
165+
Possible values include "indian," "asian," "latino hispanic,"
166+
"black," "middle eastern," and "white."
167+
- 'race' (dict): Confidence scores for each race category.
168+
- 'indian': Confidence score for Indian ethnicity.
169+
- 'asian': Confidence score for Asian ethnicity.
170+
- 'latino hispanic': Confidence score for Latino/Hispanic ethnicity.
171+
- 'black': Confidence score for Black ethnicity.
172+
- 'middle eastern': Confidence score for Middle Eastern ethnicity.
173+
- 'white': Confidence score for White ethnicity.
212174
"""
213175
return demography.analyze(
214176
img_path=img_path,
@@ -233,46 +195,36 @@ def find(
233195
) -> List[pd.DataFrame]:
234196
"""
235197
Identify individuals in a database
236-
237198
Args:
238199
img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
239200
or a base64 encoded image. If the source image contains multiple faces, the result will
240201
include information for each detected face.
241-
242202
db_path (string): Path to the folder containing image files. All detected faces
243203
in the database will be considered in the decision-making process.
244-
245204
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
246-
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace
247-
205+
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face).
248206
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
249-
'euclidean', 'euclidean_l2'.
250-
207+
'euclidean', 'euclidean_l2' (default is cosine).
251208
enforce_detection (boolean): If no face is detected in an image, raise an exception.
252-
Default is True. Set to False to avoid the exception for low-resolution images.
253-
209+
Set to False to avoid the exception for low-resolution images (default is True).
254210
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
255-
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8'.
256-
257-
align (boolean): Perform alignment based on the eye positions.
258-
211+
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
212+
align (boolean): Perform alignment based on the eye positions (default is True).
259213
normalization (string): Normalize the input image before feeding it to the model.
260-
Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
261-
262-
silent (boolean): Suppress or allow some log messages for a quieter analysis process.
263-
214+
Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base).
215+
silent (boolean): Suppress or allow some log messages for a quieter analysis process
216+
(default is False).
264217
Returns:
265218
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
266219
to the identity information for an individual detected in the source image.
267220
The DataFrame columns include:
268-
269-
- 'identity': Identity label of the detected individual.
270-
- 'target_x', 'target_y', 'target_w', 'target_h': Bounding box coordinates of the
271-
target face in the database.
272-
- 'source_x', 'source_y', 'source_w', 'source_h': Bounding box coordinates of the
273-
detected face in the source image.
274-
- '{model_name}_{distance_metric}': Similarity score between the faces based on the
275-
specified model and distance metric
221+
- 'identity': Identity label of the detected individual.
222+
- 'target_x', 'target_y', 'target_w', 'target_h': Bounding box coordinates of the
223+
target face in the database.
224+
- 'source_x', 'source_y', 'source_w', 'source_h': Bounding box coordinates of the
225+
detected face in the source image.
226+
- '{model_name}_{distance_metric}': Similarity score between the faces based on the
227+
specified model and distance metric
276228
"""
277229
return recognition.find(
278230
img_path=img_path,
@@ -302,25 +254,20 @@ def represent(
302254
img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
303255
or a base64 encoded image. If the source image contains multiple faces, the result will
304256
include information for each detected face.
305-
306257
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
307-
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace
308-
258+
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face).
309259
enforce_detection (boolean): If no face is detected in an image, raise an exception.
310-
Default is True. Set to False to avoid the exception for low-resolution images.
311-
260+
Set to False to avoid the exception for low-resolution images
261+
(default is True).
312262
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
313-
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8'.
314-
315-
align (boolean): Perform alignment based on the eye positions.
316-
263+
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
264+
align (boolean): Perform alignment based on the eye positions (default is True).
317265
normalization (string): Normalize the input image before feeding it to the model.
318266
Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
319-
267+
(default is base).
320268
Returns:
321269
results (List[Dict[str, Any]]): A list of dictionaries, each containing the
322270
following fields:
323-
324271
- embedding (np.array): Multidimensional vector representing facial features.
325272
The number of dimensions varies based on the reference model
326273
(e.g., FaceNet returns 128 dimensions, VGG-Face returns 4096 dimensions).
@@ -359,13 +306,13 @@ def stream(
359306
in the database will be considered in the decision-making process.
360307
361308
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
362-
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace
309+
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face).
363310
364311
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
365-
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8'.
312+
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
366313
367314
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
368-
'euclidean', 'euclidean_l2'.
315+
'euclidean', 'euclidean_l2' (default is cosine).
369316
370317
enable_face_analysis (bool): Flag to enable face analysis (default is True).
371318
@@ -408,22 +355,15 @@ def extract_faces(
408355
Args:
409356
img_path (str or np.ndarray): Path to the first image. Accepts exact image path
410357
as a string, numpy array (BGR), or base64 encoded images.
411-
412358
target_size (tuple): final shape of facial image. black pixels will be
413-
added to resize the image.
414-
359+
added to resize the image (default is (224, 224)).
415360
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
416361
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv)
417-
418362
enforce_detection (boolean): If no face is detected in an image, raise an exception.
419-
Default is True. Set to False to avoid the exception for low-resolution images.
420-
363+
Set to False to avoid the exception for low-resolution images (default is True).
421364
align (bool): Flag to enable face alignment (default is True).
422-
423365
grayscale (boolean): Flag to convert the image to grayscale before
424366
processing (default is False).
425-
426-
427367
Returns:
428368
results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
429369
- "face" (np.ndarray): The detected face as a NumPy array.

deepface/basemodels/VGGFace.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
import os
33
import gdown
44
import numpy as np
5-
from deepface.commons import functions
6-
from deepface.commons.logger import Logger
5+
from deepface.commons import functions, distance
76
from deepface.models.FacialRecognition import FacialRecognition
7+
from deepface.commons.logger import Logger
88

99
logger = Logger(module="basemodels.VGGFace")
1010

@@ -20,9 +20,7 @@
2020
Flatten,
2121
Dropout,
2222
Activation,
23-
Lambda,
2423
)
25-
from keras import backend as K
2624
else:
2725
from tensorflow.keras.models import Model, Sequential
2826
from tensorflow.keras.layers import (
@@ -32,9 +30,7 @@
3230
Flatten,
3331
Dropout,
3432
Activation,
35-
Lambda,
3633
)
37-
from tensorflow.keras import backend as K
3834

3935
# ---------------------------------------
4036

@@ -58,7 +54,11 @@ def find_embeddings(self, img: np.ndarray) -> List[float]:
5854
"""
5955
# model.predict causes memory issue when it is called in a for loop
6056
# embedding = model.predict(img, verbose=0)[0].tolist()
61-
return self.model(img, training=False).numpy()[0].tolist()
57+
# having a normalization layer in the descriptor causes trouble for some gpu users (e.g. issues 957, 966)
58+
# instead, we now calculate the l2 normalization the traditional way rather than with the keras backend
59+
embedding = self.model(img, training=False).numpy()[0].tolist()
60+
embedding = distance.l2_normalize(embedding)
61+
return embedding.tolist()
6262

6363

6464
def base_model() -> Sequential:
@@ -144,9 +144,10 @@ def load_model(
144144
# as described here: https://github.com/serengil/deepface/issues/944
145145
base_model_output = Sequential()
146146
base_model_output = Flatten()(model.layers[-5].output)
147-
base_model_output = Lambda(lambda x: K.l2_normalize(x, axis=1), name="norm_layer")(
148-
base_model_output
149-
)
147+
# keras backend's l2 normalization layer causes trouble for some gpu users (e.g. issues 957, 966)
148+
# base_model_output = Lambda(lambda x: K.l2_normalize(x, axis=1), name="norm_layer")(
149+
# base_model_output
150+
# )
150151
vgg_face_descriptor = Model(inputs=model.input, outputs=base_model_output)
151152

152153
return vgg_face_descriptor

0 commit comments

Comments
 (0)