Skip to content

Commit 94937cf

Browse files
committed
checking file's content type instead of extension
1 parent 357a397 commit 94937cf

File tree

4 files changed

+63
-13
lines changed

4 files changed

+63
-13
lines changed

deepface/modules/preprocessing.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
# built-in dependencies
12
import os
23
from typing import Union, Tuple
34
import base64
45
from pathlib import Path
6+
import imghdr
57

68
# 3rd party
79
import numpy as np
@@ -82,16 +84,16 @@ def load_base64(uri: str) -> np.ndarray:
8284
if len(encoded_data_parts) < 2:
8385
raise ValueError("format error in base64 encoded string")
8486

87+
encoded_data = encoded_data_parts[1]
88+
decoded_bytes = base64.b64decode(encoded_data)
89+
file_type = imghdr.what(None, h=decoded_bytes)
90+
8591
# as in the find functionality, only jpg and png images are accepted
86-
if not (
87-
uri.startswith("data:image/jpeg")
88-
or uri.startswith("data:image/jpg")
89-
or uri.startswith("data:image/png")
90-
):
91-
raise ValueError(f"input image can be jpg, jpeg or png, but it is {encoded_data_parts}")
92+
# checking the content type is a safer option than trusting the file extension
93+
if file_type not in ["jpeg", "png"]:
94+
raise ValueError(f"input image can be jpg or png, but it is {file_type}")
9295

93-
encoded_data = encoded_data_parts[1]
94-
nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8)
96+
nparr = np.fromstring(decoded_bytes, np.uint8)
9597
img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
9698
# img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
9799
return img_bgr

deepface/modules/recognition.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import pickle
44
from typing import List, Union, Optional, Dict, Any
55
import time
6+
import imghdr
67

78
# 3rd party dependencies
89
import numpy as np
@@ -296,8 +297,9 @@ def __list_images(path: str) -> List[str]:
296297
images = []
297298
for r, _, f in os.walk(path):
298299
for file in f:
299-
if file.lower().endswith((".jpg", ".jpeg", ".png")):
300-
exact_path = os.path.join(r, file)
300+
exact_path = os.path.join(r, file)
301+
file_type = imghdr.what(exact_path)
302+
if file_type in ["jpeg", "png"]:
301303
images.append(exact_path)
302304
return images
303305

tests/test_extract_faces.py

+23
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
2+
import base64
23
import pytest
34
from deepface import DeepFace
5+
from deepface.modules import preprocessing
46
from deepface.commons.logger import Logger
57

68
logger = Logger("tests/test_extract_faces.py")
@@ -48,3 +50,24 @@ def test_backends_for_not_enforced_detection_with_non_facial_inputs():
4850
)
4951
assert objs[0]["face"].shape == (224, 224, 3)
5052
logger.info("✅ extract_faces for not enforced detection and non-facial image test is done")
53+
54+
55+
def test_file_types_while_loading_base64():
    """
    load_base64 should raise ValueError for the base64 form of
    dataset/img47.jpg and should decode the base64 form of
    dataset/img1.jpg into a 3 dimensional array
    """
    # this input is expected to be refused with the file type error message
    rejected_uri = image_to_base64(image_path="dataset/img47.jpg")
    with pytest.raises(ValueError, match="input image can be jpg or png, but it is"):
        _ = preprocessing.load_base64(uri=rejected_uri)

    # this input is expected to be decoded without any error
    accepted_uri = image_to_base64(image_path="dataset/img1.jpg")
    loaded_img = preprocessing.load_base64(uri=accepted_uri)

    # 3 dimensional image should be loaded
    assert len(loaded_img.shape) == 3
68+
69+
70+
def image_to_base64(image_path: str, mime_type: str = "image/jpeg") -> str:
    """
    Read a file and return its content as a base64 encoded data uri
    Args:
        image_path (str): path of the file to encode
        mime_type (str): media type written into the uri prefix. It is only
            a label; the file content is encoded as is whatever its real
            type is. Defaults to image/jpeg.
    Returns:
        uri (str): "data:<mime_type>," followed by the base64 encoded content
    """
    # binary mode: the raw bytes must be encoded without any text decoding
    with open(image_path, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
    return f"data:{mime_type}," + encoded_string

tests/test_find.py

+26-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import pandas as pd
44
from deepface import DeepFace
55
from deepface.modules import verification
6+
from deepface.modules import recognition
67
from deepface.commons.logger import Logger
78

89
logger = Logger("tests/test_find.py")
@@ -11,7 +12,7 @@
1112

1213

1314
def test_find_with_exact_path():
14-
img_path = os.path.join("dataset","img1.jpg")
15+
img_path = os.path.join("dataset", "img1.jpg")
1516
dfs = DeepFace.find(img_path=img_path, db_path="dataset", silent=True)
1617
assert len(dfs) > 0
1718
for df in dfs:
@@ -31,7 +32,7 @@ def test_find_with_exact_path():
3132

3233

3334
def test_find_with_array_input():
34-
img_path = os.path.join("dataset","img1.jpg")
35+
img_path = os.path.join("dataset", "img1.jpg")
3536
img1 = cv2.imread(img_path)
3637
dfs = DeepFace.find(img1, db_path="dataset", silent=True)
3738
assert len(dfs) > 0
@@ -53,7 +54,7 @@ def test_find_with_array_input():
5354

5455

5556
def test_find_with_extracted_faces():
56-
img_path = os.path.join("dataset","img1.jpg")
57+
img_path = os.path.join("dataset", "img1.jpg")
5758
face_objs = DeepFace.extract_faces(img_path)
5859
img = face_objs[0]["face"]
5960
dfs = DeepFace.find(img, db_path="dataset", detector_backend="skip", silent=True)
@@ -72,3 +73,25 @@ def test_find_with_extracted_faces():
7273
logger.debug(df.head())
7374
assert df.shape[0] > 0
7475
logger.info("✅ test find for extracted face input done")
76+
77+
78+
def test_filetype_for_find():
    """
    only images as jpg and png can be loaded into database
    """
    img_path = os.path.join("dataset", "img1.jpg")
    dfs = DeepFace.find(img_path=img_path, db_path="dataset", silent=True)

    # fail with a clear assertion instead of an IndexError on dfs[0]
    assert len(dfs) > 0
    df = dfs[0]

    # img47 is webp even though its extension is jpg
    # build the expected identity with os.path.join instead of a hard-coded
    # "/" so that the comparison cannot pass vacuously on platforms with a
    # different path separator
    excluded_identity = os.path.join("dataset", "img47.jpg")
    assert df[df["identity"] == excluded_identity].shape[0] == 0
89+
90+
91+
def test_filetype_for_find_bulk_embeddings():
    """
    image listing of the dataset folder should not be empty and should not
    contain img47
    """
    imgs = recognition.__list_images("dataset")

    assert len(imgs) > 0

    # img47 is webp even though its extension is jpg
    # build the path with os.path.join instead of a hard-coded "/" so that
    # the membership check is meaningful on every platform
    assert os.path.join("dataset", "img47.jpg") not in imgs

0 commit comments

Comments
 (0)