Commit a0fda4c

Add SFace face recognizer cpp demo (#259)
1 parent ec5b2c8 commit a0fda4c

3 files changed: +350 −0 lines changed
models/face_recognition_sface/CMakeLists.txt

+11

```cmake
cmake_minimum_required(VERSION 3.24.0)
project(opencv_zoo_face_recognition_sface)

set(OPENCV_VERSION "4.9.0")
set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")

# Find OpenCV
find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})

add_executable(demo demo.cpp)
target_link_libraries(demo ${OpenCV_LIBS})
```

models/face_recognition_sface/README.md

+17
````diff
@@ -24,6 +24,7 @@ Results of accuracy evaluation with [tools/eval](../../tools/eval).
 
 Run the following command to try the demo:
 
+### Python
 ```shell
 # recognize on images
 python demo.py --target /path/to/image1 --query /path/to/image2
@@ -32,6 +33,22 @@ python demo.py --target /path/to/image1 --query /path/to/image2
 python demo.py --help
 ```
 
+### C++
+Install latest OpenCV and CMake >= 3.24.0 to get started with:
+
+```shell
+# A typical and default installation path of OpenCV is /usr/local
+cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
+cmake --build build
+
+# detect on camera input
+./build/demo -t=/path/to/target_face
+# detect on an image
+./build/demo -t=/path/to/target_face -q=/path/to/query_face -v
+# get help messages
+./build/demo -h
+```
+
 ### Example outputs
 
 ![sface demo](./example_outputs/demo.jpg)
````
models/face_recognition_sface/demo.cpp

+322

```cpp
#include "opencv2/opencv.hpp"
#include "opencv2/core/types.hpp"

#include <iostream>
#include <string>
#include <vector>

const std::vector<std::pair<int, int>> backend_target_pairs = {
    {cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_CPU},
    {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA},
    {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16},
    {cv::dnn::DNN_BACKEND_TIMVX, cv::dnn::DNN_TARGET_NPU},
    {cv::dnn::DNN_BACKEND_CANN, cv::dnn::DNN_TARGET_NPU}
};

class YuNet
{
public:
    YuNet(const std::string& model_path,
          const cv::Size& input_size,
          const float conf_threshold,
          const float nms_threshold,
          const int top_k,
          const int backend_id,
          const int target_id)
    {
        _detector = cv::FaceDetectorYN::create(
            model_path, "", input_size, conf_threshold, nms_threshold, top_k, backend_id, target_id);
    }

    void setInputSize(const cv::Size& input_size)
    {
        _detector->setInputSize(input_size);
    }

    void setTopK(const int top_k)
    {
        _detector->setTopK(top_k);
    }

    // Each row of the returned Mat is one detection:
    // [x, y, w, h, 10 landmark coordinates, score]
    cv::Mat infer(const cv::Mat& image)
    {
        cv::Mat result;
        _detector->detect(image, result);
        return result;
    }

private:
    cv::Ptr<cv::FaceDetectorYN> _detector;
};

class SFace
{
public:
    SFace(const std::string& model_path,
          const int backend_id,
          const int target_id,
          const int distance_type)
        : _distance_type(static_cast<cv::FaceRecognizerSF::DisType>(distance_type))
    {
        _recognizer = cv::FaceRecognizerSF::create(model_path, "", backend_id, target_id);
    }

    cv::Mat extractFeatures(const cv::Mat& orig_image, const cv::Mat& face_image)
    {
        // Align and crop the detected face from the original image
        cv::Mat target_aligned;
        _recognizer->alignCrop(orig_image, face_image, target_aligned);
        // Extract features from the cropped face
        cv::Mat target_features;
        _recognizer->feature(target_aligned, target_features);
        return target_features.clone();
    }

    std::pair<double, bool> matchFeatures(const cv::Mat& target_features, const cv::Mat& query_features)
    {
        const double score = _recognizer->match(target_features, query_features, _distance_type);
        if (_distance_type == cv::FaceRecognizerSF::DisType::FR_COSINE)
        {
            // Higher cosine similarity means a closer match
            return {score, score >= _threshold_cosine};
        }
        // Lower L2 distance means a closer match
        return {score, score <= _threshold_norml2};
    }

private:
    cv::Ptr<cv::FaceRecognizerSF> _recognizer;
    cv::FaceRecognizerSF::DisType _distance_type;
    double _threshold_cosine = 0.363;
    double _threshold_norml2 = 1.128;
};

cv::Mat visualize(const cv::Mat& image,
                  const cv::Mat& faces,
                  const std::vector<std::pair<double, bool>>& matches,
                  const float fps = -0.1F,
                  const cv::Size& target_size = cv::Size(512, 512))
{
    static const cv::Scalar matched_box_color{0, 255, 0};
    static const cv::Scalar mismatched_box_color{0, 0, 255};

    // Camera mode: draw the first face and the FPS onto a copy of the frame
    if (fps >= 0)
    {
        cv::Mat output_image = image.clone();

        const int x1 = static_cast<int>(faces.at<float>(0, 0));
        const int y1 = static_cast<int>(faces.at<float>(0, 1));
        const int w = static_cast<int>(faces.at<float>(0, 2));
        const int h = static_cast<int>(faces.at<float>(0, 3));
        const auto match = matches.at(0);

        cv::Scalar box_color = match.second ? matched_box_color : mismatched_box_color;
        // Draw bounding box
        cv::rectangle(output_image, cv::Rect(x1, y1, w, h), box_color, 2);
        // Draw match score
        cv::putText(output_image, cv::format("%.4f", match.first), cv::Point(x1, y1 + 12), cv::FONT_HERSHEY_DUPLEX, 0.30, box_color);
        // Draw FPS
        cv::putText(output_image, cv::format("FPS: %.2f", fps), cv::Point(0, 15), cv::FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2);

        return output_image;
    }

    // Image mode: letterbox the image into target_size and draw all faces
    cv::Mat output_image = cv::Mat::zeros(target_size, CV_8UC3);

    // Determine new height and width, keeping the aspect ratio of the original image
    const double ratio = std::min(static_cast<double>(target_size.height) / image.rows,
                                  static_cast<double>(target_size.width) / image.cols);
    const int new_height = static_cast<int>(image.rows * ratio);
    const int new_width = static_cast<int>(image.cols * ratio);

    // Resize the original image, maintaining aspect ratio
    cv::Mat resize_out;
    cv::resize(image, resize_out, cv::Size(new_width, new_height), 0, 0, cv::INTER_LINEAR);

    // Determine the top-left corner in resized dimensions
    const int top = std::max(0, target_size.height - new_height) / 2;
    const int left = std::max(0, target_size.width - new_width) / 2;

    // Copy the resized image into the target output image
    const cv::Rect roi = cv::Rect(cv::Point(left, top), cv::Size(new_width, new_height));
    cv::Mat out_sub_image = output_image(roi);
    resize_out.copyTo(out_sub_image);

    for (int i = 0; i < faces.rows; ++i)
    {
        const int x1 = static_cast<int>(faces.at<float>(i, 0) * ratio) + left;
        const int y1 = static_cast<int>(faces.at<float>(i, 1) * ratio) + top;
        const int w = static_cast<int>(faces.at<float>(i, 2) * ratio);
        const int h = static_cast<int>(faces.at<float>(i, 3) * ratio);
        const auto match = matches.at(i);

        cv::Scalar box_color = match.second ? matched_box_color : mismatched_box_color;
        // Draw bounding box
        cv::rectangle(output_image, cv::Rect(x1, y1, w, h), box_color, 2);
        // Draw match score
        cv::putText(output_image, cv::format("%.4f", match.first), cv::Point(x1, y1 + 12), cv::FONT_HERSHEY_DUPLEX, 0.30, box_color);
    }
    return output_image;
}

int main(int argc, char** argv)
{
    cv::CommandLineParser parser(argc, argv,
        // General options
        "{help h | | Print this message}"
        "{backend_target b | 0 | Set DNN backend target pair:\n"
        "0: (default) OpenCV implementation + CPU,\n"
        "1: CUDA + GPU (CUDA),\n"
        "2: CUDA + GPU (CUDA FP16),\n"
        "3: TIM-VX + NPU,\n"
        "4: CANN + NPU}"
        "{save s | false | Whether to save result image or not}"
        "{vis v | false | Whether to visualize result image or not}"
        // SFace options
        "{target_face t | | Set path to input image 1 (target face)}"
        "{query_face q | | Set path to input image 2 (query face), omit if using camera}"
        "{model m | face_recognition_sface_2021dec.onnx | Set path to the model}"
        "{distance_type d | 0 | 0 = cosine, 1 = norm_l2}"
        // YuNet options
        "{yunet_model | ../face_detection_yunet/face_detection_yunet_2023mar.onnx | Set path to the YuNet model}"
        "{detect_threshold | 0.9 | Set the minimum confidence for the model\n"
        "to identify a face. Filter out faces of\n"
        "conf < conf_threshold}"
        "{nms_threshold | 0.3 | Set the threshold to suppress overlapped boxes.\n"
        "Suppress boxes if IoU(box1, box2) >= nms_threshold;\n"
        "the one with the higher score is kept.}"
        "{top_k | 5000 | Keep top_k bounding boxes before NMS}"
    );

    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }
    // General CLI options
    const int backend = parser.get<int>("backend_target");
    const bool save_flag = parser.get<bool>("save");
    const bool vis_flag = parser.get<bool>("vis");
    const int backend_id = backend_target_pairs.at(backend).first;
    const int target_id = backend_target_pairs.at(backend).second;

    // YuNet CLI options
    const std::string detector_model_path = parser.get<std::string>("yunet_model");
    const float detect_threshold = parser.get<float>("detect_threshold");
    const float nms_threshold = parser.get<float>("nms_threshold");
    const int top_k = parser.get<int>("top_k");

    // Use YuNet as the face detector
    auto face_detector = YuNet(
        detector_model_path, cv::Size(320, 320), detect_threshold, nms_threshold, top_k, backend_id, target_id);

    // SFace CLI options
    const std::string target_path = parser.get<std::string>("target_face");
    const std::string query_path = parser.get<std::string>("query_face");
    const std::string model_path = parser.get<std::string>("model");
    const int distance_type = parser.get<int>("distance_type");

    auto face_recognizer = SFace(model_path, backend_id, target_id, distance_type);

    if (target_path.empty())
    {
        CV_Error(cv::Error::StsError, "Path to target image is not specified");
    }

    cv::Mat target_image = cv::imread(target_path);
    // Detect a single face in the target image
    face_detector.setInputSize(target_image.size());
    face_detector.setTopK(1);
    cv::Mat target_face = face_detector.infer(target_image);
    if (target_face.empty())
    {
        CV_Error(cv::Error::StsError, "No face detected in target image " + target_path);
    }
    // Extract features from the target face
    cv::Mat target_features = face_recognizer.extractFeatures(target_image, target_face.row(0));

    if (!query_path.empty()) // use image
    {
        // Detect any faces in the query image
        cv::Mat query_image = cv::imread(query_path);
        face_detector.setInputSize(query_image.size());
        face_detector.setTopK(5000);
        cv::Mat query_faces = face_detector.infer(query_image);

        // Store match scores for visualization
        std::vector<std::pair<double, bool>> matches;

        for (int i = 0; i < query_faces.rows; ++i)
        {
            // Extract features from the query face
            cv::Mat query_features = face_recognizer.extractFeatures(query_image, query_faces.row(i));
            // Measure similarity of the target face to the query face
            const auto match = face_recognizer.matchFeatures(target_features, query_features);
            matches.push_back(match);

            const int x1 = static_cast<int>(query_faces.at<float>(i, 0));
            const int y1 = static_cast<int>(query_faces.at<float>(i, 1));
            const int w = static_cast<int>(query_faces.at<float>(i, 2));
            const int h = static_cast<int>(query_faces.at<float>(i, 3));
            const float conf = query_faces.at<float>(i, 14); // detection score is column 14

            std::cout << cv::format("%d: x1=%d, y1=%d, w=%d, h=%d, conf=%.4f, match=%.4f\n", i, x1, y1, w, h, conf, match.first);
        }

        if (save_flag || vis_flag)
        {
            auto vis_target = visualize(target_image, target_face, {{1.0, true}});
            auto vis_query = visualize(query_image, query_faces, matches);
            cv::Mat output_image;
            cv::hconcat(vis_target, vis_query, output_image);

            if (save_flag)
            {
                std::cout << "Results are saved to result.jpg\n";
                cv::imwrite("result.jpg", output_image);
            }
            if (vis_flag)
            {
                cv::namedWindow(query_path, cv::WINDOW_AUTOSIZE);
                cv::imshow(query_path, output_image);
                cv::waitKey(0);
            }
        }
    }
    else // use video capture
    {
        const int device_id = 0;
        auto cap = cv::VideoCapture(device_id);
        const int w = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH));
        const int h = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT));
        face_detector.setInputSize(cv::Size(w, h));

        auto tick_meter = cv::TickMeter();
        cv::Mat query_frame;

        while (cv::waitKey(1) < 0)
        {
            bool has_frame = cap.read(query_frame);
            if (!has_frame)
            {
                std::cout << "No frames grabbed! Exiting ...\n";
                break;
            }
            tick_meter.start();
            // Detect faces in the webcam frame
            cv::Mat query_faces = face_detector.infer(query_frame);
            tick_meter.stop();

            // Skip matching when no face is detected in the frame
            if (query_faces.empty())
            {
                cv::imshow("SFace Demo", query_frame);
                tick_meter.reset();
                continue;
            }

            // Extract features from the query face
            cv::Mat query_features = face_recognizer.extractFeatures(query_frame, query_faces.row(0));
            // Measure similarity of the target face to the query face
            const auto match = face_recognizer.matchFeatures(target_features, query_features);

            const auto fps = static_cast<float>(tick_meter.getFPS());

            auto vis_target = visualize(target_image, target_face, {{1.0, true}}, -0.1F, cv::Size(w, h));
            auto vis_query = visualize(query_frame, query_faces, {match}, fps);
            cv::Mat output_image;
            cv::hconcat(vis_target, vis_query, output_image);

            // Visualize in a new window
            cv::imshow("SFace Demo", output_image);

            tick_meter.reset();
        }
    }
    return 0;
}
```
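For reference, the detect, align, and match steps above can also be exercised in isolation. The sketch below runs the same pipeline on two still images, using the decision thresholds hard-coded in the demo (cosine >= 0.363, L2 <= 1.128). The image file names are hypothetical, the two ONNX models are assumed to sit in the working directory, and error handling is omitted:

```cpp
// Minimal sketch of the detect -> alignCrop -> feature -> match flow.
// face1.jpg / face2.jpg are hypothetical inputs with at least one face each.
#include "opencv2/opencv.hpp"
#include <iostream>

int main()
{
    auto detector = cv::FaceDetectorYN::create(
        "face_detection_yunet_2023mar.onnx", "", cv::Size(320, 320));
    auto recognizer = cv::FaceRecognizerSF::create(
        "face_recognition_sface_2021dec.onnx", "");

    cv::Mat img1 = cv::imread("face1.jpg"), img2 = cv::imread("face2.jpg");
    cv::Mat faces1, faces2;
    detector->setInputSize(img1.size());
    detector->detect(img1, faces1); // rows: [x, y, w, h, 10 landmark coords, score]
    detector->setInputSize(img2.size());
    detector->detect(img2, faces2);

    // Align each face crop with its landmarks, then embed it
    cv::Mat aligned, feat1, feat2;
    recognizer->alignCrop(img1, faces1.row(0), aligned);
    recognizer->feature(aligned, feat1);
    feat1 = feat1.clone(); // clone before the next feature() call, as the demo does
    recognizer->alignCrop(img2, faces2.row(0), aligned);
    recognizer->feature(aligned, feat2);

    // Same decision rules as the demo's matchFeatures()
    const double cos = recognizer->match(feat1, feat2, cv::FaceRecognizerSF::DisType::FR_COSINE);
    const double l2 = recognizer->match(feat1, feat2, cv::FaceRecognizerSF::DisType::FR_NORM_L2);
    std::cout << "cosine: " << cos << " -> " << (cos >= 0.363 ? "match" : "no match") << "\n";
    std::cout << "norm_l2: " << l2 << " -> " << (l2 <= 1.128 ? "match" : "no match") << "\n";
    return 0;
}
```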
