RTFSNet_file.py
# Video preprocessing for RTFS-Net: convert a clip to 25 fps, detect and align
# the speaker's face with MediaPipe, and extract 88x88 grayscale mouth crops.
import os
import subprocess

import cv2
import mediapipe as mp
import numpy as np


def convert_video_to_25fps(input_video_path, output_video_path):
    try:
        # -y overwrites the output file if it already exists.
        subprocess.run(['ffmpeg', '-y', '-i', input_video_path, '-r', '25', output_video_path], check=True)
        print(f"Video converted to 25 fps and saved as {output_video_path}")
    except subprocess.CalledProcessError as e:
        print("An error occurred during video conversion:", e)


def get_lips_bbox(landmarks, lip_indices):
    # cv2.boundingRect expects int32 (or float32) points.
    lip_points = np.array([landmarks[i] for i in lip_indices], dtype=np.int32)
    x, y, w, h = cv2.boundingRect(lip_points)
    return x, y, w, h


# Align the face using eye landmarks.
def align_face(image, landmarks, desired_left_eye=(0.35, 0.35), desired_face_width=256, desired_face_height=None):
    if desired_face_height is None:
        desired_face_height = desired_face_width
    # Indices of the left and right eye corners in the MediaPipe Face Mesh.
    left_eye_idx = 130
    right_eye_idx = 359
    # Extract the left and right eye (x, y) coordinates.
    left_eye_center = landmarks[left_eye_idx]
    right_eye_center = landmarks[right_eye_idx]
    # Compute the angle between the eye centers.
    dY = right_eye_center[1] - left_eye_center[1]
    dX = right_eye_center[0] - left_eye_center[0]
    angle = np.degrees(np.arctan2(dY, dX))
    # The desired right-eye x-coordinate mirrors the desired left-eye x-coordinate.
    desired_right_eye_x = 1.0 - desired_left_eye[0]
    # Scale the output so the inter-eye distance matches the desired distance.
    dist = np.sqrt((dX ** 2) + (dY ** 2))
    desired_dist = (desired_right_eye_x - desired_left_eye[0]) * desired_face_width
    scale = desired_dist / dist
    # Compute the center (x, y)-coordinates between the two eyes in the input image.
    eyes_center = ((left_eye_center[0] + right_eye_center[0]) / 2.0,
                   (left_eye_center[1] + right_eye_center[1]) / 2.0)
    # Build the rotation matrix for rotating and scaling the face.
    M = cv2.getRotationMatrix2D(eyes_center, angle, scale)
    # Update the translation component so the eyes land at the desired position.
    tX = desired_face_width * 0.5
    tY = desired_face_height * desired_left_eye[1]
    M[0, 2] += (tX - eyes_center[0])
    M[1, 2] += (tY - eyes_center[1])
    # Apply the affine transformation.
    (w, h) = (desired_face_width, desired_face_height)
    output = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC)
    # Return the aligned face and the transformation matrix.
    return output, M


# Transform landmarks with the same affine matrix used for face alignment.
def transform_landmarks(landmarks, M):
    transformed_landmarks = []
    for landmark in landmarks:
        # Apply the 2x3 transformation matrix to the point in homogeneous coordinates.
        x, y = landmark
        transformed_point = np.dot(M, np.array([x, y, 1]))
        transformed_landmarks.append((int(transformed_point[0]), int(transformed_point[1])))
    return transformed_landmarks
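

# A minimal sketch of how align_face and transform_landmarks compose (assumes a
# BGR image `face_crop` and its pixel-space Face Mesh landmark list `points`,
# as produced inside get_video_crops below):
#
#     aligned_face, M = align_face(face_crop, points)
#     aligned_points = transform_landmarks(points, M)
#     # The left eye corner now sits near (0.35 * 256, 0.35 * 256) = (89.6, 89.6).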


def get_video_crops(video_path):
    # Landmark indices that bound the mouth region in the MediaPipe Face Mesh.
    lip_indices = [187, 411, 136, 365]
    # Initialize MediaPipe Face Detection and Face Mesh.
    mp_face_detection = mp.solutions.face_detection
    face_detection = mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5)
    mp_face_mesh = mp.solutions.face_mesh
    face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.2, refine_landmarks=True)
    convert_video_to_25fps(video_path, 'temp.mp4')
    cap = cv2.VideoCapture('temp.mp4')
    lips_crops_bw_list = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Convert the frame to RGB for MediaPipe.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Use face detection to find faces.
        detection_results = face_detection.process(rgb_frame)
        if detection_results.detections:
            for detection in detection_results.detections:
                bboxC = detection.location_data.relative_bounding_box
                ih, iw, _ = frame.shape
                x, y, w, h = int(bboxC.xmin * iw), int(bboxC.ymin * ih), int(bboxC.width * iw), int(bboxC.height * ih)
                # Crop the face if the bounding box is valid.
                if x >= 0 and y >= 0 and w > 0 and h > 0:
                    face_crop = frame[y:y+h, x:x+w]
                    face_crop = cv2.resize(face_crop, (400, 400))
                    # Run Face Mesh on the crop to get landmarks for alignment.
                    face_mesh_results = face_mesh.process(cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB))
                    if face_mesh_results.multi_face_landmarks:
                        for face_landmarks in face_mesh_results.multi_face_landmarks:
                            points = [(int(p.x * face_crop.shape[1]), int(p.y * face_crop.shape[0])) for p in face_landmarks.landmark]
                            aligned_face, M = align_face(face_crop, points)
                            # Crop the lips from the aligned face.
                            transformed_landmarks = transform_landmarks(points, M)
                            x, y, w, h = get_lips_bbox(transformed_landmarks, lip_indices)
                            lips_crop = aligned_face[y:y+h, x:x+w]
                            lips_crop = cv2.resize(lips_crop, (88, 88))  # Resize to the standard 88x88 mouth-crop size.
                            lips_crop_bw = cv2.cvtColor(lips_crop, cv2.COLOR_BGR2GRAY)
                            # cv2.imshow('Lips Crop', lips_crop_bw)
                            lips_crops_bw_list.append(lips_crop_bw)
        # Press 'q' to quit early (only useful with the imshow above enabled).
        # if cv2.waitKey(1) & 0xFF == ord('q'):
        #     break
    # np.save('lips_crops_bw.npy', np.array(lips_crops_bw_list))
    # Release resources.
    face_mesh.close()
    face_detection.close()
    cap.release()
    cv2.destroyAllWindows()
    os.remove('temp.mp4')
    print(np.array(lips_crops_bw_list).shape)
    return np.array(lips_crops_bw_list)
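

# Usage sketch (the video path is hypothetical): extract the grayscale mouth
# crops for a clip and save them for the lip-reading front end. With one face
# detected per frame, the result has shape (num_frames, 88, 88).
#
#     crops = get_video_crops('speaker.mp4')
#     np.save('lips_crops_bw.npy', crops)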