Commit 7018dad

Pipeline with kpt and act (PaddlePaddle#5399)

* add keypoint infer and visualize into Pipeline
* add independent action model inference
* add action inference into pipeline (still in progress)
* test different display frames and normalization methods
* use bbox and scale normalization
* remove debug info and optimize code structure
* remove useless visual param
* make action parameters configurable

1 parent 6a17524 · commit 7018dad

9 files changed: +601 −38

deploy/pphuman/config/infer_cfg.yml (+13 −1)

@@ -1,9 +1,10 @@
 crop_thresh: 0.5
 attr_thresh: 0.5
+kpt_thresh: 0.2
 visual: True
 
 DET:
-  model_dir: output_inference/mot_ppyolov3//
+  model_dir: output_inference/mot_ppyolov3/
   batch_size: 1
 
 ATTR:
@@ -14,3 +15,14 @@ MOT:
   model_dir: output_inference/mot_ppyolov3/
   tracker_config: deploy/pphuman/config/tracker_config.yml
   batch_size: 1
+
+KPT:
+  model_dir: output_inference/dark_hrnet_w32_256x192/
+  batch_size: 8
+
+ACTION:
+  model_dir: output_inference/STGCN
+  batch_size: 1
+  max_frames: 50
+  display_frames: 80
+  coord_size: [384, 512]
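The two new sections carry the action pipeline's tunables: max_frames is the length of the skeleton window fed to the ST-GCN action model, display_frames is how long a recognized action label stays on screen, and coord_size is the fixed scale keypoints are normalized to before inference. Below is a minimal standalone sketch of reading these values with PyYAML; the pipeline itself loads the file through merge_cfg from pipe_utils.py, which presumably also merges CLI overrides.

import yaml

# A minimal sketch, assuming the default config path below.
with open('deploy/pphuman/config/infer_cfg.yml') as f:
    cfg = yaml.safe_load(f)

kpt_cfg = cfg['KPT']
action_cfg = cfg['ACTION']
print(kpt_cfg['batch_size'])         # 8: keypoint model batch size
print(action_cfg['max_frames'])      # 50: skeleton window length for ST-GCN
print(action_cfg['display_frames'])  # 80: frames an action label stays visible
print(action_cfg['coord_size'])      # [384, 512]: target keypoint scale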

deploy/pphuman/pipe_utils.py (+36 −2)

@@ -290,11 +290,15 @@ def crop_image_with_det(batch_input, det_res):
 def crop_image_with_mot(input, mot_res):
     res = mot_res['boxes']
     crop_res = []
+    new_bboxes = []
+    ori_bboxes = []
     for box in res:
-        crop_image, new_box, ori_box = expand_crop(input, box[1:])
+        crop_image, new_bbox, ori_bbox = expand_crop(input, box[1:])
         if crop_image is not None:
             crop_res.append(crop_image)
-    return crop_res
+            new_bboxes.append(new_bbox)
+            ori_bboxes.append(ori_bbox)
+    return crop_res, new_bboxes, ori_bboxes
 
 
 def parse_mot_res(input):
@@ -305,3 +309,33 @@ def parse_mot_res(input):
         res = [i, 0, score, xmin, ymin, xmin + w, ymin + h]
         mot_res.append(res)
     return {'boxes': np.array(mot_res)}
+
+
+def refine_keypoint_coordinary(kpts, bbox, coord_size):
+    """
+    This function is used to adjust coordinate values to a fixed scale.
+    """
+    tl = bbox[:, 0:2]
+    wh = bbox[:, 2:] - tl
+    tl = np.expand_dims(np.transpose(tl, (1, 0)), (2, 3))
+    wh = np.expand_dims(np.transpose(wh, (1, 0)), (2, 3))
+    target_w, target_h = coord_size
+    res = (kpts - tl) / wh * np.expand_dims(
+        np.array([[target_w], [target_h]]), (2, 3))
+    return res
+
+
+def parse_mot_keypoint(input, coord_size):
+    parsed_skeleton_with_mot = {}
+    ids = []
+    skeleton = []
+    for tracker_id, kpt_seq in input:
+        ids.append(tracker_id)
+        kpts = np.array(kpt_seq.kpts, dtype=np.float32)[:, :, :2]
+        kpts = np.expand_dims(np.transpose(kpts, [2, 0, 1]),
+                              -1)  # T, K, C -> C, T, K, 1
+        bbox = np.array(kpt_seq.bboxes, dtype=np.float32)
+        skeleton.append(refine_keypoint_coordinary(kpts, bbox, coord_size))
+    parsed_skeleton_with_mot["mot_id"] = ids
+    parsed_skeleton_with_mot["skeleton"] = skeleton
+    return parsed_skeleton_with_mot
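For context on the shapes here: after parse_mot_keypoint's transpose, kpts is (C, T, K, M) — 2 coordinates, T frames, K keypoints, M=1 tracked person — and bbox is (T, 4) in x1, y1, x2, y2 order, one box per frame. The toy check below uses made-up values and mirrors the arithmetic of refine_keypoint_coordinary rather than importing it, to show how broadcasting rescales every keypoint into a coord_size box.

import numpy as np

# Toy shapes: 2 coords, 3 frames, 17 keypoints, 1 tracked person.
kpts = np.random.rand(2, 3, 17, 1).astype(np.float32) * 100
bbox = np.array([[10, 20, 110, 220]] * 3, dtype=np.float32)  # (T, 4)

tl = bbox[:, 0:2]                      # (T, 2) top-left corners
wh = bbox[:, 2:] - tl                  # (T, 2) widths/heights
tl = np.expand_dims(tl.T, (2, 3))      # -> (2, T, 1, 1) for broadcasting
wh = np.expand_dims(wh.T, (2, 3))
target = np.expand_dims(np.array([[384.], [512.]]), (2, 3))
res = (kpts - tl) / wh * target        # keypoints now live in a 384x512 box
print(res.shape)                       # (2, 3, 17, 1)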

deploy/pphuman/pipeline.py (+88 −16)

@@ -30,10 +30,15 @@
 from python.infer import Detector, DetectorPicoDet
 from python.mot_sde_infer import SDE_Detector
 from python.attr_infer import AttrDetector
+from python.keypoint_infer import KeyPointDetector
+from python.keypoint_postprocess import translate_to_ori_images
+from python.action_infer import ActionRecognizer
+from python.action_utils import KeyPointCollector, ActionVisualCollector
+
 from pipe_utils import argsparser, print_arguments, merge_cfg, PipeTimer
-from pipe_utils import get_test_images, crop_image_with_det, crop_image_with_mot, parse_mot_res
+from pipe_utils import get_test_images, crop_image_with_det, crop_image_with_mot, parse_mot_res, parse_mot_keypoint
 from python.preprocess import decode_image
-from python.visualize import visualize_box_mask, visualize_attr
+from python.visualize import visualize_box_mask, visualize_attr, visualize_pose, visualize_action
 from pptracking.python.visualize import plot_tracking
 
 
@@ -299,9 +304,45 @@ def __init__(self,
                 trt_max_shape, trt_opt_shape, trt_calib_mode, cpu_threads,
                 enable_mkldnn)
         if self.with_action:
-            self.kpt_predictor = KeyPointDetector()
-            self.kpt_collector = KeyPointCollector()
-            self.action_predictor = ActionDetector()
+            kpt_cfg = self.cfg['KPT']
+            kpt_model_dir = kpt_cfg['model_dir']
+            kpt_batch_size = kpt_cfg['batch_size']
+            action_cfg = self.cfg['ACTION']
+            action_model_dir = action_cfg['model_dir']
+            action_batch_size = action_cfg['batch_size']
+            action_frames = action_cfg['max_frames']
+            display_frames = action_cfg['display_frames']
+            self.coord_size = action_cfg['coord_size']
+
+            self.kpt_predictor = KeyPointDetector(
+                kpt_model_dir,
+                device,
+                run_mode,
+                kpt_batch_size,
+                trt_min_shape,
+                trt_max_shape,
+                trt_opt_shape,
+                trt_calib_mode,
+                cpu_threads,
+                enable_mkldnn,
+                use_dark=False)
+            self.kpt_collector = KeyPointCollector(action_frames)
+
+            self.action_predictor = ActionRecognizer(
+                action_model_dir,
+                device,
+                run_mode,
+                action_batch_size,
+                trt_min_shape,
+                trt_max_shape,
+                trt_opt_shape,
+                trt_calib_mode,
+                cpu_threads,
+                enable_mkldnn,
+                window_size=action_frames)
+
+            self.action_visual_collector = ActionVisualCollector(
+                display_frames)
 
     def set_file_name(self, path):
         self.file_name = os.path.split(path)[-1]
@@ -412,7 +453,8 @@ def predict_video(self, capture):
 
             self.pipeline_res.update(mot_res, 'mot')
             if self.with_attr or self.with_action:
-                crop_input = crop_image_with_mot(frame, mot_res)
+                crop_input, new_bboxes, ori_bboxes = crop_image_with_mot(
+                    frame, mot_res)
 
             if self.with_attr:
                 if frame_id > self.warmup_frame:
@@ -424,17 +466,34 @@ def predict_video(self, capture):
                 self.pipeline_res.update(attr_res, 'attr')
 
             if self.with_action:
-                kpt_result = self.kpt_predictor.predict_image(crop_input)
-                self.pipeline_res.update(kpt_result, 'kpt')
-
-                self.kpt_collector.update(kpt_result)  # collect kpt output
-                state = self.kpt_collector.state()  # whether frame num is enough
-
+                kpt_pred = self.kpt_predictor.predict_image(
+                    crop_input, visual=False)
+                keypoint_vector, score_vector = translate_to_ori_images(
+                    kpt_pred, np.array(new_bboxes))
+                kpt_res = {}
+                kpt_res['keypoint'] = [
+                    keypoint_vector.tolist(), score_vector.tolist()
+                ] if len(keypoint_vector) > 0 else [[], []]
+                kpt_res['bbox'] = ori_bboxes
+                self.pipeline_res.update(kpt_res, 'kpt')
+
+                self.kpt_collector.update(kpt_res,
+                                          mot_res)  # collect kpt output
+                state = self.kpt_collector.get_state(
+                )  # whether frame num is enough or lost tracker
+
+                action_res = {}
                 if state:
-                    action_input = self.kpt_collector.collate(
-                    )  # reorgnize kpt output in ID
-                    action_res = self.action_predictor.predict_kpt(action_input)
-                    self.pipeline_res.update(action, 'action')
+                    collected_keypoint = self.kpt_collector.get_collected_keypoint(
+                    )  # reorganize kpt output with ID
+                    action_input = parse_mot_keypoint(collected_keypoint,
+                                                      self.coord_size)
+                    action_res = self.action_predictor.predict_skeleton_with_mot(
+                        action_input)
+                    self.pipeline_res.update(action_res, 'action')
+
+                if self.cfg['visual']:
+                    self.action_visual_collector.update(action_res)
 
             if frame_id > self.warmup_frame:
                 self.pipe_timer.img_num += 1
@@ -474,6 +533,19 @@ def visualize_video(self, image, result, frame_id, fps):
             image = visualize_attr(image, attr_res, boxes)
             image = np.array(image)
 
+        kpt_res = result.get('kpt')
+        if kpt_res is not None:
+            image = visualize_pose(
+                image,
+                kpt_res,
+                visual_thresh=self.cfg['kpt_thresh'],
+                returnimg=True)
+
+        action_res = result.get('action')
+        if action_res is not None:
+            image = visualize_action(image, mot_res['boxes'],
+                                     self.action_visual_collector, "Falling")
+
         return image
 
     def visualize_image(self, im_files, images, result):
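The predict_video changes wire the new flow together: per frame, keypoints are predicted on the MOT crops, mapped back to image coordinates with translate_to_ori_images, buffered per tracker ID by KeyPointCollector, and only when get_state() signals a full window (or a lost tracker) does the ST-GCN ActionRecognizer run. The collector itself lives in python/action_utils.py, which this excerpt does not show; as a rough mental model only (a hypothetical ToyKeyPointCollector, not the repo's class), the buffering could look like:

from collections import defaultdict, deque

class ToyKeyPointCollector:
    """Illustrative sketch only -- not the KeyPointCollector from
    python/action_utils.py. Buffers keypoints per tracker ID until
    a window of max_frames frames is ready for action inference."""

    def __init__(self, max_frames=50):
        self.max_frames = max_frames
        self.buffers = defaultdict(lambda: deque(maxlen=max_frames))

    def update(self, kpt_res, mot_res):
        # mot_res['boxes'] rows are [id, class, score, x1, y1, x2, y2]
        # (see parse_mot_res); pair each row with its keypoint set.
        for box, kpts in zip(mot_res['boxes'], kpt_res['keypoint'][0]):
            self.buffers[int(box[0])].append(kpts)

    def get_state(self):
        # True once any track has a full window of keypoint frames.
        return any(len(b) == self.max_frames for b in self.buffers.values())

The real collector additionally exposes get_collected_keypoint(), whose (tracker_id, kpt_seq) pairs parse_mot_keypoint converts into the (C, T, K, 1) skeleton tensors consumed by predict_skeleton_with_mot.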
