add vehicle attr model into pipeline (PaddlePaddle#6274)

zoooo0820 · web-flow · commit 2f06ad8afe06 · 2022-06-28T16:58:29.000+08:00
* add vehicle attr into pipeline

* fix missing BGR-to-RGB conversion in predict_video
diff --git a/deploy/pphuman/config/infer_cfg_ppvehicle.yml b/deploy/pphuman/config/infer_cfg_ppvehicle.yml
@@ -1,6 +1,4 @@
 crop_thresh: 0.5
-attr_thresh: 0.5
-kpt_thresh: 0.2
 visual: True
 warmup_frame: 50
 
@@ -24,12 +22,14 @@ VEHICLE_PLATE:
   rec_batch_num: 6
   word_dict_path: deploy/pphuman/ppvehicle/rec_word_dict.txt
   basemode: "idbased"
-  enable: True
+  enable: False
 
-ATTR:
-  model_dir: output_inference/strongbaseline_r50_30e/
+VEHICLE_ATTR:
+  model_dir: output_inference/vehicle_attribute_infer/
   batch_size: 8
   basemode: "idbased"
+  color_threshold: 0.5
+  type_threshold: 0.5
   enable: False
 
 REID:
diff --git a/deploy/pphuman/datacollector.py b/deploy/pphuman/datacollector.py
@@ -28,7 +28,8 @@ def __init__(self):
             'reid': dict(),
             'det_action': dict(),
             'cls_action': dict(),
-            'vehicleplate': dict()
+            'vehicleplate': dict(),
+            'vehicle_attr': dict()
         }
 
     def update(self, res, name):
diff --git a/deploy/pphuman/pipe_utils.py b/deploy/pphuman/pipe_utils.py
@@ -156,7 +156,8 @@ def __init__(self):
             'skeleton_action': Times(),
             'reid': Times(),
             'det_action': Times(),
-            'cls_action': Times()
+            'cls_action': Times(),
+            'vehicle_attr': Times()
         }
         self.img_num = 0
 
diff --git a/deploy/pphuman/pipeline.py b/deploy/pphuman/pipeline.py
@@ -27,7 +27,6 @@
 from reid import ReID
 from datacollector import DataCollector, Result
 from mtmct import mtmct_process
-from ppvehicle.vehicle_plate import PlateRecognizer
 
 # add deploy path of PadleDetection to sys.path
 parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
@@ -51,6 +50,9 @@
 from pptracking.python.mot.visualize import plot_tracking_dict
 from pptracking.python.mot.utils import flow_statistic
 
+from ppvehicle.vehicle_plate import PlateRecognizer
+from ppvehicle.vehicle_attr import VehicleAttr
+
 
 class Pipeline(object):
     """
@@ -224,12 +226,12 @@ def __init__(self, args, cfg, is_video=True, multi_camera=False):
         # general module for pphuman and ppvehicle
         self.with_mot = cfg.get('MOT', False)['enable'] if cfg.get(
             'MOT', False) else False
-        self.with_attr = cfg.get('ATTR', False)['enable'] if cfg.get(
+        self.with_human_attr = cfg.get('ATTR', False)['enable'] if cfg.get(
             'ATTR', False) else False
         if self.with_mot:
             print('Multi-Object Tracking enabled')
-        if self.with_attr:
-            print('Attribute Recognition enabled')
+        if self.with_human_attr:
+            print('Human Attribute Recognition enabled')
 
         # only for pphuman
         self.with_skeleton_action = cfg.get(
@@ -265,6 +267,12 @@ def __init__(self, args, cfg, is_video=True, multi_camera=False):
         if self.with_vehicleplate:
             print('Vehicle Plate Recognition enabled')
 
+        self.with_vehicle_attr = cfg.get(
+            'VEHICLE_ATTR', False)['enable'] if cfg.get('VEHICLE_ATTR',
+                                                        False) else False
+        if self.with_vehicle_attr:
+            print('Vehicle Attribute Recognition enabled')
+
         self.modebase = {
             "framebased": False,
             "videobased": False,
@@ -294,7 +302,7 @@ def __init__(self, args, cfg, is_video=True, multi_camera=False):
                 model_dir, device, run_mode, batch_size, trt_min_shape,
                 trt_max_shape, trt_opt_shape, trt_calib_mode, cpu_threads,
                 enable_mkldnn)
-            if self.with_attr:
+            if self.with_human_attr:
                 attr_cfg = self.cfg['ATTR']
                 model_dir = attr_cfg['model_dir']
                 batch_size = attr_cfg['batch_size']
@@ -305,8 +313,21 @@ def __init__(self, args, cfg, is_video=True, multi_camera=False):
                     trt_max_shape, trt_opt_shape, trt_calib_mode, cpu_threads,
                     enable_mkldnn)
 
+            if self.with_vehicle_attr:
+                vehicleattr_cfg = self.cfg['VEHICLE_ATTR']
+                model_dir = vehicleattr_cfg['model_dir']
+                batch_size = vehicleattr_cfg['batch_size']
+                color_threshold = vehicleattr_cfg['color_threshold']
+                type_threshold = vehicleattr_cfg['type_threshold']
+                basemode = vehicleattr_cfg['basemode']
+                self.modebase[basemode] = True
+                self.vehicle_attr_predictor = VehicleAttr(
+                    model_dir, device, run_mode, batch_size, trt_min_shape,
+                    trt_max_shape, trt_opt_shape, trt_calib_mode, cpu_threads,
+                    enable_mkldnn, color_threshold, type_threshold)
+
         else:
-            if self.with_attr:
+            if self.with_human_attr:
                 attr_cfg = self.cfg['ATTR']
                 model_dir = attr_cfg['model_dir']
                 batch_size = attr_cfg['batch_size']
@@ -412,6 +433,19 @@ def __init__(self, args, cfg, is_video=True, multi_camera=False):
                 basemode = vehicleplate_cfg['basemode']
                 self.modebase[basemode] = True
 
+            if self.with_vehicle_attr:
+                vehicleattr_cfg = self.cfg['VEHICLE_ATTR']
+                model_dir = vehicleattr_cfg['model_dir']
+                batch_size = vehicleattr_cfg['batch_size']
+                color_threshold = vehicleattr_cfg['color_threshold']
+                type_threshold = vehicleattr_cfg['type_threshold']
+                basemode = vehicleattr_cfg['basemode']
+                self.modebase[basemode] = True
+                self.vehicle_attr_predictor = VehicleAttr(
+                    model_dir, device, run_mode, batch_size, trt_min_shape,
+                    trt_max_shape, trt_opt_shape, trt_calib_mode, cpu_threads,
+                    enable_mkldnn, color_threshold, type_threshold)
+
             if self.with_mot or self.modebase["idbased"] or self.modebase[
                     "skeletonbased"]:
                 mot_cfg = self.cfg['MOT']
@@ -510,7 +544,7 @@ def predict_image(self, input):
                 self.pipe_timer.module_time['det'].end()
             self.pipeline_res.update(det_res, 'det')
 
-            if self.with_attr:
+            if self.with_human_attr:
                 crop_inputs = crop_image_with_det(batch_input, det_res)
                 attr_res_list = []
 
@@ -528,6 +562,24 @@ def predict_image(self, input):
                 attr_res = {'output': attr_res_list}
                 self.pipeline_res.update(attr_res, 'attr')
 
+            if self.with_vehicle_attr:
+                crop_inputs = crop_image_with_det(batch_input, det_res)
+                vehicle_attr_res_list = []
+
+                if i > self.warmup_frame:
+                    self.pipe_timer.module_time['vehicle_attr'].start()
+
+                for crop_input in crop_inputs:
+                    attr_res = self.vehicle_attr_predictor.predict_image(
+                        crop_input, visual=False)
+                    vehicle_attr_res_list.extend(attr_res['output'])
+
+                if i > self.warmup_frame:
+                    self.pipe_timer.module_time['vehicle_attr'].end()
+
+                attr_res = {'output': vehicle_attr_res_list}
+                self.pipeline_res.update(attr_res, 'vehicle_attr')
+
             self.pipe_timer.img_num += len(batch_input)
             if i > self.warmup_frame:
                 self.pipe_timer.total_time.end()
@@ -581,13 +633,14 @@ def predict_video(self, video_file):
             ret, frame = capture.read()
             if not ret:
                 break
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
 
             if self.modebase["idbased"] or self.modebase["skeletonbased"]:
                 if frame_id > self.warmup_frame:
                     self.pipe_timer.total_time.start()
                     self.pipe_timer.module_time['mot'].start()
                 res = self.mot_predictor.predict_image(
-                    [copy.deepcopy(frame)], visual=False)
+                    [copy.deepcopy(frame_rgb)], visual=False)
 
                 if frame_id > self.warmup_frame:
                     self.pipe_timer.module_time['mot'].end()
@@ -625,14 +678,14 @@ def predict_video(self, video_file):
 
                 self.pipeline_res.update(mot_res, 'mot')
                 crop_input, new_bboxes, ori_bboxes = crop_image_with_mot(
-                    frame, mot_res)
+                    frame_rgb, mot_res)
 
                 if self.with_vehicleplate:
                     platelicense = self.vehicleplate_detector.get_platelicense(
                         crop_input)
                     self.pipeline_res.update(platelicense, 'vehicleplate')
 
-                if self.with_attr:
+                if self.with_human_attr:
                     if frame_id > self.warmup_frame:
                         self.pipe_timer.module_time['attr'].start()
                     attr_res = self.attr_predictor.predict_image(
@@ -641,6 +694,15 @@ def predict_video(self, video_file):
                         self.pipe_timer.module_time['attr'].end()
                     self.pipeline_res.update(attr_res, 'attr')
 
+                if self.with_vehicle_attr:
+                    if frame_id > self.warmup_frame:
+                        self.pipe_timer.module_time['vehicle_attr'].start()
+                    attr_res = self.vehicle_attr_predictor.predict_image(
+                        crop_input, visual=False)
+                    if frame_id > self.warmup_frame:
+                        self.pipe_timer.module_time['vehicle_attr'].end()
+                    self.pipeline_res.update(attr_res, 'vehicle_attr')
+
                 if self.with_idbased_detaction:
                     if frame_id > self.warmup_frame:
                         self.pipe_timer.module_time['det_action'].start()
@@ -708,7 +770,7 @@ def predict_video(self, video_file):
 
                 if self.with_mtmct and frame_id % 10 == 0:
                     crop_input, img_qualities, rects = self.reid_predictor.crop_image_with_mot(
-                        frame, mot_res)
+                        frame_rgb, mot_res)
                     if frame_id > self.warmup_frame:
                         self.pipe_timer.module_time['reid'].start()
                     reid_res = self.reid_predictor.predict_batch(crop_input)
@@ -740,7 +802,7 @@ def predict_video(self, video_file):
                 # collect frames
                 if frame_id % sample_freq == 0:
                     # Scale image
-                    scaled_img = scale(frame)
+                    scaled_img = scale(frame_rgb)
                     video_action_imgs.append(scaled_img)
 
                 # the number of collected frames is enough to predict video action
@@ -820,11 +882,18 @@ def visualize_video(self,
             records=records,
             center_traj=center_traj)
 
-        attr_res = result.get('attr')
-        if attr_res is not None:
+        human_attr_res = result.get('attr')
+        if human_attr_res is not None:
+            boxes = mot_res['boxes'][:, 1:]
+            human_attr_res = human_attr_res['output']
+            image = visualize_attr(image, human_attr_res, boxes)
+            image = np.array(image)
+
+        vehicle_attr_res = result.get('vehicle_attr')
+        if vehicle_attr_res is not None:
             boxes = mot_res['boxes'][:, 1:]
-            attr_res = attr_res['output']
-            image = visualize_attr(image, attr_res, boxes)
+            vehicle_attr_res = vehicle_attr_res['output']
+            image = visualize_attr(image, vehicle_attr_res, boxes)
             image = np.array(image)
 
         vehicleplate_res = result.get('vehicleplate')
@@ -883,7 +952,9 @@ def visualize_video(self,
     def visualize_image(self, im_files, images, result):
         start_idx, boxes_num_i = 0, 0
         det_res = result.get('det')
-        attr_res = result.get('attr')
+        human_attr_res = result.get('attr')
+        vehicle_attr_res = result.get('vehicle_attr')
+
         for i, (im_file, im) in enumerate(zip(im_files, images)):
             if det_res is not None:
                 det_res_i = {}
@@ -897,10 +968,15 @@ def visualize_image(self, im_files, images, result):
                     threshold=self.cfg['crop_thresh'])
                 im = np.ascontiguousarray(np.copy(im))
                 im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
-            if attr_res is not None:
-                attr_res_i = attr_res['output'][start_idx:start_idx +
-                                                boxes_num_i]
-                im = visualize_attr(im, attr_res_i, det_res_i['boxes'])
+            if human_attr_res is not None:
+                human_attr_res_i = human_attr_res['output'][start_idx:start_idx
+                                                            + boxes_num_i]
+                im = visualize_attr(im, human_attr_res_i, det_res_i['boxes'])
+            if vehicle_attr_res is not None:
+                vehicle_attr_res_i = vehicle_attr_res['output'][
+                    start_idx:start_idx + boxes_num_i]
+                im = visualize_attr(im, vehicle_attr_res_i, det_res_i['boxes'])
+
             img_name = os.path.split(im_file)[-1]
             if not os.path.exists(self.output_dir):
                 os.makedirs(self.output_dir)
diff --git a/deploy/pphuman/ppvehicle/vehicle_attr.py b/deploy/pphuman/ppvehicle/vehicle_attr.py

Original file line number	Diff line number	Diff line change
`@@ -28,7 +28,8 @@ def __init__(self):`
`28`	`28`	`'reid': dict(),`
`29`	`29`	`'det_action': dict(),`
`30`	`30`	`'cls_action': dict(),`
`31`		`- 'vehicleplate': dict()`
	`31`	`+ 'vehicleplate': dict(),`
	`32`	`+ 'vehicle_attr': dict()`
`32`	`33`	`}`
`33`	`34`
`34`	`35`	`def update(self, res, name):`
Original file line number	Diff line number	Diff line change
`@@ -156,7 +156,8 @@ def __init__(self):`
`156`	`156`	`'skeleton_action': Times(),`
`157`	`157`	`'reid': Times(),`
`158`	`158`	`'det_action': Times(),`
`159`		`- 'cls_action': Times()`
	`159`	`+ 'cls_action': Times(),`
	`160`	`+ 'vehicle_attr': Times()`
`160`	`161`	`}`
`161`	`162`	`self.img_num = 0`
`162`	`163`