[npu-tipc] fix npu tipc (PaddlePaddle#8196)

YanhuiDua · web-flow · commit 6042fcd802c2 · 2023-05-08T19:42:53.000+08:00
* add npu inference support

* set `aligned: False` in the training configs for npu

* fix typo
diff --git a/deploy/pipeline/pipeline.py b/deploy/pipeline/pipeline.py
@@ -1315,7 +1315,7 @@ def main():
     parser = argsparser()
     FLAGS = parser.parse_args()
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
 
     main()
diff --git a/deploy/pipeline/pphuman/action_infer.py b/deploy/pipeline/pphuman/action_infer.py
@@ -41,7 +41,7 @@ class SkeletonActionRecognizer(Detector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -285,7 +285,7 @@ class DetActionRecognizer(object):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -454,7 +454,7 @@ class ClsActionRecognizer(AttrDetector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -684,8 +684,8 @@ def main():
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, NPU or XPU"
     assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
 
     main()
diff --git a/deploy/pipeline/pphuman/attr_infer.py b/deploy/pipeline/pphuman/attr_infer.py
@@ -42,7 +42,7 @@ class AttrDetector(Detector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -341,8 +341,8 @@ def main():
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
 
     main()
diff --git a/deploy/pipeline/pphuman/reid.py b/deploy/pipeline/pphuman/reid.py
@@ -32,7 +32,7 @@ class ReID(object):
     Args:
         pred_config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference, default 50 means at most
             50 sub images can be made a batch and send into ReID model
diff --git a/deploy/pipeline/pphuman/video_action_infer.py b/deploy/pipeline/pphuman/video_action_infer.py
@@ -47,7 +47,7 @@ class VideoActionRecognizer(object):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -105,6 +105,10 @@ def __init__(self,
 
         if device == "GPU" or device == "gpu":
             self.config.enable_use_gpu(8000, 0)
+        elif device == "XPU" or device == "xpu":
+            self.config.enable_xpu(10 * 1024 * 1024)
+        elif device == "NPU" or device == "npu":
+            self.config.enable_custom_device('npu')
         else:
             self.config.disable_gpu()
         if self.enable_mkldnn:
@@ -308,7 +312,7 @@ def main():
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
 
     main()
diff --git a/deploy/pipeline/ppvehicle/vehicle_attr.py b/deploy/pipeline/ppvehicle/vehicle_attr.py
@@ -41,7 +41,7 @@ class VehicleAttr(AttrDetector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -143,8 +143,8 @@ def postprocess(self, inputs, result):
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, NPU or XPU"
     assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
 
     main()
diff --git a/deploy/pipeline/ppvehicle/vehicle_plate.py b/deploy/pipeline/ppvehicle/vehicle_plate.py
@@ -325,7 +325,7 @@ def main():
     parser = argsparser()
     FLAGS = parser.parse_args()
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, NPU or XPU"
 
     main()
diff --git a/deploy/pptracking/python/det_infer.py b/deploy/pptracking/python/det_infer.py
@@ -70,7 +70,7 @@ class Detector(object):
     Args:
         pred_config (object): config of model, defined by `Config(model_dir)`
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -400,7 +400,7 @@ def load_predictor(model_dir,
     """set AnalysisConfig, generate AnalysisPredictor
     Args:
         model_dir (str): root path of __model__ and __params__
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16/trt_int8)
         use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -432,8 +432,13 @@ def load_predictor(model_dir,
         # optimize graph and fuse op
         config.switch_ir_optim(True)
     elif device == 'XPU':
-        config.enable_lite_engine()
+        if config.lite_engine_enabled():
+            config.enable_lite_engine()
         config.enable_xpu(10 * 1024 * 1024)
+    elif device == 'NPU':
+        if config.lite_engine_enabled():
+            config.enable_lite_engine()
+        config.enable_custom_device('npu')
     else:
         config.disable_gpu()
         config.set_cpu_math_library_num_threads(cpu_threads)
diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py
@@ -50,7 +50,7 @@ class KeyPointDetector(Detector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -408,8 +408,8 @@ def main():
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, XPU or NPU"
     assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
 
     main()
diff --git a/deploy/python/mot_centertrack_infer.py b/deploy/python/mot_centertrack_infer.py
@@ -65,7 +65,7 @@ class CenterTrack(Detector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -130,7 +130,7 @@ def __init__(
             vertical_ratio=vertical_ratio,
             track_thresh=track_thresh,
             pre_thresh=pre_thresh)
-    
+
         self.pre_image = None
 
     def get_additional_inputs(self, dets, meta, with_hm=True):
@@ -173,19 +173,18 @@ def preprocess(self, image_list):
         #inputs = create_inputs(im, im_info)
         inputs = {}
         inputs['image'] = np.array((im, )).astype('float32')
-        inputs['im_shape'] = np.array(
-            (im_info['im_shape'], )).astype('float32')
+        inputs['im_shape'] = np.array((im_info['im_shape'], )).astype('float32')
         inputs['scale_factor'] = np.array(
             (im_info['scale_factor'], )).astype('float32')
-        
+
         inputs['trans_input'] = im_info['trans_input']
         inputs['inp_width'] = im_info['inp_width']
         inputs['inp_height'] = im_info['inp_height']
         inputs['center'] = im_info['center']
         inputs['scale'] = im_info['scale']
         inputs['out_height'] = im_info['out_height']
         inputs['out_width'] = im_info['out_width']
-        
+
         if self.pre_image is None:
             self.pre_image = inputs['image']
             # initializing tracker for the first frame
@@ -196,7 +195,7 @@ def preprocess(self, image_list):
         # render input heatmap from tracker status
         pre_hm = self.get_additional_inputs(
             self.tracker.tracks, inputs, with_hm=True)
-        inputs['pre_hm'] = pre_hm #.to_tensor(pre_hm)
+        inputs['pre_hm'] = pre_hm  #.to_tensor(pre_hm)
 
         input_names = self.predictor.get_input_names()
         for i in range(len(input_names)):
@@ -256,8 +255,8 @@ def centertrack_post_process(self, dets, meta, out_thresh):
         return preds
 
     def tracking(self, inputs, det_results):
-        result = self.centertrack_post_process(
-            det_results, inputs, self.tracker.out_thresh)
+        result = self.centertrack_post_process(det_results, inputs,
+                                               self.tracker.out_thresh)
         online_targets = self.tracker.update(result)
 
         online_tlwhs, online_scores, online_ids = [], [], []
@@ -292,10 +291,7 @@ def predict(self, repeats=1):
             tracking_tensor = self.predictor.get_output_handle(output_names[2])
             np_tracking = tracking_tensor.copy_to_cpu()
 
-        result = dict(
-            bboxes=np_bboxes,
-            cts=np_cts,
-            tracking=np_tracking)
+        result = dict(bboxes=np_bboxes, cts=np_cts, tracking=np_tracking)
         return result
 
     def predict_image(self,
@@ -333,8 +329,8 @@ def predict_image(self,
                 # tracking
                 result_warmup = self.tracking(inputs, det_result)
                 self.det_times.tracking_time_s.start()
-                online_tlwhs, online_scores, online_ids = self.tracking(inputs,
-                    det_result)
+                online_tlwhs, online_scores, online_ids = self.tracking(
+                    inputs, det_result)
                 self.det_times.tracking_time_s.end()
                 self.det_times.img_num += 1
 
@@ -358,8 +354,8 @@ def predict_image(self,
 
                 # tracking process
                 self.det_times.tracking_time_s.start()
-                online_tlwhs, online_scores, online_ids = self.tracking(inputs, 
-                    det_result)
+                online_tlwhs, online_scores, online_ids = self.tracking(
+                    inputs, det_result)
                 self.det_times.tracking_time_s.end()
                 self.det_times.img_num += 1
 
@@ -499,7 +495,7 @@ def main():
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, NPU or XPU"
 
     main()
diff --git a/deploy/python/mot_jde_infer.py b/deploy/python/mot_jde_infer.py
@@ -45,7 +45,7 @@ class JDE_Detector(Detector):
     """
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -375,7 +375,7 @@ def main():
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, NPU or XPU"
 
     main()
diff --git a/deploy/python/mot_keypoint_unite_infer.py b/deploy/python/mot_keypoint_unite_infer.py
@@ -295,7 +295,7 @@ def main():
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, NPU or XPU"
 
     main()
diff --git a/deploy/python/mot_keypoint_unite_utils.py b/deploy/python/mot_keypoint_unite_utils.py
@@ -78,7 +78,7 @@ def argsparser():
         "--device",
         type=str,
         default='cpu',
-        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+        help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."
     )
     parser.add_argument(
         "--run_benchmark",
diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py
@@ -40,7 +40,7 @@ class SDE_Detector(Detector):
     Args:
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
         tracker_config (str): tracker config path
-        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
+        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
         run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
         batch_size (int): size of pre batch in inference
         trt_min_shape (int): min shape for dynamic shape in trt
@@ -516,7 +516,7 @@ def main():
     FLAGS = parser.parse_args()
     print_arguments(FLAGS)
     FLAGS.device = FLAGS.device.upper()
-    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
-                            ], "device should be CPU, GPU or XPU"
+    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
+                            ], "device should be CPU, GPU, NPU or XPU"
 
     main()
diff --git a/deploy/python/utils.py b/deploy/python/utils.py
@@ -64,7 +64,7 @@ def argsparser():
         "--device",
         type=str,
         default='cpu',
-        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
+        help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."
     )
     parser.add_argument(
         "--use_gpu",
diff --git a/test_tipc/test_train_inference_python_npu.sh b/test_tipc/test_train_inference_python_npu.sh
@@ -49,8 +49,8 @@ grep -n '.yml' $FILENAME  | cut -d ":" -f 1 \
 | while read line_num ; do 
     train_cmd=$(func_parser_value "${lines[line_num-1]}")
     trainer_config=$(func_parser_config ${train_cmd})
-    echo ${trainer_config}
     sed -i 's/use_gpu/use_npu/g' "$REPO_ROOT_PATH/$trainer_config"
+    sed -i 's/aligned: True/aligned: False/g' "$REPO_ROOT_PATH/$trainer_config"
     # fine use_gpu in those included yaml
     sub_datalinee=`cat $REPO_ROOT_PATH/$trainer_config`
     IFS=$'\n'
@@ -60,9 +60,10 @@ grep -n '.yml' $FILENAME  | cut -d ":" -f 1 \
         sub_config=${sub_lines[sub_line_num-1]} 
         dst=${#sub_config}-5
         sub_path=$(func_parser_dir "${trainer_config}")
-        sub_config_path="${REPO_ROOT_PATH}${sub_path}/${sub_config:3:${dst}}"
-        echo ${sub_config_path}
+        sub_config_name=$(echo "$sub_config" | awk -F"'" '{ print $2 }')
+        sub_config_path="${REPO_ROOT_PATH}${sub_path}/${sub_config_name}"
         sed -i 's/use_gpu/use_npu/g' "$sub_config_path"
+        sed -i 's/aligned: True/aligned: False/g' "$sub_config_path"
     done
 done
 # pass parameters to test_train_inference_python.sh

Original file line number	Diff line number	Diff line change
`@@ -78,7 +78,7 @@ def argsparser():`
`78`	`78`	`"--device",`
`79`	`79`	`type=str,`
`80`	`80`	`default='cpu',`
`81`		`- help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."`
	`81`	`+ help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."`
`82`	`82`	`)`
`83`	`83`	`parser.add_argument(`
`84`	`84`	`"--run_benchmark",`
Original file line number	Diff line number	Diff line change
`@@ -64,7 +64,7 @@ def argsparser():`
`64`	`64`	`"--device",`
`65`	`65`	`type=str,`
`66`	`66`	`default='cpu',`
`67`		`- help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."`
	`67`	`+ help="Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU."`
`68`	`68`	`)`
`69`	`69`	`parser.add_argument(`
`70`	`70`	`"--use_gpu",`