最终代码

lizexu123 · lizexu123 · commit 26a6d919b79c · 2024-01-03T09:07:34.000Z
diff --git a/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml b/example/auto_compression/detection/configs/ppyoloe_l_qat_dis.yaml
@@ -2,7 +2,7 @@
 Global:
   reader_config: configs/yolo_reader.yml
   arch: PPYOLOE
-  include_nms: True
+  include_nms: False
   Evaluation: True
   model_dir: ./ppyoloe_crn_l_300e_coco
   model_filename: model.pdmodel
diff --git a/example/auto_compression/detection/configs/yolo_reader.yml b/example/auto_compression/detection/configs/yolo_reader.yml
@@ -6,13 +6,13 @@ TrainDataset:
   !COCODataSet
     image_dir: train2017
     anno_path: annotations/instances_train2017.json
-    dataset_dir: dataset/coco/
+    dataset_dir: /work/GETR-Lite-paddle-new/inference/datasets/coco/
 
 EvalDataset:
   !COCODataSet
     image_dir: val2017
     anno_path: annotations/instances_val2017.json
-    dataset_dir: dataset/coco/
+    dataset_dir: /work/GETR-Lite-paddle-new/inference/datasets/coco/
 
 worker_num: 0
 
diff --git a/example/auto_compression/detection/paddle_inference_eval.py b/example/auto_compression/detection/paddle_inference_eval.py
@@ -18,6 +18,7 @@
 import sys
 import cv2
 import numpy as np
+from tqdm import tqdm
 
 import paddle
 from paddle.inference import Config
@@ -85,12 +86,12 @@ def argsparser():
         type=str,
         default='True',
         help="Whether include nms or not.")
-    # 是否用来测速
     parser.add_argument(
-        '--speed',
-        type=str,
-        default='True',
-        help="if speed is True, it will print the inference time.")
+        "--trt_calib_mode",
+        type=bool,
+        default=False,
+        help="If the model is produced by TRT offline quantitative "
+        "calibration, trt_calib_mode need to set True.")
 
     return parser
 
@@ -214,8 +215,9 @@ def load_predictor(
         use_mkldnn=False,
         batch_size=1,
         device="CPU",
-        min_subgraph_size=3,
+        min_subgraph_size=4,
         use_dynamic_shape=False,
+        trt_calib_mode=False,
         trt_min_shape=1,
         trt_max_shape=1280,
         trt_opt_shape=640,
@@ -273,7 +275,7 @@ def load_predictor(
             min_subgraph_size=min_subgraph_size,
             precision_mode=precision_map[precision],
             use_static=True,
-            use_calib_mode=False, )
+            use_calib_mode=False)
 
         if use_dynamic_shape:
             dynamic_shape_file = os.path.join(FLAGS.model_path,
@@ -363,12 +365,9 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
     input_names = predictor.get_input_names()
     output_names = predictor.get_output_names()
     boxes_tensor = predictor.get_output_handle(output_names[0])
-    print("output_names:", output_names)
-    print("Number of outputs:", len(output_names))
-    print("FLAGS.include_nms:", FLAGS.include_nms)
     if FLAGS.include_nms:
         boxes_num = predictor.get_output_handle(output_names[1])
-    for batch_id, data in enumerate(val_loader):
+    for batch_id, data in tqdm(enumerate(val_loader), total=len(val_loader), desc='Evaluating'):
         data_all = {k: np.array(v) for k, v in data.items()}
         for i, _ in enumerate(input_names):
             input_tensor = predictor.get_input_handle(input_names[i])
@@ -386,79 +385,26 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
         time_min = min(time_min, timed)
         time_max = max(time_max, timed)
         predict_time += timed
-        # print("FLAGS.include_nms:", FLAGS.include_nms)
-        # print("FLAGS.speed:", FLAGS.speed)
-        # 如果include_nms为false且flags.speed为True，则走PPYOLOEPostProcess
-        if not FLAGS.include_nms and FLAGS.speed: 
-            # print("nms为True的时候走了PPYOLOEPostProcess")
+        if not FLAGS.include_nms:
             postprocess = PPYOLOEPostProcess(
                 score_threshold=0.3, nms_threshold=0.6)
             res = postprocess(np_boxes, data_all['scale_factor'])
-        #如果include_nms为false且flags.speed为False,则跳过
-        elif not FLAGS.include_nms and not FLAGS.speed:
-            continue
-        #如果include_nms,则直接返回
-        elif FLAGS.include_nms:
-            # print("nms为False的时候直接返回")
+        else:
             res = {'bbox': np_boxes, 'bbox_num': np_boxes_num}
         metric.update(data_all, res)
         if batch_id % 100 == 0:
-            print("Eval iter:", batch_id)
             sys.stdout.flush()
     metric.accumulate()
-    if not FLAGS.speed:
-        metric.log()
+    metric.log()
     map_res = metric.get_results()
     metric.reset()
     time_avg = predict_time / sample_nums
     print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
         round(time_min * 1000, 2),
         round(time_max * 1000, 1), round(time_avg * 1000, 1)))
-    if not FLAGS.speed:
-        print("[Benchmark] COCO mAP: {}".format(map_res["bbox"][0]))
+    print("[Benchmark] COCO mAP: {}".format(map_res["bbox"][0]))
     sys.stdout.flush()
 
-def inference_time(predictor, val_loader, metric, rerun_flag=False):
-    cpu_mems, gpu_mems = 0, 0
-    predict_time = 0.0
-    time_min = float("inf")
-    time_max = float("-inf")
-    sample_nums = len(val_loader)
-    input_names = predictor.get_input_names()
-    output_names = predictor.get_output_names()
-    boxes_tensor = predictor.get_output_handle(output_names[0])
-    print("output_names:", output_names)
-    print("Number of outputs:", len(output_names))
-    print("FLAGS.include_nms:", FLAGS.include_nms)
-    if FLAGS.include_nms:
-        boxes_num = predictor.get_output_handle(output_names[1])
-    
-    for batch_id, data in enumerate(val_loader):
-        data_all = {k: np.array(v) for k, v in data.items()}
-        for i, _ in enumerate(input_names):
-            input_tensor = predictor.get_input_handle(input_names[i])
-            input_tensor.copy_from_cpu(data_all[input_names[i]])
-        paddle.device.cuda.synchronize()
-        start_time = time.time()
-        predictor.run()
-        # np_boxes = boxes_tensor.copy_to_cpu()
-        if FLAGS.include_nms:
-            np_boxes_num = boxes_num.copy_to_cpu()
-        if rerun_flag:
-            return
-        end_time = time.time()
-        timed = end_time - start_time
-        time_min = min(time_min, timed)
-        time_max = max(time_max, timed)
-        predict_time += timed
-        # print("FLAGS.include_nms:", FLAGS.include_nms)
-        # print("FLAGS.speed:", FLAGS.speed)
-        # 如果include_nms为false且flags.speed为True，则走PPYOLOEPostProcess
-    time_avg = predict_time / sample_nums
-    print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
-        round(time_min * 1000, 2),
-        round(time_max * 1000, 1), round(time_avg * 1000, 1)))
-    sys.stdout.flush()
 
 def main():
     """
@@ -485,7 +431,6 @@ def main():
             repeats=repeats)
     else:
         reader_cfg = load_config(FLAGS.reader_config)
-        
         dataset = reader_cfg["EvalDataset"]
         global val_loader
         val_loader = create("EvalReader")(
@@ -496,11 +441,9 @@ def main():
         anno_file = dataset.get_anno()
         metric = COCOMetric(
             anno_file=anno_file, clsid2catid=clsid2catid, IouType="bbox")
-        if not FLAGS.speed:
-            eval(predictor, val_loader, metric, rerun_flag=rerun_flag)
-        else:
-            inference_time(predictor, val_loader, metric, rerun_flag=rerun_flag)
-
+        
+        eval(predictor, val_loader, metric, rerun_flag=rerun_flag)
+       
     if rerun_flag:
         print(
             "***** Collect dynamic shape done, Please rerun the program to get correct results. *****"
@@ -516,9 +459,6 @@ def main():
     else:
         FLAGS.include_nms = False   
 
-    print('**************main****************')
-    print(FLAGS)
-
     # DataLoader need run on cpu
     paddle.set_device("cpu")
 
diff --git a/example/auto_compression/detection/post_process.py b/example/auto_compression/detection/post_process.py
@@ -122,7 +122,7 @@ def _non_max_suppression(self, prediction, scale_factor):
                 picked_labels.extend([class_index] * box_probs.shape[0])
 
             if len(picked_box_probs) == 0:
-                out_boxes_list.append(np.empty((0, 4)))
+                out_boxes_list.append(np.empty((0, 6)))
 
             else:
                 picked_box_probs = np.concatenate(picked_box_probs)