Commit 6a2ec0b

committed: Got it running + added speed-test support

1 parent 521157e  commit 6a2ec0b

File tree: 1 file changed (+84, -10)

example/auto_compression/detection/paddle_inference_eval.py  (+84 -10)
@@ -82,9 +82,15 @@ def argsparser():
     parser.add_argument("--img_shape", type=int, default=640, help="input_size")
     parser.add_argument(
         '--include_nms',
-        type=bool,
-        default=True,
+        type=str,
+        default='True',
         help="Whether include nms or not.")
+    # Whether this run is used for speed measurement only
+    parser.add_argument(
+        '--speed',
+        type=str,
+        default='True',
+        help="If speed is True, only print the inference time.")
 
     return parser
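
Aside: the switch from `type=bool` to `type=str` dodges a standard argparse pitfall. argparse applies `type()` to the raw command-line text, and `bool('False')` is `True`, so a `type=bool` flag can never actually be turned off. A common alternative is a small converter; this is a minimal sketch, not part of the commit:

    import argparse

    def str2bool(v):
        # argparse calls type() on the raw string; bool("False") == True,
        # so the text has to be parsed explicitly.
        if isinstance(v, bool):
            return v
        if v.lower() in ("yes", "true", "t", "1"):
            return True
        if v.lower() in ("no", "false", "f", "0"):
            return False
        raise argparse.ArgumentTypeError("Boolean value expected, got %r" % v)

    parser = argparse.ArgumentParser()
    parser.add_argument("--include_nms", type=str2bool, default=True)
    parser.add_argument("--speed", type=str2bool, default=True)

    args = parser.parse_args(["--include_nms", "False"])
    print(args.include_nms)  # False, as intended

With a converter like this, the manual string-to-bool patch at the bottom of main() in this commit would be unnecessary.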

@@ -238,9 +244,11 @@ def load_predictor(
     config = Config(
         os.path.join(model_dir, "model.pdmodel"),
         os.path.join(model_dir, "model.pdiparams"))
+
+    config.enable_memory_optim()
     if device == "GPU":
         # initial GPU memory(M), device ID
-        config.enable_use_gpu(200, 0)
+        config.enable_use_gpu(1000, 0)
         # optimize graph and fuse op
         config.switch_ir_optim(True)
     else:
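
For context: in `enable_use_gpu(memory_pool_init_size_mb, device_id)` the first argument is the initial GPU memory pool in MB, so this change raises the pool from 200 MB to 1000 MB, and `enable_memory_optim()` additionally lets the engine reuse intermediate tensor buffers. A minimal sketch of the same setup, with placeholder paths rather than the script's `--model_dir` arguments:

    from paddle.inference import Config

    config = Config("model.pdmodel", "model.pdiparams")  # placeholder paths
    config.enable_memory_optim()    # reuse intermediate buffers across ops
    config.enable_use_gpu(1000, 0)  # 1000 MB initial pool on GPU device 0
    config.switch_ir_optim(True)    # graph-level fusion passes
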
@@ -260,7 +268,7 @@ def load_predictor(
     }
     if precision in precision_map.keys() and use_trt:
         config.enable_tensorrt_engine(
-            workspace_size=(1 << 25) * batch_size,
+            workspace_size=(1 << 30) * batch_size,
             max_batch_size=batch_size,
             min_subgraph_size=min_subgraph_size,
             precision_mode=precision_map[precision],
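
The workspace bump goes from `(1 << 25)` bytes (32 MiB) to `(1 << 30)` bytes (1 GiB) per batch element, giving TensorRT more scratch space to pick faster kernels. A hedged sketch of the full call; the `precision_map` values follow Paddle Inference's `PrecisionType`, and `min_subgraph_size=3` is an assumed value standing in for the script's own parameter:

    from paddle.inference import Config, PrecisionType

    precision_map = {  # assumed mapping, mirroring the script's names
        "fp32": PrecisionType.Float32,
        "fp16": PrecisionType.Half,
        "int8": PrecisionType.Int8,
    }
    batch_size = 1
    config = Config("model.pdmodel", "model.pdiparams")  # placeholder paths
    config.enable_tensorrt_engine(
        workspace_size=(1 << 30) * batch_size,  # 1 GiB scratch per batch element
        max_batch_size=batch_size,
        min_subgraph_size=3,  # assumed; the script passes its own value
        precision_mode=precision_map["fp16"],
        use_static=False,
        use_calib_mode=False)
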
@@ -297,6 +305,7 @@ def predict_image(predictor,
     img, scale_factor = image_preprocess(image_file, image_shape)
     inputs = {}
     inputs["image"] = img
+
     if FLAGS.include_nms:
         inputs['scale_factor'] = scale_factor
     input_names = predictor.get_input_names()
@@ -354,6 +363,9 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
     input_names = predictor.get_input_names()
     output_names = predictor.get_output_names()
     boxes_tensor = predictor.get_output_handle(output_names[0])
+    print("output_names:", output_names)
+    print("Number of outputs:", len(output_names))
+    print("FLAGS.include_nms:", FLAGS.include_nms)
     if FLAGS.include_nms:
         boxes_num = predictor.get_output_handle(output_names[1])
     for batch_id, data in enumerate(val_loader):
@@ -374,27 +386,79 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
         time_min = min(time_min, timed)
         time_max = max(time_max, timed)
         predict_time += timed
-        if not FLAGS.include_nms:
+        # include_nms False and speed True: apply PPYOLOEPostProcess to raw boxes
+        if not FLAGS.include_nms and FLAGS.speed:
             postprocess = PPYOLOEPostProcess(
                 score_threshold=0.3, nms_threshold=0.6)
             res = postprocess(np_boxes, data_all['scale_factor'])
-        else:
+        # include_nms False and speed False: skip accuracy bookkeeping entirely
+        elif not FLAGS.include_nms and not FLAGS.speed:
+            continue
+        # include_nms True: the model output already contains the final boxes
+        elif FLAGS.include_nms:
             res = {'bbox': np_boxes, 'bbox_num': np_boxes_num}
         metric.update(data_all, res)
         if batch_id % 100 == 0:
             print("Eval iter:", batch_id)
             sys.stdout.flush()
     metric.accumulate()
-    metric.log()
+    if not FLAGS.speed:
+        metric.log()
     map_res = metric.get_results()
     metric.reset()
     time_avg = predict_time / sample_nums
     print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
         round(time_min * 1000, 2),
         round(time_max * 1000, 1), round(time_avg * 1000, 1)))
-    print("[Benchmark] COCO mAP: {}".format(map_res["bbox"][0]))
+    if not FLAGS.speed:
+        print("[Benchmark] COCO mAP: {}".format(map_res["bbox"][0]))
     sys.stdout.flush()
 
+def inference_time(predictor, val_loader, metric, rerun_flag=False):
+    cpu_mems, gpu_mems = 0, 0
+    predict_time = 0.0
+    time_min = float("inf")
+    time_max = float("-inf")
+    sample_nums = len(val_loader)
+    input_names = predictor.get_input_names()
+    output_names = predictor.get_output_names()
+    boxes_tensor = predictor.get_output_handle(output_names[0])
+    print("output_names:", output_names)
+    print("Number of outputs:", len(output_names))
+    print("FLAGS.include_nms:", FLAGS.include_nms)
+    if FLAGS.include_nms:
+        boxes_num = predictor.get_output_handle(output_names[1])
+
+    for batch_id, data in enumerate(val_loader):
+        data_all = {k: np.array(v) for k, v in data.items()}
+        for i, _ in enumerate(input_names):
+            input_tensor = predictor.get_input_handle(input_names[i])
+            input_tensor.copy_from_cpu(data_all[input_names[i]])
+        paddle.device.cuda.synchronize()
+        start_time = time.time()
+        predictor.run()
+        # np_boxes = boxes_tensor.copy_to_cpu()
+        if FLAGS.include_nms:
+            np_boxes_num = boxes_num.copy_to_cpu()
+        if rerun_flag:
+            return
+        end_time = time.time()
+        timed = end_time - start_time
+        time_min = min(time_min, timed)
+        time_max = max(time_max, timed)
+        predict_time += timed
+    time_avg = predict_time / sample_nums
+    print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
+        round(time_min * 1000, 2),
+        round(time_max * 1000, 1), round(time_avg * 1000, 1)))
+    sys.stdout.flush()
 
 def main():
     """
@@ -421,7 +485,7 @@ def main():
             repeats=repeats)
     else:
         reader_cfg = load_config(FLAGS.reader_config)
-
+
         dataset = reader_cfg["EvalDataset"]
         global val_loader
         val_loader = create("EvalReader")(
@@ -432,7 +496,10 @@ def main():
     anno_file = dataset.get_anno()
     metric = COCOMetric(
         anno_file=anno_file, clsid2catid=clsid2catid, IouType="bbox")
-    eval(predictor, val_loader, metric, rerun_flag=rerun_flag)
+    if not FLAGS.speed:
+        eval(predictor, val_loader, metric, rerun_flag=rerun_flag)
+    else:
+        inference_time(predictor, val_loader, metric, rerun_flag=rerun_flag)
 
     if rerun_flag:
         print(
@@ -444,6 +511,13 @@ def main():
     paddle.enable_static()
     parser = argsparser()
     FLAGS = parser.parse_args()
+    # Both flags arrive as strings; convert them to real booleans here,
+    # since any non-empty string (including 'False') is truthy in Python.
+    FLAGS.include_nms = (FLAGS.include_nms == 'True')
+    FLAGS.speed = (FLAGS.speed == 'True')
+
+    print('**************main****************')
+    print(FLAGS)
 
     # DataLoader need run on cpu
     paddle.set_device("cpu")
