@@ -82,9 +82,15 @@ def argsparser():
82
82
parser.add_argument("--img_shape", type=int, default=640, help="input_size")
# NOTE(review): argparse `type=bool` does not parse "False" correctly
# (bool("False") is True), which is presumably why these flags are strings.
# The string is converted back to a real bool in __main__ via an exact
# comparison with 'True', so the default must be exactly 'True' — the
# previous default ' True' (leading space) made that comparison fail and
# silently disabled NMS by default.
parser.add_argument(
    '--include_nms',
    type=str,
    default='True',
    help="Whether include nms or not.")
# Whether this run is a pure speed benchmark (skips metric computation).
# NOTE(review): FLAGS.speed is used truthily downstream (`if not FLAGS.speed`),
# so any non-empty string — including 'False' — evaluates as True; confirm
# callers pass an empty string to disable, or convert like include_nms.
parser.add_argument(
    '--speed',
    type=str,
    default='True',
    help="if speed is True, it will print the inference time.")
88
94
89
95
return parser
90
96
@@ -238,9 +244,11 @@ def load_predictor(
238
244
config = Config (
239
245
os .path .join (model_dir , "model.pdmodel" ),
240
246
os .path .join (model_dir , "model.pdiparams" ))
247
+
248
+ config .enable_memory_optim ()
241
249
if device == "GPU" :
242
250
# initial GPU memory(M), device ID
243
- config .enable_use_gpu (200 , 0 )
251
+ config .enable_use_gpu (1000 , 0 )
244
252
# optimize graph and fuse op
245
253
config .switch_ir_optim (True )
246
254
else :
@@ -260,7 +268,7 @@ def load_predictor(
260
268
}
261
269
if precision in precision_map .keys () and use_trt :
262
270
config .enable_tensorrt_engine (
263
- workspace_size = (1 << 25 ) * batch_size ,
271
+ workspace_size = (1 << 30 ) * batch_size ,
264
272
max_batch_size = batch_size ,
265
273
min_subgraph_size = min_subgraph_size ,
266
274
precision_mode = precision_map [precision ],
@@ -297,6 +305,7 @@ def predict_image(predictor,
297
305
img , scale_factor = image_preprocess (image_file , image_shape )
298
306
inputs = {}
299
307
inputs ["image" ] = img
308
+
300
309
if FLAGS .include_nms :
301
310
inputs ['scale_factor' ] = scale_factor
302
311
input_names = predictor .get_input_names ()
@@ -354,6 +363,9 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
354
363
input_names = predictor .get_input_names ()
355
364
output_names = predictor .get_output_names ()
356
365
boxes_tensor = predictor .get_output_handle (output_names [0 ])
366
+ print ("output_names:" , output_names )
367
+ print ("Number of outputs:" , len (output_names ))
368
+ print ("FLAGS.include_nms:" , FLAGS .include_nms )
357
369
if FLAGS .include_nms :
358
370
boxes_num = predictor .get_output_handle (output_names [1 ])
359
371
for batch_id , data in enumerate (val_loader ):
@@ -374,27 +386,79 @@ def eval(predictor, val_loader, metric, rerun_flag=False):
374
386
time_min = min (time_min , timed )
375
387
time_max = max (time_max , timed )
376
388
predict_time += timed
377
- if not FLAGS .include_nms :
389
+ # print("FLAGS.include_nms:", FLAGS.include_nms)
390
+ # print("FLAGS.speed:", FLAGS.speed)
391
+ # 如果include_nms为false且flags.speed为True,则走PPYOLOEPostProcess
392
+ if not FLAGS .include_nms and FLAGS .speed :
393
+ # print("nms为True的时候走了PPYOLOEPostProcess")
378
394
postprocess = PPYOLOEPostProcess (
379
395
score_threshold = 0.3 , nms_threshold = 0.6 )
380
396
res = postprocess (np_boxes , data_all ['scale_factor' ])
381
- else :
397
+ #如果include_nms为false且flags.speed为False,则跳过
398
+ elif not FLAGS .include_nms and not FLAGS .speed :
399
+ continue
400
+ #如果include_nms,则直接返回
401
+ elif FLAGS .include_nms :
402
+ # print("nms为False的时候直接返回")
382
403
res = {'bbox' : np_boxes , 'bbox_num' : np_boxes_num }
383
404
metric .update (data_all , res )
384
405
if batch_id % 100 == 0 :
385
406
print ("Eval iter:" , batch_id )
386
407
sys .stdout .flush ()
387
408
metric .accumulate ()
388
- metric .log ()
409
+ if not FLAGS .speed :
410
+ metric .log ()
389
411
map_res = metric .get_results ()
390
412
metric .reset ()
391
413
time_avg = predict_time / sample_nums
392
414
print ("[Benchmark]Inference time(ms): min={}, max={}, avg={}" .format (
393
415
round (time_min * 1000 , 2 ),
394
416
round (time_max * 1000 , 1 ), round (time_avg * 1000 , 1 )))
395
- print ("[Benchmark] COCO mAP: {}" .format (map_res ["bbox" ][0 ]))
417
+ if not FLAGS .speed :
418
+ print ("[Benchmark] COCO mAP: {}" .format (map_res ["bbox" ][0 ]))
396
419
sys .stdout .flush ()
397
420
421
def inference_time(predictor, val_loader, metric, rerun_flag=False):
    """Run inference over ``val_loader`` and report latency statistics only.

    Unlike ``eval``, no accuracy metric is computed; ``metric`` is accepted
    solely so this function is call-compatible with ``eval``.

    Args:
        predictor: Paddle inference predictor (created by ``load_predictor``).
        val_loader: iterable of input batches, each a dict of name -> array.
        metric: unused; kept for signature compatibility with ``eval``.
        rerun_flag (bool): if True, run a single batch and return immediately
            (used by the caller to pre-build engines before benchmarking).
    """
    predict_time = 0.0
    time_min = float("inf")
    time_max = float("-inf")
    sample_nums = len(val_loader)
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    if FLAGS.include_nms:
        # When NMS is fused into the model, the second output holds the
        # per-image box count.
        boxes_num = predictor.get_output_handle(output_names[1])

    for batch_id, data in enumerate(val_loader):
        data_all = {k: np.array(v) for k, v in data.items()}
        for name in input_names:
            input_tensor = predictor.get_input_handle(name)
            input_tensor.copy_from_cpu(data_all[name])
        # Drain pending device work so the timer measures only this run.
        paddle.device.cuda.synchronize()
        start_time = time.time()
        predictor.run()
        if FLAGS.include_nms:
            # copy_to_cpu blocks until the device finishes, so the timed
            # interval covers the full inference in this branch.
            boxes_num.copy_to_cpu()
        # NOTE(review): without include_nms nothing is copied back, so
        # end_time may be taken before device work completes — confirm
        # whether a synchronize() is wanted here for accurate timings.
        if rerun_flag:
            return
        end_time = time.time()
        timed = end_time - start_time
        time_min = min(time_min, timed)
        time_max = max(time_max, timed)
        predict_time += timed

    if sample_nums == 0:
        # Avoid ZeroDivisionError when the loader yields no batches.
        print("[Benchmark] val_loader is empty; nothing to time.")
        sys.stdout.flush()
        return
    time_avg = predict_time / sample_nums
    print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
        round(time_min * 1000, 2),
        round(time_max * 1000, 1), round(time_avg * 1000, 1)))
    sys.stdout.flush()
398
462
399
463
def main ():
400
464
"""
@@ -421,7 +485,7 @@ def main():
421
485
repeats = repeats )
422
486
else :
423
487
reader_cfg = load_config (FLAGS .reader_config )
424
-
488
+
425
489
dataset = reader_cfg ["EvalDataset" ]
426
490
global val_loader
427
491
val_loader = create ("EvalReader" )(
@@ -432,7 +496,10 @@ def main():
432
496
anno_file = dataset .get_anno ()
433
497
metric = COCOMetric (
434
498
anno_file = anno_file , clsid2catid = clsid2catid , IouType = "bbox" )
435
- eval (predictor , val_loader , metric , rerun_flag = rerun_flag )
499
+ if not FLAGS .speed :
500
+ eval (predictor , val_loader , metric , rerun_flag = rerun_flag )
501
+ else :
502
+ inference_time (predictor , val_loader , metric , rerun_flag = rerun_flag )
436
503
437
504
if rerun_flag :
438
505
print (
@@ -444,6 +511,13 @@ def main():
444
511
paddle .enable_static ()
445
512
parser = argsparser ()
446
513
FLAGS = parser .parse_args ()
514
+ if FLAGS .include_nms == 'True' :
515
+ FLAGS .include_nms = True
516
+ else :
517
+ FLAGS .include_nms = False
518
+
519
+ print ('**************main****************' )
520
+ print (FLAGS )
447
521
448
522
# DataLoader need run on cpu
449
523
paddle .set_device ("cpu" )
0 commit comments