Skip to content

Commit 5715854

Browse files
Add mkldnn bfloat option in inference scripts (PaddlePaddle#5212)
* add mkldnn bfloat16 args
* add mkldnn_bfloat16 to static deploy
* update
* update
1 parent 50da62f commit 5715854

File tree

3 files changed

+47
-10
lines changed

3 files changed

+47
-10
lines changed

deploy/python/infer.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ class Detector(object):
8989
calibration, trt_calib_mode need to set True
9090
cpu_threads (int): cpu threads
9191
enable_mkldnn (bool): whether to open MKLDNN
92+
enable_mkldnn_bfloat16 (bool): whether to turn on mkldnn bfloat16
9293
output_dir (str): The path of output
9394
threshold (float): The threshold of score for visualization
9495
"""
@@ -105,6 +106,7 @@ def __init__(
105106
trt_calib_mode=False,
106107
cpu_threads=1,
107108
enable_mkldnn=False,
109+
enable_mkldnn_bfloat16=False,
108110
output_dir='output',
109111
threshold=0.5, ):
110112
self.pred_config = self.set_config(model_dir)
@@ -120,7 +122,8 @@ def __init__(
120122
trt_opt_shape=trt_opt_shape,
121123
trt_calib_mode=trt_calib_mode,
122124
cpu_threads=cpu_threads,
123-
enable_mkldnn=enable_mkldnn)
125+
enable_mkldnn=enable_mkldnn,
126+
enable_mkldnn_bfloat16=enable_mkldnn_bfloat16)
124127
self.det_times = Timer()
125128
self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
126129
self.batch_size = batch_size
@@ -323,6 +326,7 @@ class DetectorSOLOv2(Detector):
323326
calibration, trt_calib_mode need to set True
324327
cpu_threads (int): cpu threads
325328
enable_mkldnn (bool): whether to open MKLDNN
329+
enable_mkldnn_bfloat16 (bool): Whether to turn on mkldnn bfloat16
326330
output_dir (str): The path of output
327331
threshold (float): The threshold of score for visualization
328332
@@ -340,6 +344,7 @@ def __init__(
340344
trt_calib_mode=False,
341345
cpu_threads=1,
342346
enable_mkldnn=False,
347+
enable_mkldnn_bfloat16=False,
343348
output_dir='./',
344349
threshold=0.5, ):
345350
super(DetectorSOLOv2, self).__init__(
@@ -353,6 +358,7 @@ def __init__(
353358
trt_calib_mode=trt_calib_mode,
354359
cpu_threads=cpu_threads,
355360
enable_mkldnn=enable_mkldnn,
361+
enable_mkldnn_bfloat16=enable_mkldnn_bfloat16,
356362
output_dir=output_dir,
357363
threshold=threshold, )
358364

@@ -399,7 +405,8 @@ class DetectorPicoDet(Detector):
399405
trt_calib_mode (bool): If the model is produced by TRT offline quantitative
400406
calibration, trt_calib_mode need to set True
401407
cpu_threads (int): cpu threads
402-
enable_mkldnn (bool): whether to open MKLDNN
408+
enable_mkldnn (bool): whether to turn on MKLDNN
409+
enable_mkldnn_bfloat16 (bool): whether to turn on MKLDNN_BFLOAT16
403410
"""
404411

405412
def __init__(
@@ -414,6 +421,7 @@ def __init__(
414421
trt_calib_mode=False,
415422
cpu_threads=1,
416423
enable_mkldnn=False,
424+
enable_mkldnn_bfloat16=False,
417425
output_dir='./',
418426
threshold=0.5, ):
419427
super(DetectorPicoDet, self).__init__(
@@ -427,6 +435,7 @@ def __init__(
427435
trt_calib_mode=trt_calib_mode,
428436
cpu_threads=cpu_threads,
429437
enable_mkldnn=enable_mkldnn,
438+
enable_mkldnn_bfloat16=enable_mkldnn_bfloat16,
430439
output_dir=output_dir,
431440
threshold=threshold, )
432441

@@ -571,7 +580,8 @@ def load_predictor(model_dir,
571580
trt_opt_shape=640,
572581
trt_calib_mode=False,
573582
cpu_threads=1,
574-
enable_mkldnn=False):
583+
enable_mkldnn=False,
584+
enable_mkldnn_bfloat16=False):
575585
"""set AnalysisConfig, generate AnalysisPredictor
576586
Args:
577587
model_dir (str): root path of __model__ and __params__
@@ -611,6 +621,8 @@ def load_predictor(model_dir,
611621
# cache 10 different shapes for mkldnn to avoid memory leak
612622
config.set_mkldnn_cache_capacity(10)
613623
config.enable_mkldnn()
624+
if enable_mkldnn_bfloat16:
625+
config.enable_mkldnn_bfloat16()
614626
except Exception as e:
615627
print(
616628
"The current environment does not support `mkldnn`, so disable mkldnn."
@@ -747,6 +759,7 @@ def main():
747759
trt_calib_mode=FLAGS.trt_calib_mode,
748760
cpu_threads=FLAGS.cpu_threads,
749761
enable_mkldnn=FLAGS.enable_mkldnn,
762+
enable_mkldnn_bfloat16=FLAGS.enable_mkldnn_bfloat16,
750763
threshold=FLAGS.threshold,
751764
output_dir=FLAGS.output_dir)
752765

@@ -781,4 +794,6 @@ def main():
781794
], "device should be CPU, GPU or XPU"
782795
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
783796

797+
assert not (FLAGS.enable_mkldnn==False and FLAGS.enable_mkldnn_bfloat16==True), 'To enable mkldnn bfloat, please turn on both enable_mkldnn and enable_mkldnn_bfloat16'
798+
784799
main()

deploy/python/utils.py

+5
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ def argsparser():
8080
type=ast.literal_eval,
8181
default=False,
8282
help="Whether use mkldnn with CPU.")
83+
parser.add_argument(
84+
"--enable_mkldnn_bfloat16",
85+
type=ast.literal_eval,
86+
default=False,
87+
help="Whether use mkldnn bfloat16 inference with CPU.")
8388
parser.add_argument(
8489
"--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
8590
parser.add_argument(

static/deploy/python/infer.py

+24-7
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ class Detector(object):
6060
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
6161
threshold (float): threshold to reserve the result for output.
6262
enable_mkldnn (bool): whether use mkldnn with CPU.
63+
enable_mkldnn_bfloat16 (bool): whether use mkldnn bfloat16 with CPU.
6364
"""
6465

6566
def __init__(self,
@@ -69,7 +70,8 @@ def __init__(self,
6970
run_mode='fluid',
7071
threshold=0.5,
7172
trt_calib_mode=False,
72-
enable_mkldnn=False):
73+
enable_mkldnn=False,
74+
enable_mkldnn_bfloat16=False):
7375
self.config = config
7476
if self.config.use_python_inference:
7577
self.executor, self.program, self.fecth_targets = load_executor(
@@ -81,7 +83,8 @@ def __init__(self,
8183
min_subgraph_size=self.config.min_subgraph_size,
8284
device=device,
8385
trt_calib_mode=trt_calib_mode,
84-
enable_mkldnn=enable_mkldnn)
86+
enable_mkldnn=enable_mkldnn,
87+
enable_mkldnn_bfloat16=enable_mkldnn_bfloat16)
8588

8689
def preprocess(self, im):
8790
preprocess_ops = []
@@ -229,15 +232,17 @@ def __init__(self,
229232
run_mode='fluid',
230233
threshold=0.5,
231234
trt_calib_mode=False,
232-
enable_mkldnn=False):
235+
enable_mkldnn=False,
236+
enable_mkldnn_bfloat16=False):
233237
super(DetectorSOLOv2, self).__init__(
234238
config=config,
235239
model_dir=model_dir,
236240
device=device,
237241
run_mode=run_mode,
238242
threshold=threshold,
239243
trt_calib_mode=trt_calib_mode,
240-
enable_mkldn=enable_mkldnn)
244+
enable_mkldn=enable_mkldnn,
245+
enable_mkldnn_bfloat16=enable_mkldnn_bfloat16)
241246

242247
def predict(self,
243248
image,
@@ -391,14 +396,16 @@ def load_predictor(model_dir,
391396
device='CPU',
392397
min_subgraph_size=3,
393398
trt_calib_mode=False,
394-
enable_mkldnn=False):
399+
enable_mkldnn=False,
400+
enable_mkldnn_bfloat16=False):
395401
"""set AnalysisConfig, generate AnalysisPredictor
396402
Args:
397403
model_dir (str): root path of __model__ and __params__
398404
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
399405
trt_calib_mode (bool): If the model is produced by TRT offline quantitative
400406
calibration, trt_calib_mode need to set True
401407
enable_mkldnn (bool): Whether use mkldnn with CPU, default is False
408+
enable_mkldnn_bfloat16 (bool): Whether use mkldnn bfloat16 with CPU, default is False
402409
Returns:
403410
predictor (PaddlePredictor): AnalysisPredictor
404411
Raises:
@@ -430,6 +437,8 @@ def load_predictor(model_dir,
430437
config.set_mkldnn_cache_capacity(0)
431438
config.enable_mkldnn()
432439
config.pass_builder().append_pass("interpolate_mkldnn_pass")
440+
if enable_mkldnn_bfloat16:
441+
config.enable_mkldnn_bfloat16()
433442
if run_mode in precision_map.keys():
434443
config.enable_tensorrt_engine(
435444
workspace_size=1 << 10,
@@ -557,15 +566,17 @@ def main():
557566
device=FLAGS.device,
558567
run_mode=FLAGS.run_mode,
559568
trt_calib_mode=FLAGS.trt_calib_mode,
560-
enable_mkldnn=FLAGS.enable_mkldnn)
569+
enable_mkldnn=FLAGS.enable_mkldnn,
570+
enable_mkldnn_bfloat16=FLAGS.enable_mkldnn_bfloat16)
561571
if config.arch == 'SOLOv2':
562572
detector = DetectorSOLOv2(
563573
config,
564574
FLAGS.model_dir,
565575
device=FLAGS.device,
566576
run_mode=FLAGS.run_mode,
567577
trt_calib_mode=FLAGS.trt_calib_mode,
568-
enable_mkldnn=FLAGS.enable_mkldnn)
578+
enable_mkldnn=FLAGS.enable_mkldnn,
579+
enable_mkldnn_bfloat16=FLAGS.enable_mkldnn_bfloat16)
569580
# predict from image
570581
if FLAGS.image_file != '':
571582
predict_image(detector)
@@ -636,6 +647,11 @@ def main():
636647
type=ast.literal_eval,
637648
default=False,
638649
help="Whether use mkldnn with CPU.")
650+
parser.add_argument(
651+
"--enable_mkldnn_bfloat16",
652+
type=ast.literal_eval,
653+
default=False,
654+
help="Whether use mkldnn bfloat16 with CPU.")
639655
FLAGS = parser.parse_args()
640656
print_arguments(FLAGS)
641657
if FLAGS.image_file != '' and FLAGS.video_file != '':
@@ -644,5 +660,6 @@ def main():
644660
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
645661
], "device should be CPU, GPU or XPU"
646662
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
663+
assert not (FLAGS.enable_mkldnn==False and FLAGS.enable_mkldnn_bfloat16==True),"To turn on mkldnn_bfloat, please set both enable_mkldnn and enable_mkldnn_bfloat16 True"
647664

648665
main()

0 commit comments

Comments (0)