Skip to content

Commit c7ec858

Browse files
authored
Add latency predictor function and doc (#905)
1 parent 2550fb4 commit c7ec858

File tree

7 files changed

+1099
-2
lines changed

7 files changed

+1099
-2
lines changed

demo/analysis/latency_predictor.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import os
2+
import subprocess
3+
import argparse
4+
5+
import paddle
6+
from paddleslim.analysis import TableLatencyPredictor
7+
8+
from paddle.vision.models import mobilenet_v1, mobilenet_v2
9+
10+
opt_tool = 'opt_ubuntu' # use in linux
11+
# opt_tool = 'opt_M1_mac' # use in mac with M1 chip
12+
# opt_tool = 'opt_intel_mac' # use in mac with intel chip
13+
14+
parser = argparse.ArgumentParser(description='latency predictor')
15+
parser.add_argument('--model', type=str, help='which model to test.')
16+
parser.add_argument('--data_type', type=str, default='fp32')
17+
18+
args = parser.parse_args()
19+
20+
if not os.path.exists(opt_tool):
21+
subprocess.call(
22+
f'wget https://paddle-slim-models.bj.bcebos.com/LatencyPredictor/{opt_tool}',
23+
shell=True)
24+
subprocess.call(f'chmod +x {opt_tool}', shell=True)
25+
26+
27+
def get_latency(model, data_type):
    """Predict the inference latency (ms) of *model* on the target hardware.

    Args:
        model: a ``paddle.nn.Layer`` dygraph model to benchmark.
        data_type (str): precision used for prediction, ``'fp32'`` or ``'int8'``.

    Returns:
        The predicted latency (milliseconds) for a [1, 3, 224, 224] input,
        as returned by ``TableLatencyPredictor.predict_latency``.
    """
    import shutil  # local import: only needed here, for temp-dir cleanup

    paddle.disable_static()  # make sure we are in dygraph mode
    # 845 chip latency table, 4 threads, power_mode 3, batch size 1 — matches
    # the settings documented in the accompanying tutorial.
    predictor = TableLatencyPredictor(
        f'./{opt_tool}', hardware='845', threads=4, power_mode=3, batchsize=1)
    latency = predictor.predict_latency(
        model,
        input_shape=[1, 3, 224, 224],
        save_dir='./tmp_model',
        data_type=data_type,
        task_type='cls')
    print('{} latency : {}'.format(data_type, latency))

    # Remove the intermediate model files. shutil.rmtree avoids spawning a
    # shell (the original used `rm -rf` through subprocess with shell=True);
    # the original's second, redundant paddle.disable_static() call (we are
    # already in dygraph mode) has been dropped.
    shutil.rmtree('./tmp_model', ignore_errors=True)
    return latency
42+
43+
44+
if __name__ == '__main__':
    # CLI model name -> paddle.vision constructor.
    model_builders = {
        'mobilenet_v1': mobilenet_v1,
        'mobilenet_v2': mobilenet_v2,
    }
    if args.model not in model_builders:
        # Raise instead of `assert False`: asserts are stripped under -O.
        raise ValueError(
            'model should be mobilenet_v1 or mobilenet_v2, got {!r}'.format(
                args.model))
    model = model_builders[args.model]()

    latency = get_latency(model, args.data_type)

    # Reference latencies (ms) from the 845 hardware table — a regression
    # check that the predictor's output has not drifted.
    expected = {
        ('mobilenet_v1', 'fp32'): 41.92806607483133,
        ('mobilenet_v1', 'int8'): 36.64814722993898,
        ('mobilenet_v2', 'fp32'): 27.847896889217566,
        ('mobilenet_v2', 'int8'): 23.967800360138803,
    }
    key = (args.model, args.data_type)
    if key not in expected:
        raise ValueError('model or data_type wrong.')
    assert latency == expected[key], (
        'predicted latency {} != expected {}'.format(latency, expected[key]))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
动态图
3+
==============
4+
5+
.. toctree::
6+
:maxdepth: 1
7+
8+
latency_predictor.md
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# LatencyPredictor使用教程
2+
3+
LatencyPredictor主要功能是根据提供的op-latency映射表,预估神经网络网络在特定硬件设备上的实际耗时。它基于Paddle-Lite开发,适用于使用Paddle-Lite部署的模型。映射表以key-value的形式存储,key包含了神经网络模型经过Paddle-Lite图优化后的各种融合op信息,value则代表在特定硬件上的实际耗时。
4+
5+
## 使用方法
6+
7+
1. 下载或自行编译opt优化工具
8+
2. 构建LatencyPredictor
9+
3. 定义模型和预测
10+
11+
### 1. 下载或自行编译opt优化工具
12+
1.1 下载提供的opt工具,可根据运行环境下载适用的opt,目前提供Mac平台([M1芯片](https://paddle-slim-models.bj.bcebos.com/LatencyPredictor/opt_M1_mac)和[Intel芯片](https://paddle-slim-models.bj.bcebos.com/LatencyPredictor/opt_intel_mac))和[Ubuntu](https://paddle-slim-models.bj.bcebos.com/LatencyPredictor/opt_ubuntu)平台的opt工具下载。
13+
1.2 也可以自行通过Paddle-Lite源码编译opt工具,具体请参考Paddle-Lite[文档](https://paddle-lite.readthedocs.io/zh/latest/user_guides/model_optimize_tool.html)。编译时需要关闭Paddle-Lite的内存复用功能,即注释掉这[几行代码](https://github.com/PaddlePaddle/Paddle-Lite/blob/d76f45be989d3e01cebf2ac18e047cfd37d52666/lite/core/optimizer/optimizer.cc#L266-L268)
14+
15+
### 2. 构建LatencyPredictor
16+
17+
提供opt工具路径,以及芯片和测试参数信息,LatencyPredictor会根据这些参数自动下载对应的映射表。如下所示,芯片为845芯片,测试线程数threads为4,测速模式power_mode为3,测试batchsize为1.
18+
```
19+
import paddleslim
20+
21+
opt_path = {opt工具路径}
22+
predictor = paddleslim.TableLatencyPredictor(opt_path, hardware='845', threads=4, power_mode=3, batchsize=1)
23+
```
24+
25+
### 3. 定义模型和预测
26+
27+
定义model后可通过predict_latency函数直接预测模型推理耗时,其中,input_shape为输入大小,save_dir为中间pbmodel模型保存路径,data_type可选fp32或int8,task_type='cls'表示该模型为分类模型。
28+
```
29+
import paddle
30+
from paddle.vision.models import mobilenet_v1
31+
32+
model = mobilenet_v1()
33+
latency = predictor.predict_latency(model, input_shape=[1,3,224,224], save_dir='./model', data_type='int8', task_type='cls')
34+
print('predicted latency = {}ms'.format(latency))
35+
```

paddleslim/analysis/__init__.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,19 @@
1414
from .flops import flops, dygraph_flops
from .model_size import model_size
from .latency import LatencyEvaluator, TableLatencyEvaluator
from .latency_predictor import LatencyPredictor, TableLatencyPredictor
from ._utils import get_key_from_op, save_cls_model, save_det_model, save_seg_model

# Public API of paddleslim.analysis. Quote style unified to single quotes
# (the previous list mixed ' and ") for consistency.
__all__ = [
    'flops',
    'dygraph_flops',
    'model_size',
    'LatencyEvaluator',
    'TableLatencyEvaluator',
    'LatencyPredictor',
    'TableLatencyPredictor',
    'get_key_from_op',
    'save_cls_model',
    'save_det_model',
    'save_seg_model',
]

0 commit comments

Comments
 (0)