Skip to content

Commit f413e53

Browse files
author
Jian Weng
committed
cgo ae update
1 parent 8effab6 commit f413e53

File tree

4 files changed

+93
-18
lines changed

4 files changed

+93
-18
lines changed

apps/cpu/kernel/conv3d.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,17 @@
1111
from tvm.relay import op
1212

1313
workloads = [
14-
#(256, 16, 16, 256, 3, 1),
15-
#(512, 9, 9, 512, 3, 1),
14+
(256, 16, 16, 256, 3, 1),
15+
(512, 9, 9, 512, 3, 1),
1616
(128, 30, 30, 128, 3, 1),
17-
#(64, 56, 56, 128, 1, 2),
18-
#(64, 58, 58, 64, 3, 1),
19-
#(128, 28, 28, 256, 1, 2),
20-
#(256, 16, 16, 512, 3, 2),
21-
#(64, 58, 58, 128, 3, 2),
22-
#(4, 230, 230, 64, 7, 2),
23-
#(128, 30, 30, 256, 3, 2),
24-
#(256, 14, 14, 512, 1, 2)
17+
(64, 56, 56, 128, 1, 2),
18+
(64, 58, 58, 64, 3, 1),
19+
(128, 28, 28, 256, 1, 2),
20+
(256, 16, 16, 512, 3, 2),
21+
(64, 58, 58, 128, 3, 2),
22+
(4, 230, 230, 64, 7, 2),
23+
(128, 30, 30, 256, 3, 2),
24+
(256, 14, 14, 512, 1, 2)
2525
]
2626

2727
output = []
@@ -96,8 +96,8 @@
9696

9797
relay.backend.compile_engine.get().clear()
9898
tune.cpu_idx += 1
99-
#if tune.cpu_idx - target > 8:
100-
# break
99+
if tune.cpu_idx - target > 8:
100+
break
101101
if tune.cpu_idx >= tune.total_idx - 1:
102102
break
103103
#print(results)

apps/cpu/kernel/res

+15
Original file line numberDiff line numberDiff line change
@@ -1 +1,16 @@
11
(303464448, 861.4658333333336, 352.2652161674045)
2+
(303464448, 1169.7422000000001, 259.42848603735075)
3+
(303464448, 1008.5250666666667, 300.89926173376887)
4+
(303464448, 864.0738, 351.2020014957056)
5+
(303464448, 987.8491000000001, 307.1971700940963)
6+
(303464448, 1091.261066666667, 278.0860210902175)
7+
(303464448, 1014.0442, 299.2615588156808)
8+
(303464448, 1068.1393, 284.1056854663058)
9+
(3211264, 15.877699999999999, 202.24994804033332)
10+
(303464448, 991.7283666666665, 305.9955308326867)
11+
(3211264, 16.94766666666667, 189.48118718407642)
12+
(75866112, 289.8498333333333, 261.74281740142453)
13+
(75866112, 277.08956666666666, 273.7963500851169)
14+
(1376829440, 955.9776666666668, 1440.231804578419)
15+
(75866112, 280.5848333333334, 270.38564807197343)
16+
(3211264, 16.792066666666667, 191.23697301503486)

poc/vnni/input

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
608
2-
14
3-
14
4-
192
5-
1
6-
1
1+
128
2+
28
3+
28
4+
256
75
1
86
1
7+
2
8+
2

poc/vnni/run-3d.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import subprocess
2+
3+
#workloads = [(1024,16,16,2048,1024,1,1,1,1),
4+
#(960,8,8,160,960,1,1,1,1),
5+
#(256,56,56,512,256,1,1,1,1),
6+
#(64,56,56,128,64,1,1,1,1),
7+
#(512,10,10,512,512,3,3,1,1),
8+
#(192,16,16,160,192,3,3,1,1),
9+
#(64,65,65,128,64,3,3,1,1),
10+
#(320,65,65,384,320,3,3,1,1),
11+
#(384,8,8,384,384,1,3,1,1),
12+
#(192,23,23,192,192,7,1,1,1),
13+
#(3,224,224,64,3,7,7,2,2),
14+
#(1024,15,15,2048,1024,1,1,2,2),
15+
#(128,65,65,256,128,3,3,2,2)]
16+
#
17+
#workloads = [(1, 288, 35, 35, 384, 288, 3, 3, 2, 2), (1, 160, 9, 9, 224, 160, 3, 3, 1, 1), (1, 1056, 7, 7, 192, 1056, 1, 1, 1, 1), (1, 80, 73, 73, 192, 80, 3, 3, 1, 1), (1, 128, 16, 16, 128, 128, 3, 3, 1, 1), (1, 192, 16, 16, 192, 192, 3, 3, 1, 1), (1, 256, 16, 16, 256, 256, 3, 3, 1, 1), (1, 1024, 14, 14, 512, 1024, 1, 1, 1, 1), (1, 128, 16, 16, 160, 128, 3, 3, 1, 1), (1, 576, 14, 14, 192, 576, 1, 1, 1, 1), (1, 96, 16, 16, 128, 96, 3, 3, 1, 1), (1, 1024, 14, 14, 256, 1024, 1, 1, 1, 1), (1, 576, 14, 14, 128, 576, 1, 1, 1, 1), (1, 64, 29, 29, 96, 64, 3, 3, 1, 1), (1, 64, 56, 56, 128, 64, 1, 1, 2, 2), (1, 608, 14, 14, 192, 608, 1, 1, 1, 1)]
18+
#
19+
# Workload tuples fed to the benchmark binary, one run per tuple.
# NOTE(review): the meaning of each field is not visible in this script;
# fields 4 and 5 are each written twice to the input file -- presumably
# square kernel/stride values, confirm against mkldnn_conv3d's input parser.
workloads = [
    (256, 16, 16, 256, 3, 1),
    (512, 9, 9, 512, 3, 1),
    (128, 30, 30, 128, 3, 1),
    (64, 56, 56, 128, 1, 2),
    (64, 58, 58, 64, 3, 1),
    (128, 28, 28, 256, 1, 2),
    (256, 16, 16, 512, 3, 2),
    (64, 58, 58, 128, 3, 2),
    (4, 230, 230, 64, 7, 2),
    (128, 30, 30, 256, 3, 2),
    (256, 14, 14, 512, 1, 2),
]

BINARY = './mkldnn_conv3d.out'
INPUT_PATH = 'input'
REPEATS = 10
TIME_PREFIX = 'Exec time: '
TIME_SUFFIX = 'us'

# Order in which workload fields are written to the input file; the last
# two fields are each emitted twice.
FIELD_ORDER = (0, 1, 2, 3, 4, 4, 5, 5)


def input_lines(workload):
    """Return the newline-terminated lines written to the input file."""
    return [str(workload[k]) + '\n' for k in FIELD_ORDER]


def parse_exec_time(output):
    """Extract the timing value from the benchmark's stdout.

    The second line of ``output`` is expected to look like
    ``Exec time: <number>us``. The number is scaled by 1e6 and truncated
    to an int, exactly as the original script did.
    NOTE(review): scaling a value labelled ``us`` by 1e6 looks odd --
    confirm the unit the binary actually prints.

    Raises:
        IndexError: if ``output`` has fewer than two lines.
        ValueError: if the remaining text is not a number.
    """
    line = output.split('\n')[1].strip()
    # Remove the exact prefix/suffix; str.lstrip/rstrip take a *set* of
    # characters and could eat into the value itself.
    if line.startswith(TIME_PREFIX):
        line = line[len(TIME_PREFIX):]
    if line.endswith(TIME_SUFFIX):
        line = line[:-len(TIME_SUFFIX)]
    return int(float(line) * 1e6)


def write_input(workload):
    """Write ``workload`` to INPUT_PATH in the layout the binary reads."""
    with open(INPUT_PATH, 'w') as f:
        f.writelines(input_lines(workload))


def measure():
    """Run the binary REPEATS times on INPUT_PATH and return the mean time."""
    samples = []
    for _ in range(REPEATS):
        # Feed the file on stdin directly instead of going through a shell.
        with open(INPUT_PATH) as stdin:
            raw = subprocess.check_output([BINARY], stdin=stdin)
        samples.append(parse_exec_time(raw.decode('utf-8')))
    return sum(samples) / len(samples)


def main():
    """Benchmark every workload, printing its mean time or 'fails'."""
    for workload in workloads:
        write_input(workload)
        try:
            mean = measure()
        except (subprocess.CalledProcessError, OSError,
                ValueError, IndexError):
            # A failed workload is reported and skipped; previously the
            # script went on to call min() on an empty list and crashed.
            print(workload, 'fails')
            continue
        print(workload, mean)


if __name__ == '__main__':
    main()
60+

0 commit comments

Comments
 (0)