Skip to content

Commit d4ccf23

Browse files
author
Jian Weng
committed
change to input
1 parent c4b529e commit d4ccf23

File tree

1 file changed

+56
-26
lines changed

1 file changed

+56
-26
lines changed

apps/gpu/relay.py

+56-26
Original file line number | Diff line number | Diff line change
@@ -13,16 +13,16 @@
1313
#t0, t1 = eval(input())
1414
#n, c, h, w = map(int, t0)
1515
#oc, ic, kh, kw = map(int, t1)
16-
n, c, h, w = 1, 192, 16, 16
17-
oc, ic, kh, kw = 192, c, 3, 3
16+
n, c, h, w, oc, ic, kh, kw, sh, sw = map(int, input().split())
17+
18+
oh = (h - kh) // sh + 1
19+
ow = (w - kw) // sw + 1
1820

1921
var_x = relay.var('x', shape=(n, c, h, w), dtype='float32')
2022
var_w = relay.const(tvm.nd.array((np.random.randn(oc, ic, kh, kw) * 128).astype('float32')))
2123
var_b = relay.const(tvm.nd.array((np.random.randn(1, oc, 1, 1) * 128).astype('float32')))
22-
conv2d = relay.nn.conv2d(var_x, var_w, out_dtype='float32', kernel_size=(kh, kw), channels=oc, strides=(1, 1))
23-
biased = relay.add(conv2d, var_b)
24-
y = relay.multiply(biased, relay.const(123., 'float32'))
25-
#y = conv2d
24+
conv2d = relay.nn.conv2d(var_x, var_w, out_dtype='float32', kernel_size=(kh, kw), channels=oc, strides=(sh, sw))
25+
y = conv2d
2626

2727
func = relay.Function([var_x], y)
2828
module = tvm.IRModule()
@@ -38,25 +38,55 @@ def tracer(module, info, is_before):
3838
#else:
3939
# print('Executes: ', info.name, (time.time() - timing) * 1000)
4040

41-
for i in ['fuse', 'pad']:
42-
for j in [16, 32, 64]:
43-
from tensorizer import tune
41+
from tensorizer import tune
42+
tune.enable = False
43+
44+
45+
def run():
46+
passes = [(1, tensorizer.rewrite)]
47+
with tvm.transform.PassContext(opt_level=3, trace=tracer, config={'tir.add_lower_pass': passes}):
48+
#with tvm.transform.PassContext(opt_level=4, trace=tracer):
49+
#graph, lib, params = tvm.relay.build(module, target='cuda -libs=cublas,cudnn')
50+
graph, lib, params = tvm.relay.build(module, target='nvptx -libs=cublas,cudnn')
51+
#from tvm.contrib import graph_runtime as runtime
52+
from tvm.contrib.debugger import debug_runtime as runtime
53+
func = runtime.create(graph, lib, tvm.gpu())
54+
55+
56+
x_ =(np.random.randn(n, c, h, w) * 128).astype('float32')
57+
func.set_input('x', x_)
58+
#timer = func.module.time_evaluator('run', ctx=tvm.gpu(), number=1, repeat=10)
59+
timed = []
60+
for i in range(10):
61+
func.run()
62+
for node, time in zip(func.debug_datum._nodes_list, func.debug_datum._time_list):
63+
if 'conv2d' in node['name']:
64+
timed.append(time[0])
65+
#timed = timer()
66+
#while np.var(timed.results) > 1e-5:
67+
# timed = timer()
68+
return np.mean(timed)
69+
70+
base = None
71+
timed = run()
72+
base = timed * 1e6
73+
relay.backend.compile_engine.get().clear()
74+
75+
results = []
76+
for i in [None, 'fuse', 'pad'] if ow < 32 else [None]:
77+
j = 16
78+
while True:
4479
tune.padding = i
4580
tune.splitk = j
46-
passes = [(1, tensorizer.rewrite)]
47-
with tvm.transform.PassContext(opt_level=3, trace=tracer, config={'tir.add_lower_pass': passes}):
48-
#with tvm.transform.PassContext(opt_level=4, trace=tracer):
49-
#graph, lib, params = tvm.relay.build(module, target='cuda -libs=cublas,cudnn')
50-
graph, lib, params = tvm.relay.build(module, target='nvptx -libs=cublas,cudnn')
51-
print('compiled')
52-
from tvm.contrib import graph_runtime as runtime
53-
#from tvm.contrib.debugger import debug_runtime as runtime
54-
func = runtime.create(graph, lib, tvm.gpu())
55-
56-
x_ =(np.random.randn(n, c, h, w) * 128).astype('float32')
57-
func.set_input('x', x_)
58-
timer = func.module.time_evaluator('run', ctx=tvm.gpu(), number=1, repeat=10)
59-
timed = timer()
60-
61-
print((n * oc * (h - kh + 1) * (w - kw + 1)) * (kh * kw * ic) / timed.mean / 1e9)
62-
print('%d us' % int(timed.mean * 1e6))
81+
timed = run()
82+
83+
results.append(((i, j), timed * 1e6))
84+
85+
relay.backend.compile_engine.get().clear()
86+
j <<= 1
87+
if j > tune.total_idx:
88+
break
89+
90+
with open('/home/ubuntu/gpu-tune.log', 'a') as f:
91+
f.write(f'{tune.ashape} {tune.bshape} {tune.strides} {results}, {base}\n')
92+
f.write(f'{n} {c} {h} {w} {oc} {ic} {kh} {kw} {sh} {sw} {results}, {base}\n')

0 commit comments

Comments
 (0)