#t0, t1 = eval(input())
#n, c, h, w = map(int, t0)
#oc, ic, kh, kw = map(int, t1)
- n, c, h, w = 1, 192, 16, 16
- oc, ic, kh, kw = 192, c, 3, 3
+ n, c, h, w, oc, ic, kh, kw, sh, sw = map(int, input().split())
+
+ oh = (h - kh) // sh + 1
+ ow = (w - kw) // sw + 1
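+ # Output spatial size for an unpadded, dilation-1 convolution; for example,
+ # h = 16, kh = 3, sh = 1 gives oh = (16 - 3) // 1 + 1 = 14.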

var_x = relay.var('x', shape=(n, c, h, w), dtype='float32')
var_w = relay.const(tvm.nd.array((np.random.randn(oc, ic, kh, kw) * 128).astype('float32')))
var_b = relay.const(tvm.nd.array((np.random.randn(1, oc, 1, 1) * 128).astype('float32')))
- conv2d = relay.nn.conv2d(var_x, var_w, out_dtype='float32', kernel_size=(kh, kw), channels=oc, strides=(1, 1))
- biased = relay.add(conv2d, var_b)
- y = relay.multiply(biased, relay.const(123., 'float32'))
- #y = conv2d
+ conv2d = relay.nn.conv2d(var_x, var_w, out_dtype='float32', kernel_size=(kh, kw), channels=oc, strides=(sh, sw))
+ y = conv2d
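+ # The bias-add and constant scaling from the old graph are dropped here, so
+ # the timings below measure the convolution alone (var_b is left unused).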

func = relay.Function([var_x], y)
module = tvm.IRModule()
@@ -38,25 +38,55 @@ def tracer(module, info, is_before):
#else:
# print('Executes: ', info.name, (time.time() - timing) * 1000)

- for i in ['fuse', 'pad']:
-     for j in [16, 32, 64]:
-         from tensorizer import tune
+ from tensorizer import tune
+ tune.enable = False
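+ # Presumably disables the tensorizer's built-in tuning so that tune.padding
+ # and tune.splitk can be swept by hand below (an assumption about tensorizer.tune).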
+
+
+ def run():
+     passes = [(1, tensorizer.rewrite)]
+     with tvm.transform.PassContext(opt_level=3, trace=tracer, config={'tir.add_lower_pass': passes}):
+         #with tvm.transform.PassContext(opt_level=4, trace=tracer):
+         #graph, lib, params = tvm.relay.build(module, target='cuda -libs=cublas,cudnn')
+         graph, lib, params = tvm.relay.build(module, target='nvptx -libs=cublas,cudnn')
+     #from tvm.contrib import graph_runtime as runtime
+     from tvm.contrib.debugger import debug_runtime as runtime
+     func = runtime.create(graph, lib, tvm.gpu())
+
+
+     x_ = (np.random.randn(n, c, h, w) * 128).astype('float32')
+     func.set_input('x', x_)
+     #timer = func.module.time_evaluator('run', ctx=tvm.gpu(), number=1, repeat=10)
+     timed = []
+     for i in range(10):
+         func.run()
+         for node, time in zip(func.debug_datum._nodes_list, func.debug_datum._time_list):
+             if 'conv2d' in node['name']:
+                 timed.append(time[0])
+     #timed = timer()
+     #while np.var(timed.results) > 1e-5:
+     #    timed = timer()
+     return np.mean(timed)
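+ # run() returns the mean conv2d kernel time in seconds over 10 executions,
+ # taken from the debug runtime's per-node timings.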
+
+ base = None
+ timed = run()
+ base = timed * 1e6
+ relay.backend.compile_engine.get().clear()
+
+ results = []
+ for i in ([None, 'fuse', 'pad'] if ow < 32 else [None]):
+     j = 16
+     while True:
        tune.padding = i
        tune.splitk = j
-         passes = [(1, tensorizer.rewrite)]
-         with tvm.transform.PassContext(opt_level=3, trace=tracer, config={'tir.add_lower_pass': passes}):
-             #with tvm.transform.PassContext(opt_level=4, trace=tracer):
-             #graph, lib, params = tvm.relay.build(module, target='cuda -libs=cublas,cudnn')
-             graph, lib, params = tvm.relay.build(module, target='nvptx -libs=cublas,cudnn')
-         print('compiled')
-         from tvm.contrib import graph_runtime as runtime
-         #from tvm.contrib.debugger import debug_runtime as runtime
-         func = runtime.create(graph, lib, tvm.gpu())
-
-         x_ = (np.random.randn(n, c, h, w) * 128).astype('float32')
-         func.set_input('x', x_)
-         timer = func.module.time_evaluator('run', ctx=tvm.gpu(), number=1, repeat=10)
-         timed = timer()
-
-         print((n * oc * (h - kh + 1) * (w - kw + 1)) * (kh * kw * ic) / timed.mean / 1e9)
-         print('%d us' % int(timed.mean * 1e6))
+         timed = run()
+
+         results.append(((i, j), timed * 1e6))
+
+         relay.backend.compile_engine.get().clear()
+         j <<= 1
+         if j > tune.total_idx:
+             break
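+ # The loop above doubles the split-k candidate until it exceeds tune.total_idx,
+ # which is assumed to bound the useful split range.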
+
+ with open('/home/ubuntu/gpu-tune.log', 'a') as f:
+     f.write(f'{tune.ashape} {tune.bshape} {tune.strides} {results}, {base}\n')
+     f.write(f'{n} {c} {h} {w} {oc} {ic} {kh} {kw} {sh} {sw} {results}, {base}\n')
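+ # Each log line records the problem shape/strides, the (padding, split-k)
+ # timings in microseconds, and the untuned baseline time.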