@@ -7,10 +7,16 @@ using namespace Halide::Tools;
7
7
double run_test_1 (bool auto_schedule) {
8
8
Var x (" x" ), y (" y" ), dx (" dx" ), dy (" dy" ), c (" c" );
9
9
10
+ int W = 1024 ;
11
+ int H = 1920 ;
12
+ int search_area = 7 ;
13
+
14
+ Buffer<uint32_t > im (2048 );
15
+ im.fill (17 );
16
+
10
17
Func f (" f" );
11
- f (x, y, dx, dy) = x + y + dx + dy ;
18
+ f (x, y, dx, dy) = im (x) + im ( y + 1 ) + im ( dx + search_area / 2 ) + im (dy + search_area / 2 ) ;
12
19
13
- int search_area = 7 ;
14
20
RDom dom (-search_area / 2 , search_area, -search_area / 2 , search_area, " dom" );
15
21
16
22
// If 'f' is inlined into 'r', the only storage layout that the auto scheduler
@@ -23,23 +29,20 @@ double run_test_1(bool auto_schedule) {
23
29
24
30
if (auto_schedule) {
25
31
// Provide estimates on the pipeline output
26
- r.set_estimates ({{0 , 1024 }, {0 , 1024 }, {0 , 3 }});
32
+ r.set_estimates ({{0 , W }, {0 , H }, {0 , 3 }});
27
33
// Auto-schedule the pipeline
28
34
p.apply_autoscheduler (target, {" Mullapudi2016" });
29
35
} else {
30
- /*
36
+ Var par;
31
37
r.update (0 ).fuse (c, y, par).parallel (par).reorder (x, dom.x , dom.y ).vectorize (x, 4 );
32
- r.fuse(c, y, par).parallel(par).vectorize(x, 4); */
33
-
34
- // The sequential schedule in this case seems to perform best which is
35
- // odd have to investigate this further.
38
+ r.fuse (c, y, par).parallel (par).vectorize (x, 4 );
36
39
}
37
40
38
41
// Inspect the schedule (only for debugging)
39
42
// r.print_loop_nest();
40
43
41
44
// Run the schedule
42
- Buffer<int > out (1024 , 1024 , 3 );
45
+ Buffer<int > out (W, H , 3 );
43
46
double t = benchmark (3 , 10 , [&]() {
44
47
p.realize (out);
45
48
});
@@ -154,7 +157,7 @@ int main(int argc, char **argv) {
154
157
double manual_time = run_test_1 (false );
155
158
double auto_time = run_test_1 (true );
156
159
157
- const double slowdown_factor = 15 .0 ; // TODO: whoa
160
+ const double slowdown_factor = 2 .0 ;
158
161
if (!get_jit_target_from_environment ().has_gpu_feature () && auto_time > manual_time * slowdown_factor) {
159
162
std::cerr << " Autoscheduler time (1) is slower than expected:\n "
160
163
<< " ======================\n "
0 commit comments