Skip to content

Commit a8f10a9

Browse files
committed
implement vectorization fallback mechanisms
1 parent 7cc1016 commit a8f10a9

File tree

3 files changed

+66
-13
lines changed

3 files changed

+66
-13
lines changed

loopy/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@
4343
AddressSpace,
4444
TemporaryVariable,
4545
SubstitutionRule,
46-
CallMangleInfo)
46+
CallMangleInfo,
47+
VectorizeTag)
4748
from loopy.kernel.function_interface import (
4849
CallableKernel, ScalarCallable)
4950
from loopy.translation_unit import (
@@ -190,7 +191,7 @@
190191
"AddressSpace",
191192
"TemporaryVariable",
192193
"SubstitutionRule",
193-
"CallMangleInfo",
194+
"CallMangleInfo", "VectorizeTag",
194195

195196
"make_kernel", "UniqueName", "make_function",
196197

loopy/codegen/__init__.py

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -281,23 +281,47 @@ def try_vectorized(self, what, func):
281281
return self.unvectorize(func)
282282

283283
def unvectorize(self, func):
284+
from loopy.codegen.result import (merge_codegen_results,
285+
CodeGenerationResult)
286+
from loopy.target import VectorizationFallback
287+
284288
vinf = self.vectorization_info
285289
assert vinf is not None
286290

287291
result = []
288292
novec_self = self.copy(vectorization_info=None)
289293

290-
for i in range(vinf.length):
291-
idx_aff = isl.Aff.zero_on_domain(vinf.space.params()) + i
292-
new_codegen_state = novec_self.fix(vinf.iname, idx_aff)
293-
generated = func(new_codegen_state)
294-
295-
if isinstance(generated, list):
296-
result.extend(generated)
294+
if self.target.vectorization_fallback == VectorizationFallback.UNROLL:
295+
for i in range(vinf.length):
296+
idx_aff = isl.Aff.zero_on_domain(vinf.space.params()) + i
297+
new_codegen_state = novec_self.fix(vinf.iname, idx_aff)
298+
generated = func(new_codegen_state)
299+
300+
if isinstance(generated, list):
301+
result.extend(generated)
302+
else:
303+
result.append(generated)
304+
elif self.target.vectorization_fallback == VectorizationFallback.OMP_SIMD:
305+
astb = self.ast_builder
306+
inner = func(novec_self)
307+
if isinstance(inner, list):
308+
inner = merge_codegen_results(novec_self, inner)
309+
assert isinstance(inner, CodeGenerationResult)
310+
if isinstance(inner.current_ast(novec_self),
311+
astb.ast_comment_class):
312+
# loop body is a comment => do not emit the loop
313+
loop_cgr = inner
297314
else:
298-
result.append(generated)
315+
result.append(astb.emit_pragma("omp simd"))
316+
loop_cgr = inner.with_new_ast(
317+
novec_self,
318+
astb.emit_sequential_loop(
319+
novec_self, vinf.iname, self.kernel.index_dtype,
320+
0, vinf.length-1, inner.current_ast(novec_self)))
321+
result.append(loop_cgr)
322+
else:
323+
raise NotImplementedError(self.target.vectorization_fallback)
299324

300-
from loopy.codegen.result import merge_codegen_results
301325
return merge_codegen_results(self, result)
302326

303327
@property

loopy/codegen/loop.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,25 @@ def generate_unroll_loop(codegen_state, sched_index):
160160

161161
# {{{ vectorized loops
162162

163+
def raise_for_unvectorizable_loop(codegen_state, sched_index):
    """Unconditionally raise an error naming the item that cannot be vectorized.

    Takes the same ``(codegen_state, sched_index)`` arguments as the loop
    code generators in this module, but never returns.

    :arg codegen_state: object whose ``kernel`` attribute is the kernel
        being processed.
    :arg sched_index: index into ``kernel.linearization`` of the offending
        item.
    :raises RuntimeError: always; the message embeds the linearization item
        found at *sched_index*.
    """
    kernel = codegen_state.kernel
    # Read 'linearization', matching generate_vectorize_loop, rather than
    # going through the differently named 'schedule' attribute.
    raise RuntimeError(f"Cannot vectorize {kernel.linearization[sched_index]}")
166+
167+
163168
def generate_vectorize_loop(codegen_state, sched_index):
169+
from loopy.kernel.data import VectorizeTag
170+
from loopy.target import VectorizationFallback
164171
kernel = codegen_state.kernel
165172

166173
iname = kernel.linearization[sched_index].iname
174+
vec_tag, = kernel.inames[iname].tags_of_type(VectorizeTag)
175+
176+
if kernel.target.vectorization_fallback == VectorizationFallback.UNROLL:
177+
fallback_codegen_routine = generate_unroll_loop
178+
elif kernel.target.vectorization_fallback == VectorizationFallback.OMP_SIMD:
179+
fallback_codegen_routine = generate_openmp_simd_loop
180+
else:
181+
raise NotImplementedError(kernel.target.vectorization_fallback)
167182

168183
bounds = kernel.get_iname_bounds(iname, constants_only=True)
169184

@@ -177,7 +192,7 @@ def generate_vectorize_loop(codegen_state, sched_index):
177192
warn(kernel, "vec_upper_not_const",
178193
"upper bound for vectorized loop '%s' is not a constant, "
179194
"cannot vectorize--unrolling instead")
180-
return generate_unroll_loop(codegen_state, sched_index)
195+
return fallback_codegen_routine(codegen_state, sched_index)
181196

182197
length = int(pw_aff_to_expr(length_aff))
183198

@@ -192,7 +207,7 @@ def generate_vectorize_loop(codegen_state, sched_index):
192207
warn(kernel, "vec_lower_not_0",
193208
"lower bound for vectorized loop '%s' is not zero, "
194209
"cannot vectorize--unrolling instead")
195-
return generate_unroll_loop(codegen_state, sched_index)
210+
return fallback_codegen_routine(codegen_state, sched_index)
196211

197212
# {{{ 'implement' vectorization bounds
198213

@@ -484,4 +499,17 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index):
484499

485500
# }}}
486501

502+
503+
# {{{ omp simd loop
504+
505+
def generate_openmp_simd_loop(codegen_state, sched_index):
    """Generate a sequential loop preceded by an ``omp simd`` pragma.

    The pragma is obtained from ``codegen_state.ast_builder.emit_pragma``
    and the loop from :func:`generate_sequential_loop_dim_code`; both are
    combined, pragma first, with ``merge_codegen_results``.

    :arg codegen_state: the current code generation state; its
        ``ast_builder`` is used to emit the pragma.
    :arg sched_index: forwarded unchanged to
        :func:`generate_sequential_loop_dim_code`.
    :returns: the value of ``merge_codegen_results`` for the pragma
        followed by the loop.
    """
    # The pragma must come directly before the loop it annotates.
    simd_pragma = codegen_state.ast_builder.emit_pragma("omp simd")
    loop_code = generate_sequential_loop_dim_code(codegen_state, sched_index)

    return merge_codegen_results(codegen_state, [simd_pragma, loop_code])
511+
512+
# }}}
513+
514+
487515
# vim: foldmethod=marker

0 commit comments

Comments
 (0)