Skip to content

Commit ca970df

Browse files
committed
implement OpenMPSIMDTag, vectorization fallback mechanisms
1 parent ff1e0fc commit ca970df

File tree

5 files changed

+109
-16
lines changed

5 files changed

+109
-16
lines changed

loopy/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@
4343
AddressSpace,
4444
TemporaryVariable,
4545
SubstitutionRule,
46-
CallMangleInfo)
46+
CallMangleInfo,
47+
OpenMPSIMDTag, VectorizeTag)
4748
from loopy.kernel.function_interface import (
4849
CallableKernel, ScalarCallable)
4950
from loopy.translation_unit import (
@@ -190,7 +191,7 @@
190191
"AddressSpace",
191192
"TemporaryVariable",
192193
"SubstitutionRule",
193-
"CallMangleInfo",
194+
"CallMangleInfo", "OpenMPSIMDTag", "VectorizeTag",
194195

195196
"make_kernel", "UniqueName", "make_function",
196197

loopy/codegen/__init__.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -395,21 +395,48 @@ def try_vectorized(self, what, func):
395395
return self.unvectorize(func)
396396

397397
def unvectorize(self, func):
398+
from loopy.kernel.data import VectorizeTag, UnrollTag, OpenMPSIMDTag
399+
from loopy.codegen.result import (merge_codegen_results,
400+
CodeGenerationResult)
398401
vinf = self.vectorization_info
402+
vec_tag, = self.kernel.inames[vinf.iname].tags_of_type(VectorizeTag)
399403
result = []
400-
novec_self = self.copy(vectorization_info=False)
401404

402-
for i in range(vinf.length):
403-
idx_aff = isl.Aff.zero_on_domain(vinf.space.params()) + i
404-
new_codegen_state = novec_self.fix(vinf.iname, idx_aff)
405-
generated = func(new_codegen_state)
406-
407-
if isinstance(generated, list):
408-
result.extend(generated)
405+
if isinstance(vec_tag.fallback_impl_tag, UnrollTag):
406+
novec_self = self.copy(vectorization_info=False)
407+
408+
for i in range(vinf.length):
409+
idx_aff = isl.Aff.zero_on_domain(vinf.space.params()) + i
410+
new_codegen_state = novec_self.fix(vinf.iname, idx_aff)
411+
generated = func(new_codegen_state)
412+
413+
if isinstance(generated, list):
414+
result.extend(generated)
415+
else:
416+
result.append(generated)
417+
elif isinstance(vec_tag.fallback_impl_tag, OpenMPSIMDTag):
418+
novec_self = self.copy(vectorization_info=False)
419+
astb = self.ast_builder
420+
inner = func(novec_self)
421+
if isinstance(inner, list):
422+
inner = merge_codegen_results(novec_self, inner)
423+
assert isinstance(inner, CodeGenerationResult)
424+
if isinstance(inner.current_ast(novec_self),
425+
astb.ast_comment_class):
426+
# loop body is a comment => do not emit the loop
427+
loop_cgr = inner
409428
else:
410-
result.append(generated)
429+
result.append(astb.emit_pragma("omp simd"))
430+
loop_cgr = inner.with_new_ast(
431+
novec_self,
432+
astb.emit_sequential_loop(
433+
novec_self, vinf.iname, self.kernel.index_dtype,
434+
0, vinf.length-1, inner.current_ast(novec_self)))
435+
result.append(loop_cgr)
436+
elif vec_tag.fallback_impl_tag is None:
437+
raise RuntimeError("Could not vectorize all statements"
438+
f" in name {vinf.iname}")
411439

412-
from loopy.codegen.result import merge_codegen_results
413440
return merge_codegen_results(self, result)
414441

415442
@property

loopy/codegen/control.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ def generate_code_for_sched_index(codegen_state, sched_index):
108108
elif isinstance(sched_item, EnterLoop):
109109
from loopy.kernel.data import (UnrolledIlpTag, UnrollTag,
110110
ForceSequentialTag, LoopedIlpTag, VectorizeTag,
111+
OpenMPSIMDTag,
111112
InameImplementationTag,
112113
InOrderSequentialSequentialTag, filter_iname_tags_by_type)
113114

@@ -117,12 +118,15 @@ def generate_code_for_sched_index(codegen_state, sched_index):
117118
from loopy.codegen.loop import (
118119
generate_unroll_loop,
119120
generate_vectorize_loop,
120-
generate_sequential_loop_dim_code)
121+
generate_sequential_loop_dim_code,
122+
generate_openmp_simd_loop)
121123

122124
if filter_iname_tags_by_type(tags, (UnrollTag, UnrolledIlpTag)):
123125
func = generate_unroll_loop
124126
elif filter_iname_tags_by_type(tags, VectorizeTag):
125127
func = generate_vectorize_loop
128+
elif filter_iname_tags_by_type(tags, OpenMPSIMDTag):
129+
func = generate_openmp_simd_loop
126130
elif not tags or filter_iname_tags_by_type(tags, (LoopedIlpTag,
127131
ForceSequentialTag, InOrderSequentialSequentialTag)):
128132
func = generate_sequential_loop_dim_code

loopy/codegen/loop.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,27 @@ def generate_unroll_loop(codegen_state, sched_index):
160160

161161
# {{{ vectorized loops
162162

163+
def raise_for_unvectorizable_loop(codegen_state, sched_index):
164+
kernel = codegen_state.kernel
165+
raise RuntimeError(f"Cannot vectorize {kernel.schedule[sched_index]}")
166+
167+
163168
def generate_vectorize_loop(codegen_state, sched_index):
169+
from loopy.kernel.data import VectorizeTag, UnrollTag, OpenMPSIMDTag
164170
kernel = codegen_state.kernel
165171

166172
iname = kernel.linearization[sched_index].iname
173+
vec_tag, = kernel.inames[iname].tags_of_type(VectorizeTag)
174+
fallback_impl_tag = vec_tag.fallback_impl_tag
175+
176+
if isinstance(fallback_impl_tag, UnrollTag):
177+
fallback_codegen_routine = generate_unroll_loop
178+
elif isinstance(fallback_impl_tag, OpenMPSIMDTag):
179+
fallback_codegen_routine = generate_openmp_simd_loop
180+
elif fallback_impl_tag is None:
181+
fallback_codegen_routine = raise_for_unvectorizable_loop
182+
else:
183+
raise NotImplementedError(fallback_impl_tag)
167184

168185
bounds = kernel.get_iname_bounds(iname, constants_only=True)
169186

@@ -177,7 +194,7 @@ def generate_vectorize_loop(codegen_state, sched_index):
177194
warn(kernel, "vec_upper_not_const",
178195
"upper bound for vectorized loop '%s' is not a constant, "
179196
"cannot vectorize--unrolling instead")
180-
return generate_unroll_loop(codegen_state, sched_index)
197+
return fallback_codegen_routine(codegen_state, sched_index)
181198

182199
length = int(pw_aff_to_expr(length_aff))
183200

@@ -192,7 +209,7 @@ def generate_vectorize_loop(codegen_state, sched_index):
192209
warn(kernel, "vec_lower_not_0",
193210
"lower bound for vectorized loop '%s' is not zero, "
194211
"cannot vectorize--unrolling instead")
195-
return generate_unroll_loop(codegen_state, sched_index)
212+
return fallback_codegen_routine(codegen_state, sched_index)
196213

197214
# {{{ 'implement' vectorization bounds
198215

@@ -484,4 +501,17 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index):
484501

485502
# }}}
486503

504+
505+
# {{{ omp simd loop
506+
507+
def generate_openmp_simd_loop(codegen_state, sched_index):
508+
return merge_codegen_results(
509+
codegen_state,
510+
[codegen_state.ast_builder.emit_pragma("omp simd"),
511+
generate_sequential_loop_dim_code(codegen_state,
512+
sched_index)])
513+
514+
# }}}
515+
516+
487517
# vim: foldmethod=marker

loopy/kernel/data.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,29 @@ def __str__(self):
203203
# }}}
204204

205205

206+
class _NotProvided:
207+
pass
208+
209+
206210
class VectorizeTag(UniqueInameTag, HardwareConcurrentTag):
211+
"""
212+
.. attribute:: fallback_impl_tag
213+
214+
If the loop contains instructions that are not vectorizable, the code
215+
generator will implement the loop as directed by `fallback_impl_tag`.
216+
If *None*, a :class:`RuntimeError` is raised while
217+
generating code for an unvectorizable instruction within the loop.
218+
"""
219+
def __init__(self, fallback_impl_tag=_NotProvided):
220+
if fallback_impl_tag is _NotProvided:
221+
from warnings import warn
222+
warn("`fallback_impl_tag` not provided to VectorizeTag."
223+
" This will be an error from 2023. To keep the current"
224+
" behavior, instantiate as `VectorizeTag(UnrollTag())`",
225+
DeprecationWarning, stacklevel=2)
226+
fallback_impl_tag = UnrollTag()
227+
super().__init__(fallback_impl_tag=fallback_impl_tag)
228+
207229
def __str__(self):
208230
return "vec"
209231

@@ -223,6 +245,15 @@ def __str__(self):
223245
return "ord"
224246

225247

248+
class OpenMPSIMDTag(InameImplementationTag):
249+
"""
250+
Directs the code generator to emit a ``#pragma omp simd``
251+
directive atop the loop.
252+
"""
253+
def __str__(self):
254+
return "omp.simd"
255+
256+
226257
def parse_tag(tag):
227258
from pytools.tag import Tag as TagBase
228259
if tag is None:
@@ -241,7 +272,7 @@ def parse_tag(tag):
241272
elif tag in ["unr"]:
242273
return UnrollTag()
243274
elif tag in ["vec"]:
244-
return VectorizeTag()
275+
return VectorizeTag(UnrollTag())
245276
elif tag in ["ilp", "ilp.unr"]:
246277
return UnrolledIlpTag()
247278
elif tag == "ilp.seq":

0 commit comments

Comments
 (0)