Skip to content

Commit d056656

Browse files
committed
implements C vector extensions target
1 parent a8f10a9 commit d056656

File tree

3 files changed

+185
-1
lines changed

3 files changed

+185
-1
lines changed

loopy/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@
155155
from loopy.target.c import (CFamilyTarget, CTarget, ExecutableCTarget,
156156
generate_header, CWithGNULibcTarget,
157157
ExecutableCWithGNULibcTarget)
158+
from loopy.target.c_vector_extensions import (CVectorExtensionsTarget,
159+
ExecutableCVectorExtensionsTarget)
158160
from loopy.target.cuda import CudaTarget
159161
from loopy.target.opencl import OpenCLTarget
160162
from loopy.target.pyopencl import PyOpenCLTarget
@@ -302,6 +304,7 @@
302304
"TargetBase", "VectorizationFallback",
303305
"CFamilyTarget", "CTarget", "ExecutableCTarget", "generate_header",
304306
"CWithGNULibcTarget", "ExecutableCWithGNULibcTarget",
307+
"CVectorExtensionsTarget", "ExecutableCVectorExtensionsTarget",
305308
"CudaTarget", "OpenCLTarget",
306309
"PyOpenCLTarget", "ISPCTarget",
307310
"ASTBuilderBase",

loopy/target/c/codegen/expression.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,8 @@ def make_var(name):
225225

226226
if self.kernel.target.allows_non_constant_indexing_for_vec_types:
227227
access_info = get_access_info(self.kernel, ary, index_tuple,
228-
lambda expr: substitute(expr, self.codegen_state.var_subst_map),
228+
lambda expr: substitute(expr,
229+
dict(self.codegen_state.var_subst_map)),
229230
self.codegen_state.vectorization_info)
230231
else:
231232
access_info = get_access_info(self.kernel, ary, index_tuple,
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
import numpy as np
2+
from cgen import Declarator
3+
from pytools import memoize_method
4+
from loopy.target import VectorizationFallback
5+
from loopy.target.c import CTarget, CWithGNULibcASTBuilder, ExecutableCTarget
6+
from loopy.types import NumpyType
7+
from loopy.kernel.array import (ArrayBase, FixedStrideArrayDimTag,
8+
VectorArrayDimTag)
9+
10+
11+
# {{{ vector types
12+
13+
class vec:  # noqa
    """Attribute container for the vector dtypes of this module.

    :func:`_create_vector_types` attaches the lookup tables ``types``,
    ``names_and_dtypes`` and ``type_to_scalar_and_count`` to this class, plus
    one attribute per generated vector type name.
    """
15+
16+
17+
def _create_vector_types():
    """Build the vector dtypes and store them on the :class:`vec` namespace.

    For every (C scalar type, lane count) combination a structured numpy
    dtype with fields ``s0 .. s{count-1}`` is created and recorded in:

    * ``vec.types``: ``(scalar dtype, count) -> vector dtype``
    * ``vec.names_and_dtypes``: list of ``(C type name, vector dtype)``
    * ``vec.type_to_scalar_and_count``: ``vector dtype -> (scalar dtype, count)``

    The C type name has the form
    ``"<scalar> __attribute__((vector_size(<bytes>)))"`` and is additionally
    set as an attribute on :class:`vec`.
    """
    component_titles = ["x", "y", "z", "w"]

    # (C scalar type name, matching numpy scalar type)
    scalar_types = [
        ("char", np.int8),
        ("unsigned char", np.uint8),
        ("short", np.int16),
        ("unsigned short", np.uint16),
        ("int", np.int32),
        ("unsigned int", np.uint32),
        ("long", np.int64),
        ("unsigned long", np.uint64),
        ("float", np.float32),
        ("double", np.float64),
    ]

    # NOTE(review): GCC documents ``vector_size`` as requiring a power-of-two
    # multiple of the base type size, so the 3-lane type names may not be
    # accepted by the compiler -- confirm before relying on them.
    lane_counts = [2, 3, 4, 8, 16]

    vec.types = {}
    vec.names_and_dtypes = []
    vec.type_to_scalar_and_count = {}

    for base_name, base_type in scalar_types:
        scalar_dtype = np.dtype(base_type)

        for count in lane_counts:
            byte_count = count * scalar_dtype.itemsize
            name = f"{base_name} __attribute__((vector_size({byte_count})))"

            names = [f"s{i}" for i in range(count)]

            # Only the first four lanes carry an x/y/z/w title; the remaining
            # lanes are padded with None so titles lines up with names.
            titles = component_titles[:count]
            titles += [None] * (len(names) - len(titles))

            try:
                dtype = np.dtype({
                    "names": names,
                    "formats": [base_type] * count,
                    "titles": titles,
                })
            except NotImplementedError:
                # Dict-style specification unsupported: fall back to the
                # list-of-fields form, first with titles, then without.
                titled_fields = [((n, title), base_type)
                                 for n, title in zip(names, titles)]
                try:
                    dtype = np.dtype(titled_fields)
                except TypeError:
                    dtype = np.dtype([(n, base_type) for n in names])

            setattr(vec, name, dtype)
            vec.names_and_dtypes.append((name, dtype))

            vec.types[scalar_dtype, count] = dtype
            vec.type_to_scalar_and_count[dtype] = scalar_dtype, count


_create_vector_types()
72+
73+
74+
def _register_vector_types(dtype_registry):
    """Register every generated vector dtype with *dtype_registry*.

    Calls ``dtype_registry.get_or_register_dtype(name, dtype)`` once for each
    ``(name, dtype)`` pair in ``vec.names_and_dtypes``, in list order.
    """
    for name_and_dtype in vec.names_and_dtypes:
        dtype_registry.get_or_register_dtype(*name_and_dtype)
77+
78+
# }}}
79+
80+
81+
# {{{ target
82+
83+
class CVectorExtensionsTarget(CTarget):
    """A specialized C-target that represents vectorization through GCC/Clang
    language extensions.

    :arg vec_fallback: the :class:`~loopy.target.VectorizationFallback`
        reported by :attr:`vectorization_fallback`.
    :arg fortran_abi: forwarded unchanged to :class:`CTarget`.
    """
    def __init__(self,
            vec_fallback: VectorizationFallback = VectorizationFallback.UNROLL,
            fortran_abi=False):
        super().__init__(fortran_abi=fortran_abi)
        self.vec_fallback = vec_fallback

    def get_host_ast_builder(self):
        """Return a :class:`CVectorExtensionsASTBuilder` bound to this target."""
        return CVectorExtensionsASTBuilder(self)

    def get_device_ast_builder(self):
        """Return a :class:`CVectorExtensionsASTBuilder` bound to this target."""
        return CVectorExtensionsASTBuilder(self)

    @memoize_method
    def get_dtype_registry(self):
        """Return a wrapped dtype registry that also knows the vector types.

        The registry is filled with the C99 stdint and complex types and then
        with every entry of ``vec.names_and_dtypes``.
        """
        from loopy.target.c.compyte.dtypes import (
                DTypeRegistry, fill_registry_with_c99_stdint_types,
                fill_registry_with_c99_complex_types)
        from loopy.target.c import DTypeRegistryWrapper

        result = DTypeRegistry()
        fill_registry_with_c99_stdint_types(result)
        fill_registry_with_c99_complex_types(result)

        _register_vector_types(result)
        return DTypeRegistryWrapper(result)

    def is_vector_dtype(self, dtype):
        """Return whether *dtype* is a :class:`NumpyType` wrapping one of the
        vector dtypes generated by this module.
        """
        # type_to_scalar_and_count is keyed by exactly the dtypes stored in
        # vec.types, so a hash lookup replaces building and scanning a list.
        return (isinstance(dtype, NumpyType)
                and dtype.numpy_dtype in vec.type_to_scalar_and_count)

    def vector_dtype(self, base, count):
        """Return the vector :class:`NumpyType` with *count* lanes of *base*.

        :raises KeyError: if no vector dtype was generated for
            ``(base.numpy_dtype, count)``.
        """
        return NumpyType(
                vec.types[base.numpy_dtype, count],
                target=self)

    @property
    def allows_non_constant_indexing_for_vec_types(self):
        """Always *True* for this target."""
        return True

    @property
    def broadcasts_scalar_assignment_to_vec_types(self):
        """Always *False* for this target."""
        return False

    @property
    def vectorization_fallback(self):
        """The *vec_fallback* value passed to the constructor."""
        return self.vec_fallback
133+
134+
135+
class ExecutableCVectorExtensionsTarget(CVectorExtensionsTarget,
                                        ExecutableCTarget):
    """Executable flavor of :class:`CVectorExtensionsTarget`.

    Construction and kernel-executor handling are delegated explicitly to
    :class:`ExecutableCTarget`; *vec_fallback* is stored on the instance
    directly.
    """
    def __init__(self,
            vec_fallback: VectorizationFallback = VectorizationFallback.UNROLL,
            compiler=None,
            fortran_abi=False):
        # ExecutableCTarget's initializer is invoked by name rather than via
        # super(), which would dispatch to CVectorExtensionsTarget.__init__
        # (a signature without *compiler*).
        ExecutableCTarget.__init__(
                self, compiler=compiler, fortran_abi=fortran_abi)
        self.vec_fallback = vec_fallback

    def get_kernel_executor_cache_key(self, *args, **kwargs):
        """Forward to :meth:`ExecutableCTarget.get_kernel_executor_cache_key`."""
        cache_key = ExecutableCTarget.get_kernel_executor_cache_key(
                self, *args, **kwargs)
        return cache_key

    def get_kernel_executor(self, t_unit, *args, **kwargs):
        """Forward to :meth:`ExecutableCTarget.get_kernel_executor`."""
        executor = ExecutableCTarget.get_kernel_executor(
                self, t_unit, *args, **kwargs)
        return executor
149+
150+
# }}}
151+
152+
153+
# {{{ AST builder
154+
155+
class CVectorExtensionsASTBuilder(CWithGNULibcASTBuilder):
    """AST builder returned by :class:`CVectorExtensionsTarget`."""

    def add_vector_access(self, access_expr, index):
        """Return *access_expr* subscripted with *index*."""
        return access_expr[index]

    def get_array_base_declarator(self, ary: ArrayBase) -> Declarator:
        """Return the ``POD`` declarator for *ary*.

        If ``ary.vector_size(self.target)`` exceeds 1, the element type is
        replaced by the target's matching vector dtype.

        :raises NotImplementedError: if *ary* has a dim tag that is neither a
            :class:`FixedStrideArrayDimTag` nor a :class:`VectorArrayDimTag`.
        """
        from loopy.target.c import POD

        dtype = ary.dtype
        vec_size = ary.vector_size(self.target)
        if vec_size > 1:
            dtype = self.target.vector_dtype(dtype, vec_size)

        # dim_tags may be empty/None, in which case there is nothing to check.
        for dim_tag in ary.dim_tags or ():
            if not isinstance(dim_tag, (FixedStrideArrayDimTag,
                                        VectorArrayDimTag)):
                raise NotImplementedError(
                    f"{type(self).__name__} does not understand axis tag "
                    f"'{type(dim_tag)}'.")

        return POD(self, dtype, ary.name)
179+
180+
# }}}

0 commit comments

Comments
 (0)