Skip to content

Commit c6c7f72

Browse files
committed
implements C vector extensions target
1 parent d03d39b commit c6c7f72

File tree

2 files changed

+145
-0
lines changed

2 files changed

+145
-0
lines changed

loopy/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@
154154
from loopy.target.c import (CFamilyTarget, CTarget, ExecutableCTarget,
155155
generate_header, CWithGNULibcTarget,
156156
ExecutableCWithGNULibcTarget)
157+
from loopy.target.c_vector_extensions import (CVectorExtensionsTarget,
158+
ExecutableCVectorExtensionsTarget)
157159
from loopy.target.cuda import CudaTarget
158160
from loopy.target.opencl import OpenCLTarget
159161
from loopy.target.pyopencl import PyOpenCLTarget
@@ -301,6 +303,7 @@
301303
"TargetBase",
302304
"CFamilyTarget", "CTarget", "ExecutableCTarget", "generate_header",
303305
"CWithGNULibcTarget", "ExecutableCWithGNULibcTarget",
306+
"CVectorExtensionsTarget", "ExecutableCVectorExtensionsTarget",
304307
"CudaTarget", "OpenCLTarget",
305308
"PyOpenCLTarget", "ISPCTarget",
306309
"NumbaTarget", "NumbaCudaTarget",
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import numpy as np
2+
from pytools import memoize_method
3+
from loopy.target.c import CTarget, CWithGNULibcASTBuilder, ExecutableCTarget
4+
from loopy.types import NumpyType
5+
6+
7+
# {{{ vector types
8+
9+
class vec:  # noqa
    """Namespace holding the vector dtypes built by ``_create_vector_types``.

    Once ``_create_vector_types`` has run, the class carries:

    * ``types``: maps ``(scalar numpy dtype, count)`` to the vector dtype,
    * ``names_and_dtypes``: list of ``(C type name, vector dtype)`` pairs,
    * ``type_to_scalar_and_count``: maps a vector dtype back to
      ``(scalar numpy dtype, count)``.
    """


def _create_vector_types():
    """Fill :class:`vec` with one structured dtype per (scalar type, count).

    For every scalar C type listed below and every entry of ``counts``, a
    numpy structured dtype with fields ``s0 .. s{count-1}`` is created. The
    first four fields are additionally titled ``x``, ``y``, ``z``, ``w``.
    The matching C type name is spelled with the
    ``__attribute__((vector_size(N)))`` extension, where ``N`` is the vector
    size in bytes.

    Any previous contents of ``vec.types``, ``vec.names_and_dtypes`` and
    ``vec.type_to_scalar_and_count`` are discarded.
    """
    field_names = ["x", "y", "z", "w"]

    vec.types = {}
    vec.names_and_dtypes = []
    vec.type_to_scalar_and_count = {}

    # NOTE(review): GCC documents vector_size as needing a power-of-two
    # multiple of the base type size, which count == 3 does not satisfy.
    # Behaviour is left unchanged here -- confirm whether 3-vectors should
    # be padded or dropped.
    counts = [2, 3, 4, 8, 16]

    for base_name, base_type in [
            ("char", np.int8),
            ("unsigned char", np.uint8),
            ("short", np.int16),
            ("unsigned short", np.uint16),
            ("int", np.int32),
            ("unsigned int", np.uint32),
            ("long", np.int64),
            ("unsigned long", np.uint64),
            ("float", np.float32),
            ("double", np.float64),
            ]:
        # Loop-invariant with respect to *count*, so build it only once.
        scalar_dtype = np.dtype(base_type)

        for count in counts:
            byte_count = count*scalar_dtype.itemsize
            name = "%s __attribute__((vector_size(%d)))" % (base_name,
                    byte_count)

            names = [f"s{i}" for i in range(count)]

            # Only the leading components get an x/y/z/w alias; any further
            # component is given None as its title.
            titles = field_names[:count]
            titles.extend((len(names)-len(titles))*[None])

            try:
                dtype = np.dtype(dict(
                    names=names,
                    formats=[base_type]*count,
                    titles=titles))
            except NotImplementedError:
                # The dict form was rejected: retry with
                # ((name, title), format) field specs and, if those raise
                # TypeError, fall back to untitled fields.
                try:
                    dtype = np.dtype([((n, title), base_type)
                                      for (n, title) in zip(names, titles)])
                except TypeError:
                    dtype = np.dtype([(n, base_type) for n in names])

            # *name* is not a valid identifier, so this attribute is only
            # reachable through getattr().
            setattr(vec, name, dtype)

            vec.names_and_dtypes.append((name, dtype))

            vec.types[scalar_dtype, count] = dtype
            vec.type_to_scalar_and_count[dtype] = scalar_dtype, count


_create_vector_types()
68+
69+
70+
def _register_vector_types(dtype_registry):
    """Register every vector dtype in *dtype_registry* under its C type name.

    :arg dtype_registry: an object providing
        ``get_or_register_dtype(name, dtype)``. It is called once for each
        ``(name, dtype)`` pair in ``vec.names_and_dtypes``, in list order.
    """
    for name, dtype in vec.names_and_dtypes:
        dtype_registry.get_or_register_dtype(name, dtype)
73+
74+
# }}}
75+
76+
77+
# {{{ target
78+
79+
class CVectorExtensionsTarget(CTarget):
    """A specialized C-target that represents vectorization through GCC/Clang
    language extensions.
    """

    def get_device_ast_builder(self):
        """Return a :class:`CVectorExtensionsASTBuilder` bound to this target."""
        return CVectorExtensionsASTBuilder(self)

    @memoize_method
    def get_dtype_registry(self):
        """Return the dtype registry for this target, wrapped in a
        ``DTypeRegistryWrapper``.

        The registry is filled with the C99 stdint types, the C99 complex
        types and the vector types from ``vec.names_and_dtypes``. The result
        is memoized on the instance.
        """
        from loopy.target.c.compyte.dtypes import (
                DTypeRegistry, fill_registry_with_c99_stdint_types,
                fill_registry_with_c99_complex_types)
        from loopy.target.c import DTypeRegistryWrapper

        result = DTypeRegistry()
        fill_registry_with_c99_stdint_types(result)
        fill_registry_with_c99_complex_types(result)

        _register_vector_types(result)
        return DTypeRegistryWrapper(result)

    def is_vector_dtype(self, dtype):
        """Return whether *dtype* is a :class:`NumpyType` whose numpy dtype
        is one of the vector dtypes in ``vec.types``.
        """
        # The values view supports ``in`` directly; no list copy is needed.
        return (isinstance(dtype, NumpyType)
                and dtype.numpy_dtype in vec.types.values())

    def vector_dtype(self, base, count):
        """Return the vector type with *count* entries of scalar type *base*.

        :arg base: a type exposing ``numpy_dtype``.
        :arg count: number of vector components.
        :raises KeyError: if ``(base.numpy_dtype, count)`` is not in
            ``vec.types``.
        """
        return NumpyType(
                vec.types[base.numpy_dtype, count],
                target=self)

    @property
    def allows_non_constant_indexing_for_vec_types(self):
        """Always *True* for this target."""
        return True

    @property
    def broadcasts_scalar_assignment_to_vec_types(self):
        """Always *False* for this target."""
        return False
117+
118+
119+
class ExecutableCVectorExtensionsTarget(CVectorExtensionsTarget,
                                        ExecutableCTarget):
    """Combines :class:`CVectorExtensionsTarget` with
    :class:`ExecutableCTarget`.

    Construction, executor handling and the host AST builder are delegated
    explicitly to :class:`ExecutableCTarget`.
    """

    def __init__(self, compiler=None, fortran_abi=False):
        """Forward *compiler* and *fortran_abi* unchanged to
        ``ExecutableCTarget.__init__``.
        """
        ExecutableCTarget.__init__(
                self, compiler=compiler, fortran_abi=fortran_abi)

    def get_kernel_executor_cache_key(self, *args, **kwargs):
        """Delegate to ``ExecutableCTarget.get_kernel_executor_cache_key``."""
        return ExecutableCTarget.get_kernel_executor_cache_key(
                self, *args, **kwargs)

    def get_kernel_executor(self, t_unit, *args, **kwargs):
        """Delegate to ``ExecutableCTarget.get_kernel_executor``."""
        return ExecutableCTarget.get_kernel_executor(
                self, t_unit, *args, **kwargs)

    def get_host_ast_builder(self):
        """Delegate to ``ExecutableCTarget.get_host_ast_builder``."""
        return ExecutableCTarget.get_host_ast_builder(self)
132+
133+
# }}}
134+
135+
136+
# {{{ AST builder
137+
138+
class CVectorExtensionsASTBuilder(CWithGNULibcASTBuilder):
    """AST builder used by :class:`CVectorExtensionsTarget`."""

    def add_vector_access(self, access_expr, index):
        """Return *access_expr* subscripted with *index*, i.e.
        ``access_expr[index]``.
        """
        return access_expr[index]
141+
142+
# }}}

0 commit comments

Comments
 (0)