@@ -149,6 +149,12 @@ void CodeGen_GPU_C::visit(const Shuffle *op) {
149
149
internal_assert (op->vectors [0 ].type () == op->vectors [i].type ());
150
150
}
151
151
internal_assert (op->type .lanes () == (int )op->indices .size ());
152
+ // We need to construct the mapping between shuffled-index,
153
+ // and source-vector-index and source-element-index-within-the-vector.
154
+ // To start, we'll figure out what the first shuffle-index is per
155
+ // source-vector. Also let's compute the total number of
156
+ // source-elements to be able to assert that all of the
157
+ // shuffle-indices are within range.
152
158
std::vector<int > vector_first_index;
153
159
int max_index = 0 ;
154
160
for (const Expr &v : op->vectors ) {
@@ -182,15 +188,21 @@ void CodeGen_GPU_C::visit(const Shuffle *op) {
182
188
for (int i : op->indices ) {
183
189
size_t vector_idx;
184
190
int lane_idx = -1 ;
191
+ // Find in which source vector this shuffle-index "i" falls:
185
192
for (vector_idx = 0 ; vector_idx < op->vectors .size (); ++vector_idx) {
186
- if (i >= vector_first_index[vector_idx] && i < vector_first_index[vector_idx] + op->vectors [vector_idx].type ().lanes ()) {
187
- lane_idx = i - vector_first_index[vector_idx];
193
+ const int first_index = vector_first_index[vector_idx];
194
+ if (i >= first_index &&
195
+ i < first_index + op->vectors [vector_idx].type ().lanes ()) {
196
+ lane_idx = i - first_index;
188
197
break ;
189
198
}
190
199
}
191
200
internal_assert (lane_idx != -1 ) << " Shuffle lane index not found: i=" << i;
192
201
internal_assert (vector_idx < op->vectors .size ()) << " Shuffle vector index not found: i=" << i << " , lane=" << lane_idx;
202
+ // Print the vector in which we will index.
193
203
rhs << vecs[vector_idx];
204
+ // In case we are dealing with an actual vector instead of scalar,
205
+ // print out the required indexing syntax.
194
206
if (op->vectors [vector_idx].type ().lanes () > 1 ) {
195
207
switch (vector_declaration_style) {
196
208
case VectorDeclarationStyle::OpenCLSyntax:
@@ -202,6 +214,8 @@ void CodeGen_GPU_C::visit(const Shuffle *op) {
202
214
break ;
203
215
}
204
216
}
217
+
218
+ // Elements of a vector are comma separated.
205
219
if (elem_num < (int )(op->indices .size () - 1 )) {
206
220
rhs << " , " ;
207
221
}
0 commit comments