@@ -262,14 +262,15 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(ptr %out) {
262262; CHECK-NEXT: [[TMP13:%.*]] = insertelement <6 x i64> [[TMP12]], i64 3, i32 3
263263; CHECK-NEXT: [[TMP14:%.*]] = insertelement <6 x i64> [[TMP13]], i64 4, i32 4
264264; CHECK-NEXT: [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 5, i32 5
265- ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
266- ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP1]]
265+ ; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[SEL3]] to i32
266+ ; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP7]], 3
267+ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP16]]
267268; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i64> poison, i64 [[TMP2]], i64 0
268- ; CHECK-NEXT: [[TMP4 :%.*]] = add i64 [[TMP1 ]], 1
269- ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP4 ]]
269+ ; CHECK-NEXT: [[TMP17 :%.*]] = add i32 [[TMP16 ]], 1
270+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP17 ]]
270271; CHECK-NEXT: [[TMP6:%.*]] = insertelement <3 x i64> [[TMP3]], i64 [[TMP5]], i64 1
271- ; CHECK-NEXT: [[TMP7 :%.*]] = add i64 [[TMP1 ]], 2
272- ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP7 ]]
272+ ; CHECK-NEXT: [[TMP18 :%.*]] = add i32 [[TMP16 ]], 2
273+ ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP18 ]]
273274; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x i64> [[TMP6]], i64 [[TMP8]], i64 2
274275; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP9]], i32 2
275276; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8
@@ -311,15 +312,16 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(ptr %out)
311312; CHECK-NEXT: [[TMP14:%.*]] = insertelement <6 x i64> [[TMP13]], i64 3, i32 3
312313; CHECK-NEXT: [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 4, i32 4
313314; CHECK-NEXT: [[TMP16:%.*]] = insertelement <6 x i64> [[TMP15]], i64 5, i32 5
314- ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
315- ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 6
316- ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP2]]
315+ ; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[SEL3]] to i32
316+ ; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP17]], 3
317+ ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP8]], 6
318+ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP18]]
317319; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
318- ; CHECK-NEXT: [[TMP5 :%.*]] = add i64 [[TMP2 ]], 1
319- ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP5 ]]
320+ ; CHECK-NEXT: [[TMP19 :%.*]] = add i32 [[TMP18 ]], 1
321+ ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP19 ]]
320322; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i64> [[TMP4]], i64 [[TMP6]], i64 1
321- ; CHECK-NEXT: [[TMP8 :%.*]] = add i64 [[TMP2 ]], 2
322- ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP8 ]]
323+ ; CHECK-NEXT: [[TMP20 :%.*]] = add i32 [[TMP18 ]], 2
324+ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP20 ]]
323325; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x i64> [[TMP7]], i64 [[TMP9]], i64 2
324326; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP10]], i32 2
325327; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8
0 commit comments