Skip to content

Commit 91ea128

Browse files
agrabezh authored and igcbot committed
Fix the JointMatrixFuncsResolution alloca construction
Make the JointMatrixFuncsResolution pass create its allocas in the entry block of the function instead of at the call site being resolved.
1 parent 0db1db3 commit 91ea128

File tree

5 files changed

+48
-21
lines changed

5 files changed

+48
-21
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/JointMatrixFuncsResolutionPass/JointMatrixFuncsResolutionPass.cpp

+15-3
Original file line number | Diff line number | Diff line change
@@ -962,8 +962,12 @@ Instruction *JointMatrixFuncsResolutionPass::ResolveLoad(CallInst *CI)
962962

963963
InstsToErase.insert(CI);
964964

965-
IRBuilder builder(CI);
965+
// Create alloca in the entry node of the function
966+
IRBuilder<> builder(&*CI->getFunction()->getEntryBlock().getFirstInsertionPt());
967+
builder.SetCurrentDebugLocation(CI->getDebugLoc());
966968
Value *sliceArray = builder.CreateAlloca(matTy, ADDRESS_SPACE_PRIVATE);
969+
970+
builder.SetInsertPoint(CI);
967971
Value *dst = builder.CreateBitCast(sliceArray, arrayTy);
968972

969973
std::vector<Value *> Args = { dst, ptrVal, strideVal };
@@ -992,7 +996,6 @@ Instruction *JointMatrixFuncsResolutionPass::ResolveStore(CallInst *CI)
992996
Type *arrayTy = Type::getInt8PtrTy(ctx, ADDRESS_SPACE_PRIVATE);
993997

994998
Module *M = CI->getParent()->getModule();
995-
IRBuilder builder(CI);
996999

9971000
Value *matVal = Resolve(matrixVal);
9981001

@@ -1008,7 +1011,12 @@ Instruction *JointMatrixFuncsResolutionPass::ResolveStore(CallInst *CI)
10081011

10091012
InstsToErase.insert(CI);
10101013

1014+
// Create alloca in the entry node of the function
1015+
IRBuilder<> builder(&*CI->getFunction()->getEntryBlock().getFirstInsertionPt());
1016+
builder.SetCurrentDebugLocation(CI->getDebugLoc());
10111017
Value *sliceArray = builder.CreateAlloca(matVal->getType(), ADDRESS_SPACE_PRIVATE);
1018+
1019+
builder.SetInsertPoint(CI);
10121020
builder.CreateStore(matVal, sliceArray);
10131021
Value *src = builder.CreateBitCast(sliceArray, arrayTy);
10141022

@@ -1132,13 +1140,17 @@ Instruction *JointMatrixFuncsResolutionPass::ResolveMad(CallInst *CI, unsigned O
11321140
Value *bMat = Resolve(bMatVal);
11331141
Value *cMat = Resolve(cMatVal);
11341142

1135-
IRBuilder builder(CI);
1143+
// Create alloca in the entry node of the function
1144+
IRBuilder<> builder(&*CI->getFunction()->getEntryBlock().getFirstInsertionPt());
1145+
builder.SetCurrentDebugLocation(CI->getDebugLoc());
11361146

11371147
Value *sliceA = builder.CreateAlloca(aMat->getType(), ADDRESS_SPACE_PRIVATE);
11381148
Value *sliceB = builder.CreateAlloca(bMat->getType(), ADDRESS_SPACE_PRIVATE);
11391149
Value *sliceC = builder.CreateAlloca(cMat->getType(), ADDRESS_SPACE_PRIVATE);
11401150
Value *sliceD = builder.CreateAlloca(cMat->getType(), ADDRESS_SPACE_PRIVATE);
11411151

1152+
builder.SetInsertPoint(CI);
1153+
11421154
builder.CreateStore(aMat, sliceA);
11431155
builder.CreateStore(bMat, sliceB);
11441156
builder.CreateStore(cMat, sliceC);

IGC/Compiler/tests/JointMatrixFuncsResolutionPass/ErrorsReporting/validate-load-store-error.ll

+4-1
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,18 @@
1212
; ------------------------------------------------
1313

1414

15+
; Debug-info related check
16+
; CHECK: CheckModuleDebugify: PASS
17+
1518
%intel.joint_matrix_packedA_8x16_i32_ = type opaque
1619

1720
define spir_kernel void @load_store_legacy_error(i8* %a, i8* %dst) {
1821
; CHECK-LABEL: define spir_kernel void @load_store_legacy_error(
22+
; CHECK: [[TMP4:%.*]] = alloca <16 x i32>
1923
; CHECK: [[PTR:%.*]] = alloca <16 x i32>
2024
; CHECK: [[MATPTR:%.*]] = bitcast <16 x i32>* [[PTR]] to i8*
2125
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_PackedA_RowMajor_8x16_i32_16_generic_v8i8_pi32_i32(i8* [[MATPTR]], i8* %a, i32 16), !dbg [[DBG2:![0-9]*]]
2226
; CHECK: [[MATRIX:%.*]] = load <16 x i32>, <16 x i32>* [[PTR]]
23-
; CHECK: [[TMP4:%.*]] = alloca <16 x i32>
2427
; CHECK: store <16 x i32> [[MATRIX]], <16 x i32>* [[TMP4]]
2528
; CHECK: [[TMP5:%.*]] = bitcast <16 x i32>* [[TMP4]] to i8*
2629
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_PackedA_RowMajor_8x16_i32_16_generic_pi64_v8i8(i8* %dst, i8* [[TMP5]], i32 8), !dbg [[DBG3:![0-9]*]]

IGC/Compiler/tests/JointMatrixFuncsResolutionPass/acc_fill_store.ll

+2-2
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@
1717
; CHECK-LABEL: define spir_kernel void @test_fill_store(
1818
; CHECK-SAME: float addrspace(1)* [[DST0:%.*]], float addrspace(1)* [[DST1:%.*]], float addrspace(1)* [[DST2:%.*]]) {
1919
define spir_kernel void @test_fill_store(float addrspace(1)* %dst0, float addrspace(1)* %dst1, float addrspace(1)* %dst2){
20+
; CHECK-NEXT: [[TMP5:%.*]] = alloca [2 x <32 x i64>]
21+
; CHECK-NEXT: [[TMP3:%.*]] = alloca <8 x float>
2022
; CHECK-NEXT: [[TMP1:%.*]] = alloca <16 x float>
2123
; CHECK-NEXT: store <16 x float> <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>, <16 x float>* [[TMP1]]
2224
%1 = call spir_func %spirv.JointMatrixINTEL._float_16_16_3_3_2 addrspace(1)* @_Z26__spirv_CompositeConstructf(float 5.000000e+00)
@@ -25,15 +27,13 @@ define spir_kernel void @test_fill_store(float addrspace(1)* %dst0, float addrsp
2527
; CHECK-NEXT: call void @__builtin_spriv_OpJointMatrixStoreINTEL_Accumulator_RowMajor_SG16_16x16_i32_16_global_pi64_v8i8(float addrspace(1)* [[DST0]], i8* [[TMP2]], i64 16)
2628
call spir_func void @_Z29__spirv_JointMatrixStoreINTELPU3AS1fPU3AS143__spirv_JointMatrixINTEL__float_16_16_3_3_2liii(float addrspace(1)* %dst0, %spirv.JointMatrixINTEL._float_16_16_3_3_2 addrspace(1)* %1, i64 16, i32 0, i32 3, i32 0)
2729

28-
; CHECK-NEXT: [[TMP3:%.*]] = alloca <8 x float>
2930
; CHECK-NEXT: store <8 x float> <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>, <8 x float>* [[TMP3]]
3031
%2 = call spir_func %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(1)* @_Z26__spirv_CompositeConstructf.1(float 5.000000e+00)
3132

3233
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float>* [[TMP3]] to i8*
3334
; CHECK-NEXT: call void @__builtin_spriv_OpJointMatrixStoreINTEL_Accumulator_RowMajor_SG16_8x16_i32_8_global_pi64_v8i8(float addrspace(1)* [[DST1]], i8* [[TMP4]], i64 16)
3435
call spir_func void @_Z29__spirv_JointMatrixStoreINTELPU3AS1fPU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2liii(float addrspace(1)* %dst1, %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(1)* %2, i64 16, i32 0, i32 3, i32 0)
3536

36-
; CHECK-NEXT: [[TMP5:%.*]] = alloca [2 x <32 x i64>]
3737
; CHECK-NEXT: store [2 x <32 x i64>] [<32 x i64> <i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448>, <32 x i64> <i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448, i64 4656722015785320448>], [2 x <32 x i64>]* [[TMP5]]
3838
%3 = call spir_func %spirv.JointMatrixINTEL._float_32_64_3_3_2 addrspace(1)* @_Z26__spirv_CompositeConstructf.2(float 5.000000e+00)
3939

IGC/Compiler/tests/JointMatrixFuncsResolutionPass/address-spaces.ll

+24-12
Original file line number | Diff line number | Diff line change
@@ -35,16 +35,18 @@ define spir_kernel void @test_local(i8 addrspace(3)* %src, i8 addrspace(3)* %ds
3535
; CHECK-LABEL: define void @load_store_generic(
3636
define void @load_store_generic(i8* %src, i8* %dst) {
3737

38-
; Matrix load sequence:
38+
; Allocas:
39+
; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
3940
; CHECK: [[PTR:%.*]] = alloca <4 x i32>
41+
42+
; Matrix load sequence:
4043
; CHECK: [[MATPTR:%.*]] = bitcast <4 x i32>* [[PTR]] to i8*
4144
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_PackedA_RowMajor_SG16_8x8_i32_4_generic_v8i8_pi32_i32(i8* [[MATPTR]], i8* %src, i32 8)
4245
; CHECK: [[MATRIX:%.*]] = load <4 x i32>, <4 x i32>* [[PTR]]
4346

4447
%1 = call spir_func %intel.joint_matrix_packedA_8x8_f32_t* @__builtin_spirv_OpJointMatrixLoadINTEL_generic(i8* %src, i32 8, i32 0)
4548

4649
; Matrix store sequence:
47-
; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
4850
; CHECK: store <4 x i32> [[MATRIX]], <4 x i32>* [[TMP4]]
4951
; CHECK: [[TMP5:%.*]] = bitcast <4 x i32>* [[TMP4]] to i8*
5052
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_PackedA_RowMajor_SG16_8x8_i32_4_generic_pi64_v8i8(i8* %dst, i8* [[TMP5]], i32 8)
@@ -59,8 +61,11 @@ define void @load_store_generic(i8* %src, i8* %dst) {
5961
; CHECK-LABEL: define void @load_store_large_generic(
6062
define void @load_store_large_generic(i8* %src, i8* %dst) {
6163

62-
; Matrix load sequence:
64+
; Allocas:
65+
; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
6366
; CHECK: [[PTR:%.*]] = alloca [2 x <32 x i64>]
67+
68+
; Matrix load sequence:
6469
; CHECK: [[MATPTR:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to i8*
6570
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_generic_v8i8_pi32_i32(i8* [[MATPTR]], i8* %src, i64 16)
6671
; CHECK: [[HALF_PTR_0:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to <32 x i64>*
@@ -73,7 +78,6 @@ define void @load_store_large_generic(i8* %src, i8* %dst) {
7378
%1 = call spir_func %intel.joint_matrix_acc_32x64_f32_t* @__builtin_spirv_OpJointMatrixLoadINTELacc_32x64_f32_p1i8_i64_i32_generic(i8* %src, i64 16, i32 0)
7479

7580
; Matrix store sequence:
76-
; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
7781
; CHECK: store [2 x <32 x i64>] [[MATRIX]], [2 x <32 x i64>]* [[TMP4]]
7882
; CHECK: [[TMP5:%.*]] = bitcast [2 x <32 x i64>]* [[TMP4]] to i8*
7983
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_generic_pi64_v8i8(i8* %dst, i8* [[TMP5]], i64 8)
@@ -88,16 +92,18 @@ define void @load_store_large_generic(i8* %src, i8* %dst) {
8892
; CHECK-LABEL: define void @load_store_global(
8993
define void @load_store_global(i8 addrspace(1)* %src, i8 addrspace(1)* %dst) {
9094

91-
; Matrix load sequence:
95+
; Allocas:
96+
; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
9297
; CHECK: [[PTR:%.*]] = alloca <4 x i32>
98+
99+
; Matrix load sequence:
93100
; CHECK: [[MATPTR:%.*]] = bitcast <4 x i32>* [[PTR]] to i8*
94101
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_PackedA_RowMajor_SG16_8x8_i32_4_global_v8i8_pi32_i32(i8* [[MATPTR]], i8 addrspace(1)* %src, i32 8)
95102
; CHECK: [[MATRIX:%.*]] = load <4 x i32>, <4 x i32>* [[PTR]]
96103

97104
%1 = call spir_func %intel.joint_matrix_packedA_8x8_f32_t* @__builtin_spirv_OpJointMatrixLoadINTEL_global(i8 addrspace(1)* %src, i32 8, i32 0)
98105

99106
; Matrix store sequence:
100-
; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
101107
; CHECK: store <4 x i32> [[MATRIX]], <4 x i32>* [[TMP4]]
102108
; CHECK: [[TMP5:%.*]] = bitcast <4 x i32>* [[TMP4]] to i8*
103109
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_PackedA_RowMajor_SG16_8x8_i32_4_global_pi64_v8i8(i8 addrspace(1)* %dst, i8* [[TMP5]], i32 8)
@@ -112,8 +118,11 @@ define void @load_store_global(i8 addrspace(1)* %src, i8 addrspace(1)* %dst) {
112118
; CHECK-LABEL: define void @load_store_large_global(
113119
define void @load_store_large_global(i8 addrspace(1)* %src, i8 addrspace(1)* %dst) {
114120

115-
; Matrix load sequence:
121+
; Allocas:
122+
; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
116123
; CHECK: [[PTR:%.*]] = alloca [2 x <32 x i64>]
124+
125+
; Matrix load sequence:
117126
; CHECK: [[MATPTR:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to i8*
118127
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_global_v8i8_pi32_i32(i8* [[MATPTR]], i8 addrspace(1)* %src, i64 16)
119128
; CHECK: [[HALF_PTR_0:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to <32 x i64>*
@@ -126,7 +135,6 @@ define void @load_store_large_global(i8 addrspace(1)* %src, i8 addrspace(1)* %ds
126135
%1 = call spir_func %intel.joint_matrix_acc_32x64_f32_t* @__builtin_spirv_OpJointMatrixLoadINTELacc_32x64_f32_p1i8_i64_i32_global(i8 addrspace(1)* %src, i64 16, i32 0)
127136

128137
; Matrix store sequence:
129-
; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
130138
; CHECK: store [2 x <32 x i64>] [[MATRIX]], [2 x <32 x i64>]* [[TMP4]]
131139
; CHECK: [[TMP5:%.*]] = bitcast [2 x <32 x i64>]* [[TMP4]] to i8*
132140
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_global_pi64_v8i8(i8 addrspace(1)* %dst, i8* [[TMP5]], i64 8)
@@ -141,16 +149,18 @@ define void @load_store_large_global(i8 addrspace(1)* %src, i8 addrspace(1)* %ds
141149
; CHECK-LABEL: define void @load_store_local(
142150
define void @load_store_local(i8 addrspace(3)* %src, i8 addrspace(3)* %dst) {
143151

144-
; Matrix load sequence:
152+
; Allocas:
153+
; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
145154
; CHECK: [[PTR:%.*]] = alloca <4 x i32>
155+
156+
; Matrix load sequence:
146157
; CHECK: [[MATPTR:%.*]] = bitcast <4 x i32>* [[PTR]] to i8*
147158
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_PackedA_RowMajor_SG16_8x8_i32_4_local_v8i8_pi32_i32(i8* [[MATPTR]], i8 addrspace(3)* %src, i32 8)
148159
; CHECK: [[MATRIX:%.*]] = load <4 x i32>, <4 x i32>* [[PTR]]
149160

150161
%1 = call spir_func %intel.joint_matrix_packedA_8x8_f32_t* @__builtin_spirv_OpJointMatrixLoadINTEL_local(i8 addrspace(3)* %src, i32 8, i32 0)
151162

152163
; Matrix store sequence:
153-
; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
154164
; CHECK: store <4 x i32> [[MATRIX]], <4 x i32>* [[TMP4]]
155165
; CHECK: [[TMP5:%.*]] = bitcast <4 x i32>* [[TMP4]] to i8*
156166
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_PackedA_RowMajor_SG16_8x8_i32_4_local_pi64_v8i8(i8 addrspace(3)* %dst, i8* [[TMP5]], i32 8)
@@ -165,8 +175,11 @@ define void @load_store_local(i8 addrspace(3)* %src, i8 addrspace(3)* %dst) {
165175
; CHECK-LABEL: define void @load_store_large_local(
166176
define void @load_store_large_local(i8 addrspace(3)* %src, i8 addrspace(3)* %dst) {
167177

168-
; Matrix load sequence:
178+
; Allocas:
179+
; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
169180
; CHECK: [[PTR:%.*]] = alloca [2 x <32 x i64>]
181+
182+
; Matrix load sequence:
170183
; CHECK: [[MATPTR:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to i8*
171184
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_local_v8i8_pi32_i32(i8* [[MATPTR]], i8 addrspace(3)* %src, i64 16)
172185
; CHECK: [[HALF_PTR_0:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to <32 x i64>*
@@ -179,7 +192,6 @@ define void @load_store_large_local(i8 addrspace(3)* %src, i8 addrspace(3)* %dst
179192
%1 = call spir_func %intel.joint_matrix_acc_32x64_f32_t* @__builtin_spirv_OpJointMatrixLoadINTELacc_32x64_f32_p1i8_i64_i32_local(i8 addrspace(3)* %src, i64 16, i32 0)
180193

181194
; Matrix store sequence:
182-
; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
183195
; CHECK: store [2 x <32 x i64>] [[MATRIX]], [2 x <32 x i64>]* [[TMP4]]
184196
; CHECK: [[TMP5:%.*]] = bitcast [2 x <32 x i64>]* [[TMP4]] to i8*
185197
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_local_pi64_v8i8(i8 addrspace(3)* %dst, i8* [[TMP5]], i64 8)

IGC/Compiler/tests/JointMatrixFuncsResolutionPass/basic.ll

+3-3
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ define spir_kernel void @test_jm(i32 %t1_a, i8* %t1_dst1, i32* %t1_dst2, i8* %t2
2727
%intel.joint_matrix_packedA_8x32_i8_ = type opaque
2828
define void @fill_length(i32 %a, i8* %dst, i32* %dst2) {
2929
; CHECK-LABEL: define void @fill_length(
30+
; CHECK: [[PTR:%.*]] = alloca <8 x i32>
3031
; CHECK: [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[A:%.*]], i64 0
3132
; CHECK: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[A]], i64 1
3233
; CHECK: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A]], i64 2
@@ -35,7 +36,6 @@ define void @fill_length(i32 %a, i8* %dst, i32* %dst2) {
3536
; CHECK: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A]], i64 5
3637
; CHECK: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A]], i64 6
3738
; CHECK: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A]], i64 7
38-
; CHECK: [[PTR:%.*]] = alloca <8 x i32>
3939
; CHECK: store <8 x i32> [[TMP8]], <8 x i32>* [[PTR:%.*]]
4040
; CHECK: [[MATPTR:%.*]] = bitcast <8 x i32>* [[PTR:%.*]] to i8*, !dbg [[DBG1:![0-9]*]]
4141
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_PackedA_RowMajor_8x32_i8_8_generic_pi64_v8i8(i8* %dst, i8* [[MATPTR]], i32 8), !dbg [[DBG1]]
@@ -56,11 +56,11 @@ declare spir_func void @__builtin_spirv_OpJointMatrixStoreINTEL(i8*, %intel.join
5656
%intel.joint_matrix_packedA_8x16_i16_ = type opaque
5757
define void @load_store_legacy(i8* %a, i8* %dst) {
5858
; CHECK-LABEL: define void @load_store_legacy(
59+
; CHECK: [[TMP4:%.*]] = alloca <8 x i32>
5960
; CHECK: [[PTR:%.*]] = alloca <8 x i32>
6061
; CHECK: [[MATPTR:%.*]] = bitcast <8 x i32>* [[PTR]] to i8*
6162
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_PackedA_RowMajor_8x16_i16_8_generic_v8i8_pi32_i32(i8* [[MATPTR]], i8* %a, i32 16), !dbg [[DBG2:![0-9]*]]
6263
; CHECK: [[MATRIX:%.*]] = load <8 x i32>, <8 x i32>* [[PTR]]
63-
; CHECK: [[TMP4:%.*]] = alloca <8 x i32>
6464
; CHECK: store <8 x i32> [[MATRIX]], <8 x i32>* [[TMP4]]
6565
; CHECK: [[TMP5:%.*]] = bitcast <8 x i32>* [[TMP4]] to i8*
6666
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_PackedA_RowMajor_8x16_i16_8_generic_pi64_v8i8(i8* %dst, i8* [[TMP5]], i32 8), !dbg [[DBG3:![0-9]*]]
@@ -78,11 +78,11 @@ declare spir_func void @__builtin_spirv_OpJointMatrixStoreINTEL.8x16(i8*, %intel
7878
%spirv.JointMatrixINTEL._float_8_8_3_3_2 = type opaque
7979
define void @load_store_acc_transpose(float addrspace(1)* %a, float addrspace(1)* %dst) {
8080
; CHECK-LABEL: define void @load_store_acc_transpose(
81+
; CHECK: [[TMP4:%.*]] = alloca <8 x float>
8182
; CHECK: [[PTR:%.*]] = alloca <8 x float>
8283
; CHECK: [[MATPTR:%.*]] = bitcast <8 x float>* [[PTR]] to i8*
8384
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_Accumulator_ColumnMajor_8x8_i32_8_global_v8i8_pi32_i32(i8* [[MATPTR]], float addrspace(1)* %a, i64 64), !dbg [[DBG2:![0-9]*]]
8485
; CHECK: [[MATRIX:%.*]] = load <8 x float>, <8 x float>* [[PTR]]
85-
; CHECK: [[TMP4:%.*]] = alloca <8 x float>
8686
; CHECK: store <8 x float> [[MATRIX]], <8 x float>* [[TMP4]]
8787
; CHECK: [[TMP5:%.*]] = bitcast <8 x float>* [[TMP4]] to i8*
8888
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_Accumulator_ColumnMajor_8x8_i32_8_global_pi64_v8i8(float addrspace(1)* %dst, i8* [[TMP5]], i64 64), !dbg [[DBG3:![0-9]*]]

0 commit comments

Comments
 (0)