@@ -35,16 +35,18 @@ define spir_kernel void @test_local(i8 addrspace(3)* %src, i8 addrspace(3)* %ds
; CHECK-LABEL: define void @load_store_generic(
define void @load_store_generic(i8* %src, i8* %dst) {

- ; Matrix load sequence:
+ ; Allocas:
+ ; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
; CHECK: [[PTR:%.*]] = alloca <4 x i32>
+
+ ; Matrix load sequence:
; CHECK: [[MATPTR:%.*]] = bitcast <4 x i32>* [[PTR]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_PackedA_RowMajor_SG16_8x8_i32_4_generic_v8i8_pi32_i32(i8* [[MATPTR]], i8* %src, i32 8)
; CHECK: [[MATRIX:%.*]] = load <4 x i32>, <4 x i32>* [[PTR]]

%1 = call spir_func %intel.joint_matrix_packedA_8x8_f32_t* @__builtin_spirv_OpJointMatrixLoadINTEL_generic(i8* %src, i32 8, i32 0)

; Matrix store sequence:
- ; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
; CHECK: store <4 x i32> [[MATRIX]], <4 x i32>* [[TMP4]]
; CHECK: [[TMP5:%.*]] = bitcast <4 x i32>* [[TMP4]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_PackedA_RowMajor_SG16_8x8_i32_4_generic_pi64_v8i8(i8* %dst, i8* [[TMP5]], i32 8)
@@ -59,8 +61,11 @@ define void @load_store_generic(i8* %src, i8* %dst) {
; CHECK-LABEL: define void @load_store_large_generic(
define void @load_store_large_generic(i8* %src, i8* %dst) {

- ; Matrix load sequence:
+ ; Allocas:
+ ; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
; CHECK: [[PTR:%.*]] = alloca [2 x <32 x i64>]
+
+ ; Matrix load sequence:
; CHECK: [[MATPTR:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_generic_v8i8_pi32_i32(i8* [[MATPTR]], i8* %src, i64 16)
; CHECK: [[HALF_PTR_0:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to <32 x i64>*
@@ -73,7 +78,6 @@ define void @load_store_large_generic(i8* %src, i8* %dst) {
%1 = call spir_func %intel.joint_matrix_acc_32x64_f32_t* @__builtin_spirv_OpJointMatrixLoadINTELacc_32x64_f32_p1i8_i64_i32_generic(i8* %src, i64 16, i32 0)

; Matrix store sequence:
- ; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
; CHECK: store [2 x <32 x i64>] [[MATRIX]], [2 x <32 x i64>]* [[TMP4]]
; CHECK: [[TMP5:%.*]] = bitcast [2 x <32 x i64>]* [[TMP4]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_generic_pi64_v8i8(i8* %dst, i8* [[TMP5]], i64 8)
@@ -88,16 +92,18 @@ define void @load_store_large_generic(i8* %src, i8* %dst) {
; CHECK-LABEL: define void @load_store_global(
define void @load_store_global(i8 addrspace(1)* %src, i8 addrspace(1)* %dst) {

- ; Matrix load sequence:
+ ; Allocas:
+ ; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
; CHECK: [[PTR:%.*]] = alloca <4 x i32>
+
+ ; Matrix load sequence:
; CHECK: [[MATPTR:%.*]] = bitcast <4 x i32>* [[PTR]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_PackedA_RowMajor_SG16_8x8_i32_4_global_v8i8_pi32_i32(i8* [[MATPTR]], i8 addrspace(1)* %src, i32 8)
; CHECK: [[MATRIX:%.*]] = load <4 x i32>, <4 x i32>* [[PTR]]

%1 = call spir_func %intel.joint_matrix_packedA_8x8_f32_t* @__builtin_spirv_OpJointMatrixLoadINTEL_global(i8 addrspace(1)* %src, i32 8, i32 0)

; Matrix store sequence:
- ; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
; CHECK: store <4 x i32> [[MATRIX]], <4 x i32>* [[TMP4]]
; CHECK: [[TMP5:%.*]] = bitcast <4 x i32>* [[TMP4]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_PackedA_RowMajor_SG16_8x8_i32_4_global_pi64_v8i8(i8 addrspace(1)* %dst, i8* [[TMP5]], i32 8)
@@ -112,8 +118,11 @@ define void @load_store_global(i8 addrspace(1)* %src, i8 addrspace(1)* %dst) {
; CHECK-LABEL: define void @load_store_large_global(
define void @load_store_large_global(i8 addrspace(1)* %src, i8 addrspace(1)* %dst) {

- ; Matrix load sequence:
+ ; Allocas:
+ ; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
; CHECK: [[PTR:%.*]] = alloca [2 x <32 x i64>]
+
+ ; Matrix load sequence:
; CHECK: [[MATPTR:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_global_v8i8_pi32_i32(i8* [[MATPTR]], i8 addrspace(1)* %src, i64 16)
; CHECK: [[HALF_PTR_0:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to <32 x i64>*
@@ -126,7 +135,6 @@ define void @load_store_large_global(i8 addrspace(1)* %src, i8 addrspace(1)* %ds
%1 = call spir_func %intel.joint_matrix_acc_32x64_f32_t* @__builtin_spirv_OpJointMatrixLoadINTELacc_32x64_f32_p1i8_i64_i32_global(i8 addrspace(1)* %src, i64 16, i32 0)

; Matrix store sequence:
- ; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
; CHECK: store [2 x <32 x i64>] [[MATRIX]], [2 x <32 x i64>]* [[TMP4]]
; CHECK: [[TMP5:%.*]] = bitcast [2 x <32 x i64>]* [[TMP4]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_global_pi64_v8i8(i8 addrspace(1)* %dst, i8* [[TMP5]], i64 8)
@@ -141,16 +149,18 @@ define void @load_store_large_global(i8 addrspace(1)* %src, i8 addrspace(1)* %ds
; CHECK-LABEL: define void @load_store_local(
define void @load_store_local(i8 addrspace(3)* %src, i8 addrspace(3)* %dst) {

- ; Matrix load sequence:
+ ; Allocas:
+ ; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
; CHECK: [[PTR:%.*]] = alloca <4 x i32>
+
+ ; Matrix load sequence:
; CHECK: [[MATPTR:%.*]] = bitcast <4 x i32>* [[PTR]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_PackedA_RowMajor_SG16_8x8_i32_4_local_v8i8_pi32_i32(i8* [[MATPTR]], i8 addrspace(3)* %src, i32 8)
; CHECK: [[MATRIX:%.*]] = load <4 x i32>, <4 x i32>* [[PTR]]

%1 = call spir_func %intel.joint_matrix_packedA_8x8_f32_t* @__builtin_spirv_OpJointMatrixLoadINTEL_local(i8 addrspace(3)* %src, i32 8, i32 0)

; Matrix store sequence:
- ; CHECK: [[TMP4:%.*]] = alloca <4 x i32>
; CHECK: store <4 x i32> [[MATRIX]], <4 x i32>* [[TMP4]]
; CHECK: [[TMP5:%.*]] = bitcast <4 x i32>* [[TMP4]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_PackedA_RowMajor_SG16_8x8_i32_4_local_pi64_v8i8(i8 addrspace(3)* %dst, i8* [[TMP5]], i32 8)
@@ -165,8 +175,11 @@ define void @load_store_local(i8 addrspace(3)* %src, i8 addrspace(3)* %dst) {
; CHECK-LABEL: define void @load_store_large_local(
define void @load_store_large_local(i8 addrspace(3)* %src, i8 addrspace(3)* %dst) {

- ; Matrix load sequence:
+ ; Allocas:
+ ; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
; CHECK: [[PTR:%.*]] = alloca [2 x <32 x i64>]
+
+ ; Matrix load sequence:
; CHECK: [[MATPTR:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixLoadINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_local_v8i8_pi32_i32(i8* [[MATPTR]], i8 addrspace(3)* %src, i64 16)
; CHECK: [[HALF_PTR_0:%.*]] = bitcast [2 x <32 x i64>]* [[PTR]] to <32 x i64>*
@@ -179,7 +192,6 @@ define void @load_store_large_local(i8 addrspace(3)* %src, i8 addrspace(3)* %dst
%1 = call spir_func %intel.joint_matrix_acc_32x64_f32_t* @__builtin_spirv_OpJointMatrixLoadINTELacc_32x64_f32_p1i8_i64_i32_local(i8 addrspace(3)* %src, i64 16, i32 0)

; Matrix store sequence:
- ; CHECK: [[TMP4:%.*]] = alloca [2 x <32 x i64>]
; CHECK: store [2 x <32 x i64>] [[MATRIX]], [2 x <32 x i64>]* [[TMP4]]
; CHECK: [[TMP5:%.*]] = bitcast [2 x <32 x i64>]* [[TMP4]] to i8*
; CHECK: call void @__builtin_spriv_OpJointMatrixStoreINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_local_pi64_v8i8(i8 addrspace(3)* %dst, i8* [[TMP5]], i64 8)
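For reference, a minimal sketch of the IR these CHECK lines describe after the pass runs, written out for the small generic case. The value names (%tmp4, %matptr, %tmp5, etc.) are only placeholders for the FileCheck captures, and the declare lines and trailing ret are assumed rather than taken from the test:

declare void @__builtin_spriv_OpJointMatrixLoadINTEL_PackedA_RowMajor_SG16_8x8_i32_4_generic_v8i8_pi32_i32(i8*, i8*, i32)
declare void @__builtin_spriv_OpJointMatrixStoreINTEL_PackedA_RowMajor_SG16_8x8_i32_4_generic_pi64_v8i8(i8*, i8*, i32)

define void @load_store_generic(i8* %src, i8* %dst) {
  ; both allocas are emitted up front, which is what the new "; Allocas:" checks pin down
  %tmp4 = alloca <4 x i32>
  %ptr = alloca <4 x i32>

  ; load: the wrapper fills %ptr through an i8* view, then the matrix value is loaded from it
  %matptr = bitcast <4 x i32>* %ptr to i8*
  call void @__builtin_spriv_OpJointMatrixLoadINTEL_PackedA_RowMajor_SG16_8x8_i32_4_generic_v8i8_pi32_i32(i8* %matptr, i8* %src, i32 8)
  %matrix = load <4 x i32>, <4 x i32>* %ptr

  ; store: the matrix value is spilled to %tmp4 and handed to the store wrapper
  store <4 x i32> %matrix, <4 x i32>* %tmp4
  %tmp5 = bitcast <4 x i32>* %tmp4 to i8*
  call void @__builtin_spriv_OpJointMatrixStoreINTEL_PackedA_RowMajor_SG16_8x8_i32_4_generic_pi64_v8i8(i8* %dst, i8* %tmp5, i32 8)
  ret void
}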