Skip to content

Commit aa56d87

Browse files
mmerecki authored and
igcbot committed
Fix ConstantCoalescing chunk size
Fix a couple of places that produced incorrect chunk sizes in the `ConstantCoalescing` pass.
1 parent 7957d4b commit aa56d87

File tree

3 files changed

+96
-2
lines changed

3 files changed

+96
-2
lines changed

IGC/Compiler/CISACodeGen/ConstantCoalescing.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1792,7 +1792,7 @@ Instruction* ConstantCoalescing::FindOrAddChunkExtract(BufChunk* cov_chunk, uint
17921792
void ConstantCoalescing::AdjustChunk(
17931793
BufChunk* cov_chunk, uint start_adj, uint size_adj, const ExtensionKind &Extension)
17941794
{
1795-
cov_chunk->chunkSize += size_adj;
1795+
cov_chunk->chunkSize = RoundChunkSize(cov_chunk->chunkSize + size_adj, cov_chunk->elementSize);
17961796
cov_chunk->chunkStart -= start_adj;
17971797
// mutateType to change array-size
17981798
Type* originalType = cov_chunk->chunkIO->getType();
@@ -2038,7 +2038,7 @@ void ConstantCoalescing::MoveExtracts(BufChunk* cov_chunk, Instruction* load, ui
20382038

20392039
void ConstantCoalescing::EnlargeChunk(BufChunk* cov_chunk, uint size_adj)
20402040
{
2041-
cov_chunk->chunkSize += size_adj;
2041+
cov_chunk->chunkSize = RoundChunkSize(cov_chunk->chunkSize + size_adj, cov_chunk->elementSize);
20422042
// mutateType to change array-size
20432043
Type* originalType = cov_chunk->chunkIO->getType();
20442044
Type* vty = IGCLLVM::FixedVectorType::get(cov_chunk->chunkIO->getType()->getScalarType(), cov_chunk->chunkSize);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm-14-plus
10+
; RUN: igc_opt --opaque-pointers %s -S -o - -igc-constant-coalescing -instcombine -dce | FileCheck %s --check-prefixes=CHECK
11+
12+
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f80:128:128-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-a:64:64-f80:128:128-n8:16:32:64"
13+
14+
; Test input: builds a <3 x half> result from two raw buffer loads on the same
; pointer. The scalar half load at %src + 4 (%z) appears first in program
; order, followed by the <2 x half> load at %src (%xy).
; The CHECK lines that follow this function expect both loads to be replaced
; by a single <4 x half> ldrawvector load at %src, with the result formed from
; lanes 0, 1 and 2 of that vector.
; NOTE(review): presumably this ordering (higher offset loaded first) is meant
; to exercise the chunk-start adjustment path (AdjustChunk) - confirm.
define <3 x half> @f0(i32 %src) {
15+
entry:
16+
%buf = inttoptr i32 %src to ptr addrspace(2490373)
17+
%off = add i32 %src, 4
18+
%z = call half @llvm.genx.GenISA.ldraw.indexed.f16.p2490373(ptr addrspace(2490373) %buf, i32 %off, i32 4, i1 false)
19+
%xy = call <2 x half> @llvm.genx.GenISA.ldraw.indexed.v2f16.p2490373(ptr addrspace(2490373) %buf, i32 %src, i32 4, i1 false)
20+
%x = extractelement <2 x half> %xy, i32 0
21+
%y = extractelement <2 x half> %xy, i32 1
22+
%res.x = insertelement <3 x half> undef, half %x, i32 0
23+
%res.xy = insertelement <3 x half> %res.x, half %y, i32 1
24+
%res.xyz = insertelement <3 x half> %res.xy, half %z, i32 2
25+
ret <3 x half> %res.xyz
26+
}
27+
28+
; CHECK-LABEL: define <3 x half> @f0
29+
; CHECK: [[PTR:%.*]] = inttoptr i32 %src to ptr addrspace(2490373)
30+
; CHECK: [[CHUNK:%.*]] = call <4 x half> @llvm.genx.GenISA.ldrawvector.indexed.v4f16.p2490373(ptr addrspace(2490373) [[PTR]], i32 %src, i32 4, i1 false)
31+
; CHECK: [[RESULT:%.*]] = shufflevector <4 x half> [[CHUNK]], <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
32+
; CHECK: ret <3 x half> [[RESULT]]
33+
34+
35+
; Function Attrs: argmemonly nounwind readonly willreturn
36+
declare half @llvm.genx.GenISA.ldraw.indexed.f16.p2490373(ptr addrspace(2490373), i32, i32, i1) argmemonly nounwind readonly willreturn
37+
declare <2 x half> @llvm.genx.GenISA.ldraw.indexed.v2f16.p2490373(ptr addrspace(2490373), i32, i32, i1) argmemonly nounwind readonly willreturn
38+
39+
40+
!igc.functions = !{!0}
41+
42+
!0 = !{ptr @f0, !1}
43+
44+
45+
!1 = !{!2}
46+
!2 = !{!"function_type", i32 0}
47+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm-14-plus
10+
; RUN: igc_opt --opaque-pointers %s -S -o - -igc-constant-coalescing -instcombine -dce | FileCheck %s --check-prefixes=CHECK
11+
12+
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f80:128:128-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-a:64:64-f80:128:128-n8:16:32:64"
13+
14+
; Test input: builds a <3 x half> result from two raw buffer loads on the same
; pointer. The <2 x half> load at %src (%xy) appears first in program order,
; followed by the scalar half load at %src + 4 (%z).
; The CHECK lines that follow this function expect both loads to be replaced
; by a single <4 x half> ldrawvector load at %src, with the result formed from
; lanes 0, 1 and 2 of that vector.
; NOTE(review): presumably this ordering (lower offset loaded first) is meant
; to exercise the chunk-enlargement path (EnlargeChunk) - confirm.
define <3 x half> @f0(i32 %src) {
15+
entry:
16+
%buf = inttoptr i32 %src to ptr addrspace(2490373)
17+
%xy = call <2 x half> @llvm.genx.GenISA.ldraw.indexed.v2f16.p2490373(ptr addrspace(2490373) %buf, i32 %src, i32 4, i1 false)
18+
%off = add i32 %src, 4
19+
%z = call half @llvm.genx.GenISA.ldraw.indexed.f16.p2490373(ptr addrspace(2490373) %buf, i32 %off, i32 4, i1 false)
20+
%x = extractelement <2 x half> %xy, i32 0
21+
%y = extractelement <2 x half> %xy, i32 1
22+
%res.x = insertelement <3 x half> undef, half %x, i32 0
23+
%res.xy = insertelement <3 x half> %res.x, half %y, i32 1
24+
%res.xyz = insertelement <3 x half> %res.xy, half %z, i32 2
25+
ret <3 x half> %res.xyz
26+
}
27+
28+
; CHECK-LABEL: define <3 x half> @f0
29+
; CHECK: [[PTR:%.*]] = inttoptr i32 %src to ptr addrspace(2490373)
30+
; CHECK: [[CHUNK:%.*]] = call <4 x half> @llvm.genx.GenISA.ldrawvector.indexed.v4f16.p2490373(ptr addrspace(2490373) [[PTR]], i32 %src, i32 4, i1 false)
31+
; CHECK: [[RESULT:%.*]] = shufflevector <4 x half> [[CHUNK]], <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
32+
; CHECK: ret <3 x half> [[RESULT]]
33+
34+
35+
; Function Attrs: argmemonly nounwind readonly willreturn
36+
declare half @llvm.genx.GenISA.ldraw.indexed.f16.p2490373(ptr addrspace(2490373), i32, i32, i1) argmemonly nounwind readonly willreturn
37+
declare <2 x half> @llvm.genx.GenISA.ldraw.indexed.v2f16.p2490373(ptr addrspace(2490373), i32, i32, i1) argmemonly nounwind readonly willreturn
38+
39+
40+
!igc.functions = !{!0}
41+
42+
!0 = !{ptr @f0, !1}
43+
44+
45+
!1 = !{!2}
46+
!2 = !{!"function_type", i32 0}
47+

0 commit comments

Comments
 (0)