Skip to content

Commit 02e2e6b

Browse files
admitric authored and igcbot committed
Loop sinking tuning to sink load chains
Prepopulate all load chains for a loop and sink the instructions with awareness of the loads that are already in the loop.
1 parent 8d912db commit 02e2e6b

File tree

4 files changed

+126
-9
lines changed

4 files changed

+126
-9
lines changed

IGC/Compiler/CISACodeGen/CodeSinking.cpp

+49-7
Original file line numberDiff line numberDiff line change
@@ -1440,8 +1440,11 @@ namespace IGC {
14401440
MovedInsts.clear();
14411441
UndoLocas.clear();
14421442

1443+
if (IGC_IS_FLAG_ENABLED(PrepopulateLoadChainLoopSink))
1444+
prepopulateLoadChains(L, LoadChains);
1445+
14431446
bool AllowLoadSinking = IGC_IS_FLAG_ENABLED(ForceLoadsLoopSink);
1444-
bool AllowOnlyLoadChainSinking = false;
1447+
bool AllowOnlySingleUseLoadChainSinking = false;
14451448
bool IterChanged = false;
14461449

14471450
uint MaxLoopPressure = 0;
@@ -1476,7 +1479,7 @@ namespace IGC {
14761479

14771480
if (isLoopSinkCandidate(I, L, AllowLoadSinking))
14781481
{
1479-
if (!AllowOnlyLoadChainSinking || isLoadChain(I, LoadChains))
1482+
if (!AllowOnlySingleUseLoadChainSinking || isLoadChain(I, LoadChains, true))
14801483
{
14811484
SinkCandidates.push_back(I);
14821485
}
@@ -1510,7 +1513,7 @@ namespace IGC {
15101513
if (IGC_IS_FLAG_ENABLED(EnableLoadChainLoopSink) && !LoadChains.empty())
15111514
{
15121515
PrintDump("Allowing only chain sinking...\n");
1513-
AllowOnlyLoadChainSinking = true;
1516+
AllowOnlySingleUseLoadChainSinking = true;
15141517
}
15151518
else
15161519
{
@@ -1580,15 +1583,54 @@ namespace IGC {
15801583

15811584
// Check that this instruction is a part of address calc
15821585
// chain of an already sinked load
1583-
bool CodeSinking::isLoadChain(Instruction *I, SmallPtrSet<Instruction *, 32> &LoadChains)
1586+
bool CodeSinking::isLoadChain(Instruction *I, SmallPtrSet<Instruction *, 32> &LoadChains, bool EnsureSingleUser)
15841587
{
15851588
if (!isa<BinaryOperator>(I) && !isa<CastInst>(I))
15861589
return false;
15871590
User *InstrUser = IGCLLVM::getUniqueUndroppableUser(I);
1588-
if (!InstrUser)
1591+
if (EnsureSingleUser && !InstrUser)
15891592
return false;
1590-
Instruction *UI = dyn_cast<Instruction>(InstrUser);
1591-
return UI && LoadChains.count(UI);
1593+
1594+
return std::all_of(I->user_begin(), I->user_end(),
1595+
[&](User *U)
1596+
{
1597+
Instruction *UI = dyn_cast<Instruction>(U);
1598+
return UI && LoadChains.count(UI);
1599+
});
1600+
}
1601+
1602+
// Prepopulate load chain with the loads that are already in the loop
1603+
void CodeSinking::prepopulateLoadChains(Loop *L, SmallPtrSet<Instruction *, 32> &LoadChains)
1604+
{
1605+
std::function<void(Value *)> addInstructionIfLoadChain = [&](Value *V)-> void
1606+
{
1607+
Instruction *I = dyn_cast<Instruction>(V);
1608+
if (!I)
1609+
return;
1610+
1611+
if (!L->contains(I))
1612+
return;
1613+
1614+
if (!isLoadChain(I, LoadChains))
1615+
return;
1616+
1617+
LoadChains.insert(I);
1618+
for (auto &U : I->operands()) {
1619+
addInstructionIfLoadChain(U);
1620+
}
1621+
};
1622+
1623+
for (BasicBlock *BB: L->blocks())
1624+
{
1625+
for (Instruction &I : *BB)
1626+
{
1627+
if (LoadInst *LI = dyn_cast<LoadInst>(&I))
1628+
{
1629+
LoadChains.insert(&I);
1630+
addInstructionIfLoadChain(LI->getPointerOperand());
1631+
}
1632+
}
1633+
}
15921634
}
15931635

15941636
bool CodeSinking::isLoopSinkCandidate(Instruction *I, Loop *L, bool AllowLoadSinking)

IGC/Compiler/CISACodeGen/CodeSinking.hpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,9 @@ namespace IGC {
7777
llvm::SmallPtrSetImpl<llvm::Instruction*>& Stores);
7878
bool isSafeToLoopSinkLoad(llvm::Instruction* I, llvm::Loop* Loop, llvm::AliasAnalysis* AA);
7979
bool isAlwaysSinkInstruction(llvm::Instruction* I);
80-
bool isLoadChain(llvm::Instruction* I, SmallPtrSet<Instruction *, 32> &LoadChains);
80+
bool isLoadChain(llvm::Instruction* I, SmallPtrSet<Instruction*, 32>& LoadChains, bool EnsureSingleUser=false);
81+
void prepopulateLoadChains(llvm::Loop* I, SmallPtrSet<Instruction*, 32>& LoadChains);
82+
8183

8284
/// rollback sinking. Uses MovedInsts and UndoLocas members implicitly
8385
void rollbackSinking(bool ReverseOrder, llvm::BasicBlock* BB);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; REQUIRES: regkeys
9+
; RUN: igc_opt --regkey ForceLoopSink=1 --regkey LoopSinkMinSave=1 --regkey LoopSinkThresholdDelta=10 --regkey CodeSinkingMinSize=10 --regkey PrepopulateLoadChainLoopSink=1 --igc-wi-analysis %enable-basic-aa% --igc-code-sinking -S %s | FileCheck %s
10+
11+
; Check that address computations are considered beneficial and are sunk
12+
; even if the loads are already in the loop (when PrepopulateLoadChainLoopSink=1 is passed)
13+
14+
define spir_kernel void @dsmm23x23x23(i16 %localIdX) {
15+
; CHECK-LABEL: @dsmm23x23x23(
16+
; CHECK: for.body:
17+
; CHECK: [[SINK_IDXPROM36:%.*]] = zext i16 [[LOCALIDX:%.*]] to i64
18+
; CHECK: [[SINK_317:%.*]] = shl nuw nsw i64 [[SINK_IDXPROM36]], 3
19+
; CHECK: [[TMP40:%.*]] = add i64 [[TMP39:%.*]], [[SINK_317]]
20+
; CHECK: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to double addrspace(2)*
21+
; CHECK: [[TMP42:%.*]] = load double, double addrspace(2)* [[TMP41]], align 8
22+
; CHECK: [[SINK_CONV2:%.*]] = zext i16 [[LOCALIDX]] to i32
23+
; CHECK: [[SINK_ADD33_1:%.*]] = add nuw nsw i32 [[SINK_CONV2]], 23
24+
; CHECK: [[SINK_IDXPROM34_1:%.*]] = zext i32 [[SINK_ADD33_1]] to i64
25+
; CHECK: [[SINK_316:%.*]] = shl nuw nsw i64 [[SINK_IDXPROM34_1]], 3
26+
; CHECK: [[TMP43:%.*]] = add i64 [[TMP39]], [[SINK_316]]
27+
; CHECK: [[TMP44:%.*]] = inttoptr i64 [[TMP43]] to double addrspace(2)*
28+
; CHECK: [[TMP45:%.*]] = load double, double addrspace(2)* [[TMP44]], align 8
29+
; CHECK: [[SINK_ADD33_2:%.*]] = add nuw nsw i32 [[SINK_CONV2]], 46
30+
; CHECK: [[SINK_IDXPROM34_2:%.*]] = zext i32 [[SINK_ADD33_2]] to i64
31+
; CHECK: [[SINK_315:%.*]] = shl nuw nsw i64 [[SINK_IDXPROM34_2]], 3
32+
; CHECK: [[TMP48:%.*]] = add i64 [[TMP39]], [[SINK_315]]
33+
; CHECK: [[TMP49:%.*]] = inttoptr i64 [[TMP48]] to double addrspace(2)*
34+
; CHECK: [[TMP50:%.*]] = load double, double addrspace(2)* [[TMP49]], align 8
35+
; CHECK: [[SINK_ADD33_3:%.*]] = add nuw nsw i32 [[SINK_CONV2]], 69
36+
; CHECK: [[SINK_IDXPROM34_3:%.*]] = zext i32 [[SINK_ADD33_3]] to i64
37+
; CHECK: [[SINK_314:%.*]] = shl nuw nsw i64 [[SINK_IDXPROM34_3]], 3
38+
; CHECK: [[TMP51:%.*]] = add i64 [[TMP39]], [[SINK_314]]
39+
; CHECK: [[TMP52:%.*]] = inttoptr i64 [[TMP51]] to double addrspace(2)*
40+
; CHECK: [[TMP53:%.*]] = load double, double addrspace(2)* [[TMP52]], align 8
41+
42+
entry:
43+
%conv2 = zext i16 %localIdX to i32
44+
%idxprom36 = zext i16 %localIdX to i64
45+
%add33.1 = add nuw nsw i32 %conv2, 23
46+
%idxprom34.1 = zext i32 %add33.1 to i64
47+
%add33.2 = add nuw nsw i32 %conv2, 46
48+
%idxprom34.2 = zext i32 %add33.2 to i64
49+
%add33.3 = add nuw nsw i32 %conv2, 69
50+
%idxprom34.3 = zext i32 %add33.3 to i64
51+
%0 = shl nuw nsw i64 %idxprom36, 3
52+
%1 = shl nuw nsw i64 %idxprom34.1, 3
53+
%2 = shl nuw nsw i64 %idxprom34.2, 3
54+
%3 = shl nuw nsw i64 %idxprom34.3, 3
55+
br label %for.body
56+
57+
for.body: ; preds = %for.body, %entry
58+
%4 = add i64 0, 0
59+
%5 = add i64 %4, %0
60+
%6 = inttoptr i64 %5 to double addrspace(2)*
61+
%7 = load double, double addrspace(2)* %6, align 8
62+
%8 = add i64 %4, %1
63+
%9 = inttoptr i64 %8 to double addrspace(2)*
64+
%10 = load double, double addrspace(2)* %9, align 8
65+
%11 = add i64 %4, %2
66+
%12 = inttoptr i64 %11 to double addrspace(2)*
67+
%13 = load double, double addrspace(2)* %12, align 8
68+
%14 = add i64 %4, %3
69+
%15 = inttoptr i64 %14 to double addrspace(2)*
70+
%16 = load double, double addrspace(2)* %15, align 8
71+
br label %for.body
72+
}

IGC/common/igc_flags.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,8 @@ DECLARE_IGC_REGKEY(bool, DisableLoopSink, false, "Disable sinking
134134
DECLARE_IGC_REGKEY(bool, ForceLoopSink, false, "Force sinking in all loops", false)
135135
DECLARE_IGC_REGKEY(bool, EnableLoadsLoopSink, true, "Allow sinking of loads in the loop", false)
136136
DECLARE_IGC_REGKEY(bool, ForceLoadsLoopSink, false, "Force sinking of loads in the loop from the beginning", false)
137-
DECLARE_IGC_REGKEY(bool, EnableLoadChainLoopSink, true, "Allow sinking of load address calculation when the load was sinked to the loop, even if the needed regpressure is achieved", false)
137+
DECLARE_IGC_REGKEY(bool, PrepopulateLoadChainLoopSink, true, "Check the loop for load chains before sinking to use the existing chains in a heuristic", false)
138+
DECLARE_IGC_REGKEY(bool, EnableLoadChainLoopSink, true, "Allow sinking of load address calculation when the load was sinked to the loop, even if the needed regpressure is achieved (only single use instructions)", false)
138139
DECLARE_IGC_REGKEY(DWORD, LoopSinkRegpressureMargin, 10, "Sink into the loop until the pressure becomes less than #grf-margin", false)
139140
DECLARE_IGC_REGKEY(DWORD, CodeSinkingMinSize, 32, "Don't sink if the number of instructions in the kernel is less", false)
140141
DECLARE_IGC_REGKEY(DWORD, CodeSinkingLoadSchedulingInstr, 20, "Instructions number to step to schedule loads in advance before the load use to cover latency. 1 to insert it immediately before use", false)

0 commit comments

Comments
 (0)