Skip to content

Commit 4a7f81a

Browse files
stefan-il and igcbot
authored and committed
Check number of calls per kernel for inlining
When checking the number of calls for the inlining decision, count only the calls made from the kernel that contains the given call site.
1 parent 5f218cb commit 4a7f81a

File tree

4 files changed

+85
-19
lines changed

4 files changed

+85
-19
lines changed

IGC/Compiler/CISACodeGen/EstimateFunctionSize.cpp

+7-10
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ SPDX-License-Identifier: MIT
2828
#include "common/LLVMWarningsPop.hpp"
2929
#include "Probe/Assertion.h"
3030
#include <deque>
31-
#include <iostream>
3231
#include <cfloat>
3332
#include <algorithm>
3433
#include <cmath>
@@ -1112,26 +1111,24 @@ std::size_t EstimateFunctionSize::getExpandedSize(const Function* F) const {
11121111
return std::numeric_limits<std::size_t>::max();
11131112
}
11141113

1115-
bool EstimateFunctionSize::onlyCalledOnce(const Function* F) {
1114+
bool EstimateFunctionSize::onlyCalledOnce(const Function* F, const Function* CallerF) {
11161115
//IGC_ASSERT(IGC_IS_FLAG_DISABLED(ControlKernelTotalSize));
11171116
auto I = ECG.find((Function*)F);
11181117
if (I != ECG.end()) {
1119-
FunctionNode* Node = (FunctionNode*)I->second;
1118+
auto* Node = (FunctionNode*)I->second;
11201119
IGC_ASSERT(F == Node->F);
11211120
// one call-site and not a recursion
11221121
if (Node->CallerList.size() == 1 &&
11231122
Node->CallerList.begin()->second == 1 &&
11241123
Node->CallerList.begin()->first != Node) {
11251124
return true;
11261125
}
1127-
// OpenCL specific, called once by each kernel
1128-
auto MdWrapper = getAnalysisIfAvailable<MetaDataUtilsWrapper>();
1126+
// OpenCL specific, called once by passed kernel
1127+
auto* MdWrapper = getAnalysisIfAvailable<MetaDataUtilsWrapper>();
11291128
if (MdWrapper) {
1130-
auto pMdUtils = MdWrapper->getMetaDataUtils();
1131-
for (const auto &node : Node->CallerList) {
1132-
FunctionNode* Caller = node.first;
1133-
uint32_t cnt = node.second;
1134-
if (cnt > 1) {
1129+
auto* pMdUtils = MdWrapper->getMetaDataUtils();
1130+
for (const auto &[Caller, CallCount] : Node->CallerList) {
1131+
if (CallCount > 1 && Caller->F == CallerF) {
11351132
return false;
11361133
}
11371134
if (!isEntryFunc(pMdUtils, Caller->F)) {

IGC/Compiler/CISACodeGen/EstimateFunctionSize.h

+3-4
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,10 @@ SPDX-License-Identifier: MIT
1313
#include "llvm/Pass.h"
1414
#include <llvm/IR/InstVisitor.h>
1515
#include <llvm/ADT/StringRef.h>
16-
#include "llvm/ADT/DenseSet.h"
1716
#include "llvm/Support/ScaledNumber.h"
1817
#include "common/LLVMWarningsPop.hpp"
19-
#include <cstddef>
2018
#include "Probe/Assertion.h"
19+
#include <cstddef>
2120
#include <deque>
2221
#include <unordered_map>
2322
#include <unordered_set>
@@ -46,7 +45,7 @@ namespace IGC {
4645
~EstimateFunctionSize();
4746
EstimateFunctionSize(const EstimateFunctionSize&) = delete;
4847
EstimateFunctionSize& operator=(const EstimateFunctionSize&) = delete;
49-
virtual llvm::StringRef getPassName() const override { return "Estimate Function Sizes"; }
48+
llvm::StringRef getPassName() const override { return "Estimate Function Sizes"; }
5049
void getAnalysisUsage(llvm::AnalysisUsage& AU) const override;
5150
bool runOnModule(llvm::Module& M) override;
5251

@@ -56,7 +55,7 @@ namespace IGC {
5655
/// \brief Return the estimated function size after complete inlining.
5756
std::size_t getExpandedSize(const llvm::Function* F) const;
5857

59-
bool onlyCalledOnce(const llvm::Function* F);
58+
bool onlyCalledOnce(const llvm::Function* F, const llvm::Function* CallerF);
6059

6160
bool hasRecursion() const { return HasRecursion; }
6261

IGC/Compiler/CISACodeGen/GenCodeGenModule.cpp

+2-5
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@ SPDX-License-Identifier: MIT
1717
#include "Compiler/MetaDataUtilsWrapper.h"
1818
#include "common/igc_regkeys.hpp"
1919
#include "common/LLVMWarningsPush.hpp"
20-
#include "llvm/Config/llvm-config.h"
2120
#include "llvm/IR/Argument.h"
2221
#include "llvm/IR/Attributes.h"
2322
#include "llvm/ADT/SetVector.h"
@@ -32,14 +31,12 @@ SPDX-License-Identifier: MIT
3231
#include "llvm/Transforms/IPO.h"
3332
#include "llvm/Transforms/IPO/Inliner.h"
3433
#include "llvmWrapper/IR/DerivedTypes.h"
35-
#include "llvmWrapper/Transforms/Utils/Cloning.h"
3634
#include "llvm/IR/DebugInfo.h"
3735
#include "llvm/IR/DIBuilder.h"
3836
#include "common/LLVMWarningsPop.hpp"
3937
#include "DebugInfo/VISADebugEmitter.hpp"
4038
#include <numeric>
4139
#include <utility>
42-
#include <iostream>
4340
#include "Probe/Assertion.h"
4441

4542
using namespace llvm;
@@ -1458,11 +1455,11 @@ InlineCost SubroutineInliner::getInlineCost(IGCLLVM::CallSiteRef CS)
14581455
{
14591456
return llvm::InlineCost::getAlways("Caller size smaller than per func. threshold");
14601457
}
1461-
else if (isTrivialCall(Callee) || FSA->onlyCalledOnce(Callee))
1458+
if (isTrivialCall(Callee) || FSA->onlyCalledOnce(Callee, Caller))
14621459
{
14631460
return llvm::InlineCost::getAlways("Callee is called only once");
14641461
}
1465-
else if (!FSA->shouldEnableSubroutine())
1462+
if (!FSA->shouldEnableSubroutine())
14661463
{
14671464
// This function returns true if the estimated total inlining size exceeds some module threshold.
14681465
// If we don't exceed it, and there's no preference on inline vs noinline, we just inline.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,73 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: regkeys,pvc-supported,llvm-14-plus
10+
11+
; Check that a function called once from a kernel is inlined into that kernel even if it is called multiple times from other kernels.
12+
13+
; RUN: llvm-as %s -o %t.bc
14+
; RUN: ocloc compile -llvm_input -file %t.bc -options "-igc_opts 'DisableRecompilation=1, SubroutineThreshold=50, SubroutineInlinerThreshold=10, KernelTotalSizeThreshold=50, PrintToConsole=1, PrintBefore=EmitPass'" -device pvc 2>&1 | FileCheck %s
15+
; CHECK-LABEL: @_ZTS28Kernel_A_Supposed_2B_Inlined(
16+
; CHECK-NOT: call spir_func void @testInlineFn
17+
; CHECK: ret void
18+
19+
; CHECK-LABEL: @_ZTS28Kernel_B_Supposed_2B_Inlined(
20+
; CHECK: call spir_func void @testInlineFn
21+
; CHECK: call spir_func void @testInlineFn
22+
; CHECK: ret void
23+
24+
25+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
26+
target triple = "spir64-unknown-unknown"
27+
28+
define spir_kernel void @_ZTS28Kernel_A_Supposed_2B_Inlined() {
29+
call spir_func void @testInlineFn()
30+
ret void
31+
}
32+
33+
define linkonce_odr spir_func void @testInlineFn() {
34+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
35+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
36+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
37+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
38+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
39+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
40+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
41+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
42+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
43+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
44+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
45+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
46+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
47+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
48+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
49+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
50+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
51+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
52+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
53+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
54+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
55+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
56+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
57+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
58+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
59+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
60+
call spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
61+
ret void
62+
}
63+
64+
declare spir_func void @_Z8mulAccumRP4DataRK5ColorS4_()
65+
66+
define spir_kernel void @_ZTS28Kernel_B_Supposed_2B_Inlined() {
67+
call spir_func void @testInlineFn()
68+
call spir_func void @testInlineFn()
69+
ret void
70+
}
71+
72+
; uselistorder directives
73+
uselistorder void ()* @_Z8mulAccumRP4DataRK5ColorS4_, { 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }

0 commit comments

Comments
 (0)