Skip to content

Commit e21ef06

Browse files
aratajewigcbot
authored andcommitted
Lower memcpy right after LowerByValAttribute pass
`LowerByValAttribute` inserts calls to memcpy intrinsic. This change introduces calling `ReplaceUnsupportedIntrinsics` pass right after `LowerByValAttribute` to transform all `memcpy` calls to a series a loads/stores. Without that, `LegalizationPass` doesn't legalize all loads and stores, as they are in a form of `memcpy` calls. This change also implements the possibility to run `LowerByValAttribute` pass only if optimizations are enabled. `LowerByValAttribute` pass generates an explicit copy (alloca+memcpy) for all function arguments with `byval` attribute. The copy is not necessary if a SPIRV was compiled in O0, as in such case, FE compilers will generate an explicit copy (not optimize it out) in SPIRV.
1 parent ff32516 commit e21ef06

File tree

4 files changed

+16
-10
lines changed

4 files changed

+16
-10
lines changed

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,13 @@ void AddAnalysisPasses(CodeGenContext& ctx, IGCPassManager& mpm)
295295
}
296296
}
297297
mpm.add(createPromoteMemoryToRegisterPass());
298-
if (ctx.type == ShaderType::OPENCL_SHADER)
298+
if (ctx.type == ShaderType::OPENCL_SHADER &&
299+
!isOptDisabled &&
300+
IGC_IS_FLAG_ENABLED(EnableExplicitCopyForByVal))
301+
{
299302
mpm.add(new LowerByValAttribute());
303+
mpm.add(createReplaceUnsupportedIntrinsicsPass());
304+
}
300305
// Resolving private memory allocas
301306
mpm.add(CreatePrivateMemoryResolution());
302307
}
@@ -673,8 +678,13 @@ void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSignature
673678
if (!(IGC_IS_FLAG_ENABLED(EnableUnmaskedFunctions) &&
674679
IGC_IS_FLAG_ENABLED(LateInlineUnmaskedFunc)))
675680
{
676-
if (ctx.type == ShaderType::OPENCL_SHADER)
681+
if (ctx.type == ShaderType::OPENCL_SHADER &&
682+
!isOptDisabled &&
683+
IGC_IS_FLAG_ENABLED(EnableExplicitCopyForByVal))
684+
{
677685
mpm.add(new LowerByValAttribute());
686+
mpm.add(createReplaceUnsupportedIntrinsicsPass());
687+
}
678688
mpm.add(CreatePrivateMemoryResolution());
679689
}
680690
// Should help MemOpt pass to merge more loads

IGC/Compiler/Optimizer/OpenCLPasses/PrivateMemory/LowerByValAttribute.cpp

-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ SPDX-License-Identifier: MIT
88

99
#include "Compiler/Optimizer/OpenCLPasses/PrivateMemory/LowerByValAttribute.hpp"
1010
#include "Compiler/IGCPassSupport.h"
11-
#include "common/igc_regkeys.hpp"
1211

1312
#include "common/LLVMWarningsPush.hpp"
1413
#include <llvm/Pass.h>
@@ -49,9 +48,6 @@ LowerByValAttribute::LowerByValAttribute(void) : FunctionPass(ID)
4948

5049
bool LowerByValAttribute::runOnFunction(Function& F)
5150
{
52-
if (IGC_IS_FLAG_DISABLED(EnableExplicitCopyForByVal))
53-
return false;
54-
5551
visit(F);
5652

5753
return m_changed;

IGC/Compiler/tests/LowerByValAttribute/explicit-copy-for-byval.ll

+1-3
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
;
77
;============================ end_copyright_notice =============================
88
;
9-
; REQUIRES: regkeys
10-
;
11-
; RUN: igc_opt %s -S -o - --igc-lower-byval-attribute -regkey EnableExplicitCopyForByVal | FileCheck %s
9+
; RUN: igc_opt %s -S -o - --igc-lower-byval-attribute | FileCheck %s
1210

1311
; ------------------------------------------------
1412
; LegalizeFunctionSignatures

IGC/ocloc_tests/lowerbyval-before-privatememresolution.cl

+3-1
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,18 @@ SPDX-License-Identifier: MIT
88

99
// REQUIRES: regkeys
1010

11-
// RUN: ocloc compile -file %s -options " -igc_opts 'ShaderDisplayAllPassesNames=1'" -device dg2 2>&1 | FileCheck %s
11+
// RUN: ocloc compile -file %s -options " -igc_opts 'ShaderDisplayAllPassesNames=1 EnableExplicitCopyForByVal=1'" -device dg2 2>&1 | FileCheck %s
1212

1313
// Verify if LowerByValAttribute pass is run right before PrivateMemoryResolution pass.
14+
// (ReplaceUnsupportedIntrinsics is run between them just to lower memcpy instructions inserted by LowerByValAttribute)
1415

1516
// If LowerByValAttribute was run after PrivateMemoryResolution, alloca instructions inserted by LowerByValAttribute
1617
// wouldn't be resolved.
1718
// If LowerByValAttribute was run earlier than right before PrivateMemoryResolution, there is a chance that
1819
// optimization passes would remove explicit copy (alloca + memcpy) inserted by LowerByValAttribute.
1920

2021
// CHECK: LowerByValAttribute
22+
// CHECK-NEXT: ReplaceUnsupportedIntrinsics
2123
// CHECK-NEXT: PrivateMemoryResolution
2224

2325
__kernel void foo(int a, int b, __global int *res) { *res = a + b; }

0 commit comments

Comments
 (0)