Skip to content

Commit 23e6dbf

Browse files
authored
AMDGPU: Use ConstantPool as source value for DAG lowered kernarg loads (#168917)
This isn't quite a constant pool, but probably close enough for this purpose. We just need some known invariant value address. The aliasing queries against the real kernarg base pointer will falsely report no aliasing, but for invariant memory it probably doesn't matter.
1 parent 734a912 commit 23e6dbf

File tree

10 files changed

+430
-391
lines changed

10 files changed

+430
-391
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/CodeGen/FunctionLoweringInfo.h"
2222
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
2323
#include "llvm/CodeGen/MachineFrameInfo.h"
24+
#include "llvm/CodeGen/PseudoSourceValueManager.h"
2425
#include "llvm/IR/IntrinsicsAMDGPU.h"
2526

2627
#define DEBUG_TYPE "amdgpu-call-lowering"
@@ -414,7 +415,8 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
414415
MachineFunction &MF = B.getMF();
415416
const Function &F = MF.getFunction();
416417
const DataLayout &DL = F.getDataLayout();
417-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
418+
const SITargetLowering &TLI = *getTLI<SITargetLowering>();
419+
MachinePointerInfo PtrInfo = TLI.getKernargSegmentPtrInfo(MF);
418420

419421
LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
420422

llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,17 @@ Intrinsic::ID AMDGPU::getIntrinsicID(const MachineInstr &I) {
2828

2929
// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
3030
bool AMDGPU::isUniformMMO(const MachineMemOperand *MMO) {
31-
// FIXME: null value is should be treated as unknown, not as uniform.
3231
const Value *Ptr = MMO->getValue();
32+
if (!Ptr) {
33+
if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
34+
return PSV->isConstantPool() || PSV->isStack() || PSV->isGOT() ||
35+
PSV->isJumpTable();
36+
}
37+
38+
// FIXME: A null value should be treated as unknown, not as uniform.
39+
return true;
40+
}
41+
3342
// UndefValue means this is a load of a kernel input. These are uniform.
3443
// Sometimes LDS instructions have constant pointers.
3544
// If Ptr is null, then that means this mem operand contains a

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
3131
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
3232
#include "llvm/CodeGen/GlobalISel/Utils.h"
33+
#include "llvm/CodeGen/PseudoSourceValueManager.h"
3334
#include "llvm/CodeGen/TargetOpcodes.h"
3435
#include "llvm/IR/DiagnosticInfo.h"
3536
#include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -2321,14 +2322,14 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
23212322
return B.buildUnmerge(S32, Dst).getReg(1);
23222323
}
23232324

2324-
// TODO: can we be smarter about machine pointer info?
2325-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
23262325
Register LoadAddr = MRI.createGenericVirtualRegister(
23272326
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
23282327
// For code object version 5, private_base and shared_base are passed through
23292328
// implicit kernargs.
23302329
if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >=
23312330
AMDGPU::AMDHSA_COV5) {
2331+
MachinePointerInfo PtrInfo = getKernargSegmentPtrInfo(B.getMF());
2332+
23322333
AMDGPUTargetLowering::ImplicitParameter Param =
23332334
AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPUTargetLowering::SHARED_BASE
23342335
: AMDGPUTargetLowering::PRIVATE_BASE;
@@ -2343,7 +2344,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
23432344
return Register();
23442345

23452346
MachineMemOperand *MMO = MF.getMachineMemOperand(
2346-
PtrInfo,
2347+
PtrInfo.getWithOffset(Offset),
23472348
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
23482349
MachineMemOperand::MOInvariant,
23492350
LLT::scalar(32), commonAlignment(Align(64), Offset));
@@ -2361,6 +2362,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
23612362
if (!loadInputValue(QueuePtr, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
23622363
return Register();
23632364

2365+
// TODO: can we be smarter about machine pointer info?
2366+
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
2367+
23642368
// Offset into amd_queue_t for group_segment_aperture_base_hi /
23652369
// private_segment_aperture_base_hi.
23662370
uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
@@ -4715,6 +4719,14 @@ bool AMDGPULegalizerInfo::legalizeWorkitemIDIntrinsic(
47154719
return true;
47164720
}
47174721

4722+
/// Build the MachinePointerInfo used for loads from the kernarg segment: the
/// constant-pool pseudo source value as the base, tagged with the
/// CONSTANT_ADDRESS address space. Callers add the argument offset themselves
/// via getWithOffset().
MachinePointerInfo
4723+
AMDGPULegalizerInfo::getKernargSegmentPtrInfo(MachineFunction &MF) const {
4724+
// This isn't really a constant pool, but it is close enough for this purpose.
4725+
MachinePointerInfo PtrInfo(MF.getPSVManager().getConstantPool());
4726+
PtrInfo.AddrSpace = AMDGPUAS::CONSTANT_ADDRESS;
4727+
return PtrInfo;
4728+
}
4729+
47184730
Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B,
47194731
int64_t Offset) const {
47204732
LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
@@ -4742,8 +4754,8 @@ bool AMDGPULegalizerInfo::legalizeKernargMemParameter(MachineInstr &MI,
47424754
"unexpected kernarg parameter type");
47434755

47444756
Register Ptr = getKernargParameterPtr(B, Offset);
4745-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
4746-
B.buildLoad(DstReg, Ptr, PtrInfo, Align(4),
4757+
MachinePointerInfo PtrInfo = getKernargSegmentPtrInfo(B.getMF());
4758+
B.buildLoad(DstReg, Ptr, PtrInfo.getWithOffset(Offset), Align(4),
47474759
MachineMemOperand::MODereferenceable |
47484760
MachineMemOperand::MOInvariant);
47494761
MI.eraseFromParent();
@@ -7266,9 +7278,9 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
72667278
return false;
72677279

72687280
// TODO: can we be smarter about machine pointer info?
7269-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
7281+
MachinePointerInfo PtrInfo = getKernargSegmentPtrInfo(MF);
72707282
MachineMemOperand *MMO = MF.getMachineMemOperand(
7271-
PtrInfo,
7283+
PtrInfo.getWithOffset(Offset),
72727284
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
72737285
MachineMemOperand::MOInvariant,
72747286
LLT::scalar(64), commonAlignment(Align(64), Offset));

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
132132
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
133133
unsigned Dim, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
134134

135+
MachinePointerInfo getKernargSegmentPtrInfo(MachineFunction &MF) const;
135136
Register getKernargParameterPtr(MachineIRBuilder &B, int64_t Offset) const;
136137
bool legalizeKernargMemParameter(MachineInstr &MI, MachineIRBuilder &B,
137138
uint64_t Offset,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "llvm/CodeGen/MachineFrameInfo.h"
3636
#include "llvm/CodeGen/MachineFunction.h"
3737
#include "llvm/CodeGen/MachineLoopInfo.h"
38+
#include "llvm/CodeGen/PseudoSourceValueManager.h"
3839
#include "llvm/CodeGen/SDPatternMatch.h"
3940
#include "llvm/IR/DiagnosticInfo.h"
4041
#include "llvm/IR/IRBuilder.h"
@@ -2265,6 +2266,14 @@ bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
22652266
return TargetLowering::isTypeDesirableForOp(Op, VT);
22662267
}
22672268

2269+
/// Build the MachinePointerInfo used for loads from the kernarg segment: the
/// constant-pool pseudo source value as the base, tagged with the
/// CONSTANT_ADDRESS address space. Callers add the argument offset themselves
/// via getWithOffset().
MachinePointerInfo
2270+
SITargetLowering::getKernargSegmentPtrInfo(MachineFunction &MF) const {
2271+
// This isn't really a constant pool, but it is close enough for this purpose.
2272+
MachinePointerInfo PtrInfo(MF.getPSVManager().getConstantPool());
2273+
PtrInfo.AddrSpace = AMDGPUAS::CONSTANT_ADDRESS;
2274+
return PtrInfo;
2275+
}
2276+
22682277
SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
22692278
const SDLoc &SL,
22702279
SDValue Chain,
@@ -2341,7 +2350,9 @@ SDValue SITargetLowering::lowerKernargMemParameter(
23412350
SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain,
23422351
uint64_t Offset, Align Alignment, bool Signed,
23432352
const ISD::InputArg *Arg) const {
2344-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
2353+
2354+
MachinePointerInfo PtrInfo =
2355+
getKernargSegmentPtrInfo(DAG.getMachineFunction());
23452356

23462357
// Try to avoid using an extload by loading earlier than the argument address,
23472358
// and extracting the relevant bits. The load should hopefully be merged with
@@ -2356,7 +2367,8 @@ SDValue SITargetLowering::lowerKernargMemParameter(
23562367
// TODO: If we passed in the base kernel offset we could have a better
23572368
// alignment than 4, but we don't really need it.
23582369
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, AlignDownOffset);
2359-
SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr, PtrInfo, Align(4),
2370+
SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr,
2371+
PtrInfo.getWithOffset(AlignDownOffset), Align(4),
23602372
MachineMemOperand::MODereferenceable |
23612373
MachineMemOperand::MOInvariant);
23622374

@@ -2371,9 +2383,9 @@ SDValue SITargetLowering::lowerKernargMemParameter(
23712383
}
23722384

23732385
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
2374-
SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Alignment,
2375-
MachineMemOperand::MODereferenceable |
2376-
MachineMemOperand::MOInvariant);
2386+
SDValue Load = DAG.getLoad(
2387+
MemVT, SL, Chain, Ptr, PtrInfo.getWithOffset(Offset), Alignment,
2388+
MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
23772389

23782390
SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
23792391
return DAG.getMergeValues({Val, Load.getValue(1)}, SL);
@@ -8143,10 +8155,11 @@ SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT,
81438155
MachineFunction &MF = DAG.getMachineFunction();
81448156
uint64_t Offset = getImplicitParameterOffset(MF, Param);
81458157
SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), Offset);
8146-
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
8147-
return DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, PtrInfo, Alignment,
8148-
MachineMemOperand::MODereferenceable |
8149-
MachineMemOperand::MOInvariant);
8158+
MachinePointerInfo PtrInfo =
8159+
getKernargSegmentPtrInfo(DAG.getMachineFunction());
8160+
return DAG.getLoad(
8161+
VT, DL, DAG.getEntryNode(), Ptr, PtrInfo.getWithOffset(Offset), Alignment,
8162+
MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
81508163
}
81518164

81528165
SDValue SITargetLowering::lowerTrapHsaQueuePtr(SDValue Op,

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
4545
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
4646
unsigned &NumIntermediates, MVT &RegisterVT) const override;
4747

48+
MachinePointerInfo getKernargSegmentPtrInfo(MachineFunction &MF) const;
49+
4850
private:
4951
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
5052
SDValue Chain, uint64_t Offset) const;

0 commit comments

Comments
 (0)