Skip to content

Commit 75f810e

Browse files
authored
[Offload] Guard HSA implicit arguments if they aren't created (#133073)
Summary: We conditionally allocate the implicit arguments, so they may be null. The flang compiler seems to hit this case, even though it shouldn't if it conforms to the HSA code object. For now, guard this to fix the regression and to cover a future case where someone rolls a fully custom implementation. Fixes: #132982
1 parent 1b07e86 commit 75f810e

File tree

1 file changed

+18
-21
lines changed
  • offload/plugins-nextgen/amdgpu/src

1 file changed

+18
-21
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

+18-21
Original file line numberDiff line numberDiff line change
@@ -3363,16 +3363,6 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
33633363
if (auto Err = GenericDevice.getDeviceStackSize(StackSize))
33643364
return Err;
33653365

3366-
hsa_utils::AMDGPUImplicitArgsTy *ImplArgs = nullptr;
3367-
if (ArgsSize == LaunchParams.Size + getImplicitArgsSize()) {
3368-
// Initialize implicit arguments.
3369-
ImplArgs = reinterpret_cast<hsa_utils::AMDGPUImplicitArgsTy *>(
3370-
utils::advancePtr(AllArgs, LaunchParams.Size));
3371-
3372-
// Initialize the implicit arguments to zero.
3373-
std::memset(ImplArgs, 0, getImplicitArgsSize());
3374-
}
3375-
33763366
// Copy the explicit arguments.
33773367
// TODO: We should expose the args memory manager alloc to the common part as
33783368
// alternative to copying them twice.
@@ -3385,17 +3375,24 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
33853375
if (auto Err = AMDGPUDevice.getStream(AsyncInfoWrapper, Stream))
33863376
return Err;
33873377

3388-
// Set the COV5+ implicit arguments to the appropriate values.
3389-
ImplArgs->BlockCountX = NumBlocks[0];
3390-
ImplArgs->BlockCountY = NumBlocks[1];
3391-
ImplArgs->BlockCountZ = NumBlocks[2];
3392-
ImplArgs->GroupSizeX = NumThreads[0];
3393-
ImplArgs->GroupSizeY = NumThreads[1];
3394-
ImplArgs->GroupSizeZ = NumThreads[2];
3395-
ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
3396-
? 3
3397-
: 1 + (NumBlocks[1] * NumThreads[1] != 1);
3398-
ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
3378+
hsa_utils::AMDGPUImplicitArgsTy *ImplArgs = nullptr;
3379+
if (ArgsSize == LaunchParams.Size + getImplicitArgsSize()) {
3380+
ImplArgs = reinterpret_cast<hsa_utils::AMDGPUImplicitArgsTy *>(
3381+
utils::advancePtr(AllArgs, LaunchParams.Size));
3382+
3383+
// Set the COV5+ implicit arguments to the appropriate values.
3384+
std::memset(ImplArgs, 0, getImplicitArgsSize());
3385+
ImplArgs->BlockCountX = NumBlocks[0];
3386+
ImplArgs->BlockCountY = NumBlocks[1];
3387+
ImplArgs->BlockCountZ = NumBlocks[2];
3388+
ImplArgs->GroupSizeX = NumThreads[0];
3389+
ImplArgs->GroupSizeY = NumThreads[1];
3390+
ImplArgs->GroupSizeZ = NumThreads[2];
3391+
ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
3392+
? 3
3393+
: 1 + (NumBlocks[1] * NumThreads[1] != 1);
3394+
ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
3395+
}
33993396

34003397
// Push the kernel launch into the stream.
34013398
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,

0 commit comments

Comments
 (0)