Skip to content

Commit 6a6ab80

Browse files
Add option to compact event L3 flush packet
Related-To: NEO-7434 Signed-off-by: Zbigniew Zdanowicz <[email protected]>
1 parent 709e322 commit 6a6ab80

13 files changed

+1063
-30
lines changed

level_zero/core/source/cmdlist/cmdlist_hw.h

+8
Original file line numberDiff line numberDiff line change
@@ -287,10 +287,18 @@ struct CommandListCoreFamily : CommandListImp {
287287
size_t dstSize,
288288
CmdListFillKernelArguments &outArguments,
289289
Kernel *kernel);
290+
// Reports whether the compact L3-flush event path applies to an event signal:
// true only when the compactL3FlushEventPacket member is set AND the caller
// passes dcFlush == true. With the member left at its default (false) this
// always returns false, regardless of dcFlush.
bool compactL3FlushEvent(bool dcFlush) const {
291+
return this->compactL3FlushEventPacket && dcFlush;
292+
}
293+
// Combines the two conditions under which callers in this change treat the
// event signal as a single pipe-control packet. Returns true when either:
//  - pipeControlMultiKernelEventSync is set and splitKernel is true, or
//  - compactL3FlushEvent(dcFlush) is true.
// splitKernel: callers pass launchParams.isKernelSplitOperation.
// dcFlush:     callers pass getDcFlushRequired(!!signalScope) of the event.
bool eventSignalPipeControl(bool splitKernel, bool dcFlush) const {
294+
return (this->pipeControlMultiKernelEventSync && splitKernel) ||
295+
compactL3FlushEvent(dcFlush);
296+
}
290297

291298
size_t cmdListCurrentStartOffset = 0;
292299
bool containsAnyKernel = false;
293300
bool pipeControlMultiKernelEventSync = false;
301+
bool compactL3FlushEventPacket = false;
294302
};
295303

296304
template <PRODUCT_FAMILY gfxProductFamily>

level_zero/core/source/cmdlist/cmdlist_hw.inl

+7-3
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
138138
this->frontEndStateTracking = L0HwHelper::enableFrontEndStateTracking(hwInfo);
139139
this->pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking(hwInfo);
140140
this->pipeControlMultiKernelEventSync = L0HwHelper::usePipeControlMultiKernelEventSync(hwInfo);
141+
this->compactL3FlushEventPacket = L0HwHelper::useCompactL3FlushEventPacket(hwInfo);
141142

142143
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
143144
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
@@ -1187,19 +1188,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
11871188
}
11881189

11891190
CmdListKernelLaunchParams launchParams = {};
1190-
1191+
bool dcFlush = false;
11911192
Event *signalEvent = nullptr;
11921193
if (hSignalEvent) {
11931194
signalEvent = Event::fromHandle(hSignalEvent);
11941195
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
1196+
dcFlush = getDcFlushRequired(!!signalEvent->signalScope);
11951197
}
11961198

11971199
uint32_t kernelCounter = leftSize > 0 ? 1 : 0;
11981200
kernelCounter += middleSizeBytes > 0 ? 1 : 0;
11991201
kernelCounter += rightSize > 0 ? 1 : 0;
12001202

12011203
launchParams.isKernelSplitOperation = kernelCounter > 1;
1202-
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
1204+
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
12031205

12041206
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);
12051207

@@ -1551,9 +1553,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
15511553
CmdListKernelLaunchParams launchParams = {};
15521554

15531555
Event *signalEvent = nullptr;
1556+
bool dcFlush = false;
15541557
if (hSignalEvent) {
15551558
signalEvent = Event::fromHandle(hSignalEvent);
15561559
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
1560+
dcFlush = getDcFlushRequired(!!signalEvent->signalScope);
15571561
}
15581562

15591563
if (isCopyOnly()) {
@@ -1610,7 +1614,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
16101614
setupFillKernelArguments(dstAllocation.offset, patternSize, size, fillArguments, builtinKernel);
16111615

16121616
launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0);
1613-
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
1617+
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
16141618

16151619
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);
16161620

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

+22-10
Original file line numberDiff line numberDiff line change
@@ -163,20 +163,26 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
163163
threadGroupDimensions->groupCountY,
164164
threadGroupDimensions->groupCountZ);
165165
}
166-
NEO::GraphicsAllocation *eventAlloc = nullptr;
166+
167167
uint64_t eventAddress = 0;
168168
bool isTimestampEvent = false;
169169
bool l3FlushEnable = false;
170170
bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent;
171+
Event *compactEvent = nullptr;
171172
if (event) {
172-
eventAlloc = &event->getAllocation(this->device);
173-
commandContainer.addToResidencyContainer(eventAlloc);
174-
bool flushRequired = !!event->signalScope &&
175-
!launchParams.isKernelSplitOperation;
176-
l3FlushEnable = getDcFlushRequired(flushRequired);
177-
isTimestampEvent = event->isUsingContextEndOffset();
178-
eventAddress = event->getPacketAddress(this->device);
179173
isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
174+
if (compactL3FlushEvent(getDcFlushRequired(!!event->signalScope))) {
175+
compactEvent = event;
176+
event = nullptr;
177+
} else {
178+
NEO::GraphicsAllocation *eventAlloc = &event->getAllocation(this->device);
179+
commandContainer.addToResidencyContainer(eventAlloc);
180+
bool flushRequired = !!event->signalScope &&
181+
!launchParams.isKernelSplitOperation;
182+
l3FlushEnable = getDcFlushRequired(flushRequired);
183+
isTimestampEvent = event->isUsingContextEndOffset();
184+
eventAddress = event->getPacketAddress(this->device);
185+
}
180186
}
181187

182188
bool isKernelUsingSystemAllocation = false;
@@ -249,6 +255,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
249255

250256
std::list<void *> additionalCommands;
251257

258+
if (compactEvent) {
259+
appendEventForProfilingAllWalkers(compactEvent, true, true);
260+
}
261+
252262
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
253263
eventAddress, // eventAddress
254264
neoDevice, // device
@@ -273,7 +283,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
273283
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
274284
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
275285

276-
if (event) {
286+
if (compactEvent) {
287+
appendEventForProfilingAllWalkers(compactEvent, false, true);
288+
} else if (event) {
277289
if (partitionCount > 1) {
278290
event->setPacketsInUse(partitionCount);
279291
}
@@ -404,7 +416,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
404416
Event *event,
405417
const CmdListKernelLaunchParams &launchParams) {
406418
if (event) {
407-
if (this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation) {
419+
if (eventSignalPipeControl(launchParams.isKernelSplitOperation, getDcFlushRequired(!!event->signalScope))) {
408420
event = nullptr;
409421
} else {
410422
event->increaseKernelCount();

level_zero/core/source/hw_helpers/l0_hw_helper.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,11 @@ bool L0HwHelper::usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwI
5353
return false;
5454
}
5555

56+
// Decides whether the compact L3-flush event packet option is enabled.
// The CompactL3FlushEventPacket debug flag is the only input consulted here:
//  - flag == -1      : flag is treated as "not set"; the result is false.
//  - any other value : the result is the flag converted to bool (non-zero -> true).
// hwInfo is accepted but not read in this body.
bool L0HwHelper::useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo) {
57+
// An explicit debug-flag value (anything other than -1) takes precedence.
if (NEO::DebugManager.flags.CompactL3FlushEventPacket.get() != -1) {
58+
return !!NEO::DebugManager.flags.CompactL3FlushEventPacket.get();
59+
}
60+
// No override given: the option stays disabled.
return false;
61+
}
62+
5663
} // namespace L0

level_zero/core/source/hw_helpers/l0_hw_helper.h

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class L0HwHelper {
3535
static bool enableStateComputeModeTracking(const NEO::HardwareInfo &hwInfo);
3636
static bool enableImmediateCmdListHeapSharing(const NEO::HardwareInfo &hwInfo, bool cmdlistSupport);
3737
static bool usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwInfo);
38+
static bool useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo);
3839
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
3940
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;
4041

level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h

+2
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,9 @@ struct TestExpectedValues {
188188
uint32_t expectedKernelCount = 0;
189189
uint32_t expectedWalkerPostSyncOp = 0;
190190
uint32_t expectedPostSyncPipeControls = 0;
191+
uint32_t expectDcFlush = 0;
191192
bool postSyncAddressZero = false;
193+
bool workloadPartition = false;
192194
};
193195

194196
} // namespace ult

level_zero/core/test/unit_tests/mocks/mock_cmdlist.h

+3
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
4747
using BaseClass::commandListPerThreadScratchSize;
4848
using BaseClass::commandListPreemptionMode;
4949
using BaseClass::commandsToPatch;
50+
using BaseClass::compactL3FlushEventPacket;
5051
using BaseClass::containsAnyKernel;
5152
using BaseClass::containsCooperativeKernelsFlag;
5253
using BaseClass::csr;
@@ -123,6 +124,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
123124
using BaseClass::clearCommandsToPatch;
124125
using BaseClass::cmdQImmediate;
125126
using BaseClass::commandsToPatch;
127+
using BaseClass::compactL3FlushEventPacket;
126128
using BaseClass::csr;
127129
using BaseClass::finalStreamState;
128130
using BaseClass::frontEndStateTracking;
@@ -142,6 +144,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
142144
template <GFXCORE_FAMILY gfxCoreFamily>
143145
struct MockCommandListImmediate : public CommandListCoreFamilyImmediate<gfxCoreFamily> {
144146
using BaseClass = CommandListCoreFamilyImmediate<gfxCoreFamily>;
147+
using BaseClass::compactL3FlushEventPacket;
145148
using BaseClass::containsAnyKernel;
146149
using BaseClass::immediateCmdListHeapSharing;
147150
using BaseClass::indirectAllocationsAllowed;

0 commit comments

Comments
 (0)