@@ -8829,7 +8829,64 @@ pi_result _pi_buffer::free() {
8829
8829
return PI_SUCCESS;
8830
8830
}
8831
8831
8832
- /// command-buffer Extension
8832
+ /* Command-buffer Extension
8833
+
8834
+ The PI interface for submitting a PI command-buffer takes a list
8835
+ of events to wait on, and an event representing the completion of
8836
+ that particular submission of the command-buffer.
8837
+
8838
+ However, in `zeCommandQueueExecuteCommandLists` there are no parameters to
8839
+ take a waitlist and also the only sync primitive returned is to block on
8840
+ host.
8841
+
8842
+ In order to get the PI command-buffer enqueue semantics we want with L0
8843
+ this adapter adds extra commands to the L0 command-list representing a
8844
+ PI command-buffer.
8845
+
8846
+ Prefix - Commands added to the start of the L0 command-list by L0 adapter.
8847
+ Suffix - Commands added to the end of the L0 command-list by L0 adapter.
8848
+
8849
+ These extra commands operate on L0 event synchronisation primitives used by
8850
+ the command-list to interact with the external PI wait-list and PI return
8851
+ event required for the enqueue interface.
8852
+
8853
+ The `pi_ext_command_buffer` class for this adapter contains a SignalEvent
8854
+ which signals the completion of the command-list in the suffix, and
8855
+ is reset in the prefix. This signal is detected by a new PI return
8856
+ event created on PI command-buffer enqueue.
8857
+
8858
+ There is also a WaitEvent used by the `pi_ext_command_buffer` class
8859
+ in the prefix to wait on any dependencies passed in the enqueue wait-list.
8860
+
8861
+ ┌──────────┬────────────────────────────────────────────────┬─────────┐
8862
+ │ Prefix │ Commands added to PI command-buffer by PI user │ Suffix │
8863
+ └──────────┴────────────────────────────────────────────────┴─────────┘
8864
+
8865
+ ┌───────────────────┬──────────────────────────────┐
8866
+ Prefix │Reset signal event │ Barrier waiting on wait event│
8867
+ └───────────────────┴──────────────────────────────┘
8868
+
8869
+ ┌─────────────────────────────────────────┐
8870
+ Suffix │Signal the PI command-buffer signal event│
8871
+ └─────────────────────────────────────────┘
8872
+
8873
+
8874
+ For a call to `piextEnqueueCommandBuffer` with an event_list `EL`,
8875
+ command-buffer `CB`, and return event `RE` our implementation has to create
8876
+ and submit two new command-lists for the above approach to work. One before
8877
+ the command-list with extra commands associated with `CB`, and the other
8878
+ after `CB`.
8879
+
8880
+ Command-list created on `piextEnqueueCommandBuffer` to execute before `CB`:
8881
+ ┌───────────────────────────────────────────────────────────┐
8882
+ │Barrier on `EL` then signals `CB` WaitEvent when completed │
8883
+ └───────────────────────────────────────────────────────────┘
8884
+
8885
+ Command-list created on `piextEnqueueCommandBuffer` to execute after `CB`:
8886
+ ┌─────────────────────────────────────────────────────────────┐
8887
+ │Barrier on `CB` SignalEvent that signals `RE` when completed │
8888
+ └─────────────────────────────────────────────────────────────┘
8889
+ */
8833
8890
8834
8891
/// Helper function to take a list of pi_ext_sync_points and fill the provided
8835
8892
/// vector with the associated ZeEvents
@@ -8872,6 +8929,19 @@ pi_result piextCommandBufferCreate(pi_context Context, pi_device Device,
8872
8929
} catch (...) {
8873
8930
return PI_ERROR_UNKNOWN;
8874
8931
}
8932
+
8933
+ // Create signal & wait events to be used in the command-list for sync
8934
+ // on command-buffer enqueue.
8935
+ auto CommandBuffer = *RetCommandBuffer;
8936
+ PI_CALL (EventCreate (Context, nullptr , true , &CommandBuffer->SignalEvent ));
8937
+ PI_CALL (EventCreate (Context, nullptr , false , &CommandBuffer->WaitEvent ));
8938
+
8939
+ // Add prefix commands
8940
+ ZE_CALL (zeCommandListAppendEventReset,
8941
+ (ZeCommandList, CommandBuffer->SignalEvent ->ZeEvent ));
8942
+ ZE_CALL (zeCommandListAppendBarrier,
8943
+ (ZeCommandList, nullptr , 1 , &CommandBuffer->WaitEvent ->ZeEvent ));
8944
+
8875
8945
return PI_SUCCESS;
8876
8946
}
8877
8947
@@ -8891,13 +8961,10 @@ pi_result piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) {
8891
8961
}
8892
8962
8893
8963
pi_result piextCommandBufferFinalize (pi_ext_command_buffer CommandBuffer) {
8894
- // We need to append some signal that will indicate that command-buffer has
8964
+ // We need to append a signal that will indicate that the command-buffer has
8895
8965
// finished executing.
8896
- EventCreate(CommandBuffer->Context, nullptr, true,
8897
- &CommandBuffer->ExecutionEvent);
8898
- ZE_CALL(
8899
- zeCommandListAppendSignalEvent,
8900
- (CommandBuffer->ZeCommandList, CommandBuffer->ExecutionEvent->ZeEvent));
8966
+ ZE_CALL (zeCommandListAppendSignalEvent,
8967
+ (CommandBuffer->ZeCommandList , CommandBuffer->SignalEvent ->ZeEvent ));
8901
8968
// Close the command list and have it ready for dispatch.
8902
8969
ZE_CALL (zeCommandListClose, (CommandBuffer->ZeCommandList ));
8903
8970
return PI_SUCCESS;
@@ -9026,17 +9093,11 @@ pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
9026
9093
pi_uint32 NumEventsInWaitList,
9027
9094
const pi_event *EventWaitList,
9028
9095
pi_event *Event) {
9029
-
9030
- // Execute command list asynchronously, as the event will be used
9031
- // to track down its completion.
9032
-
9033
- uint32_t QueueGroupOrdinal;
9034
- // TODO: Revisit forcing compute engine
9035
- auto UseCopyEngine = false;
9096
+ // Use compute engine rather than copy engine
9097
+ const auto UseCopyEngine = false ;
9036
9098
auto &QGroup = Queue->getQueueGroup (UseCopyEngine);
9037
- auto &ZeCommandQueue =
9038
- // ForcedCmdQueue ? *ForcedCmdQueue :
9039
- QGroup.getZeQueue(&QueueGroupOrdinal);
9099
+ uint32_t QueueGroupOrdinal;
9100
+ auto &ZeCommandQueue = QGroup.getZeQueue (&QueueGroupOrdinal);
9040
9101
9041
9102
ze_fence_handle_t ZeFence;
9042
9103
ZeStruct<ze_fence_desc_t > ZeFenceDesc;
@@ -9050,25 +9111,69 @@ pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
9050
9111
CommandBuffer->ZeCommandList ,
9051
9112
{ZeFence, false , false , ZeCommandQueue, QueueGroupOrdinal}));
9052
9113
9053
- Queue->insertActiveBarriers(CommandListPtr, UseCopyEngine);
9054
-
9114
+ // Previous execution will have closed the command list, we need to reopen
9115
+ // it otherwise calling `executeCommandList` will return early.
9116
+ CommandListPtr->second .IsClosed = false ;
9055
9117
CommandListPtr->second .ZeFenceInUse = true ;
9056
9118
9057
- // Return the command-buffer's execution event as the user visible pi_event
9058
- *Event = CommandBuffer->ExecutionEvent;
9059
- (*Event)->Queue = Queue;
9060
- (*Event)->RefCount.increment();
9061
- Queue->RefCount.increment();
9119
+ // Create a command-list that executes before `CommandListPtr` and signals
9120
+ // the command-buffer's WaitEvent once `EventWaitList` dependencies complete.
9121
+ pi_command_list_ptr_t WaitCommandList{};
9122
+ if (NumEventsInWaitList) {
9123
+ _pi_ze_event_list_t TmpWaitList;
9124
+ if (auto Res = TmpWaitList.createAndRetainPiZeEventList (
9125
+ NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine))
9126
+ return Res;
9062
9127
9063
- PI_CALL(piEventRetain(*Event));
9128
+ if (auto Res = Queue->Context ->getAvailableCommandList (
9129
+ Queue, WaitCommandList, false , false ))
9130
+ return Res;
9064
9131
9065
- // Previous execution will have closed the command list so we need to reopen
9066
- // it.
9067
- CommandListPtr->second.IsClosed = false;
9132
+ ZE_CALL (zeCommandListAppendBarrier,
9133
+ (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ,
9134
+ NumEventsInWaitList, TmpWaitList.ZeEventList ));
9135
+ } else {
9136
+ if (auto Res = Queue->Context ->getAvailableCommandList (
9137
+ Queue, WaitCommandList, false , false ))
9138
+ return Res;
9139
+
9140
+ ZE_CALL (zeCommandListAppendSignalEvent,
9141
+ (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ));
9142
+ }
9143
+
9144
+ // Execution event for this enqueue of the PI command-buffer
9145
+ pi_event RetEvent{};
9146
+ // Create a command-list to signal RetEvent on completion
9147
+ pi_command_list_ptr_t SignalCommandList{};
9148
+ if (Event) {
9149
+ if (auto Res = Queue->Context ->getAvailableCommandList (
9150
+ Queue, SignalCommandList, false , false ))
9151
+ return Res;
9152
+
9153
+ if (auto Res = createEventAndAssociateQueue (
9154
+ Queue, &RetEvent, PI_COMMAND_TYPE_EXT_COMMAND_BUFFER,
9155
+ SignalCommandList, false ))
9156
+ return Res;
9157
+
9158
+ ZE_CALL (zeCommandListAppendBarrier,
9159
+ (SignalCommandList->first , RetEvent->ZeEvent , 1 ,
9160
+ &(CommandBuffer->SignalEvent ->ZeEvent )));
9161
+ }
9162
+
9163
+ // Execute our command-lists asynchronously
9164
+ if (auto Res = Queue->executeCommandList (WaitCommandList, false , false ))
9165
+ return Res;
9068
9166
9069
9167
if (auto Res = Queue->executeCommandList (CommandListPtr, false , false ))
9070
9168
return Res;
9071
9169
9170
+ if (auto Res = Queue->executeCommandList (SignalCommandList, false , false ))
9171
+ return Res;
9172
+
9173
+ if (Event) {
9174
+ *Event = RetEvent;
9175
+ }
9176
+
9072
9177
return PI_SUCCESS;
9073
9178
}
9074
9179
@@ -9088,8 +9193,11 @@ _pi_ext_command_buffer::~_pi_ext_command_buffer() {
9088
9193
if (ZeCommandList) {
9089
9194
ZE_CALL_NOCHECK (zeCommandListDestroy, (ZeCommandList));
9090
9195
}
9091
- if (ExecutionEvent) {
9092
- ExecutionEvent->RefCount.decrementAndTest();
9196
+ if (SignalEvent) {
9197
+ SignalEvent->RefCount .decrementAndTest ();
9198
+ }
9199
+ if (WaitEvent) {
9200
+ WaitEvent->RefCount .decrementAndTest ();
9093
9201
}
9094
9202
Context->RefCount .decrementAndTest ();
9095
9203
}
0 commit comments