@@ -8829,7 +8829,64 @@ pi_result _pi_buffer::free() {
88298829 return PI_SUCCESS;
88308830}
88318831
8832- // / command-buffer Extension
8832+ /* Command-buffer Extension
8833+
8834+ The PI interface for submitting a PI command-buffer takes a list
8835+ of events to wait on, and an event representing the completion of
8836+ that particular submission of the command-buffer.
8837+
8838+ However, in `zeCommandQueueExecuteCommandLists` there are no parameters to
8839+ take a waitlist and also the only sync primitive returned is to block on
8840+ host.
8841+
8842+ In order to get the PI command-buffer enqueue semantics we want with L0
8843+ this adapter adds extra commands to the L0 command-list representing a
8844+ PI command-buffer.
8845+
8846+ Prefix - Commands added to the start of the L0 command-list by L0 adapter.
8847+ Suffix - Commands added to the end of the L0 command-list by L0 adapter.
8848+
8849+ These extra commands operate on L0 event synchronisation primitives used by
8850+ the command-list to interact with the external PI wait-list and PI return
8851+ event required for the enqueue interface.
8852+
8853+ The `pi_ext_command_buffer` class for this adapter contains a SignalEvent
8854+ which signals the completion of the command-list in the suffix, and
8855+ is reset in the prefix. This signal is detected by a new PI return
8856+ event created on PI command-buffer enqueue.
8857+
8858+ There is also a WaitEvent used by the `pi_ext_command_buffer` class
8859+ in the prefix to wait on any dependencies passed in the enqueue wait-list.
8860+
8861+ ┌──────────┬────────────────────────────────────────────────┬─────────┐
8862+ │ Prefix │ Commands added to PI command-buffer by PI user │ Suffix │
8863+ └──────────┴────────────────────────────────────────────────┴─────────┘
8864+
8865+ ┌───────────────────┬──────────────────────────────┐
8866+ Prefix │Reset signal event │ Barrier waiting on wait event│
8867+ └───────────────────┴──────────────────────────────┘
8868+
8869+ ┌─────────────────────────────────────────┐
8870+ Suffix │Signal the PI command-buffer signal event│
8871+ └─────────────────────────────────────────┘
8872+
8873+
8874+ For a call to `piextEnqueueCommandBuffer` with an event_list `EL`,
8875+ command-buffer `CB`, and return event `RE` our implementation has to create
8876+ and submit two new command-lists for the above approach to work. One before
8877+ the command-list with extra commands associated with `CB`, and the other
8878+ after `CB`.
8879+
8880+ Command-list created on `piextEnqueueCommandBuffer` to execution before `CB`:
8881+ ┌───────────────────────────────────────────────────────────┐
8882+ │Barrier on `EL` than signals `CB` WaitEvent when completed │
8883+ └───────────────────────────────────────────────────────────┘
8884+
8885+ Command-list created on `piextEnqueueCommandBuffer` to execution after `CB`:
8886+ ┌─────────────────────────────────────────────────────────────┐
8887+ │Barrier on `CB` SignalEvent that signals `RE` when completed │
8888+ └─────────────────────────────────────────────────────────────┘
8889+ */
88338890
88348891// / Helper function to take a list of pi_ext_sync_points and fill the provided
88358892// / vector with the associated ZeEvents
@@ -8872,6 +8929,19 @@ pi_result piextCommandBufferCreate(pi_context Context, pi_device Device,
88728929 } catch (...) {
88738930 return PI_ERROR_UNKNOWN;
88748931 }
8932+
8933+ // Create signal & wait events to be used in the command-list for sync
8934+ // on command-buffer enqueue.
8935+ auto CommandBuffer = *RetCommandBuffer;
8936+ PI_CALL (EventCreate (Context, nullptr , true , &CommandBuffer->SignalEvent ));
8937+ PI_CALL (EventCreate (Context, nullptr , false , &CommandBuffer->WaitEvent ));
8938+
8939+ // Add prefix commands
8940+ ZE_CALL (zeCommandListAppendEventReset,
8941+ (ZeCommandList, CommandBuffer->SignalEvent ->ZeEvent ));
8942+ ZE_CALL (zeCommandListAppendBarrier,
8943+ (ZeCommandList, nullptr , 1 , &CommandBuffer->WaitEvent ->ZeEvent ));
8944+
88758945 return PI_SUCCESS;
88768946}
88778947
@@ -8891,13 +8961,10 @@ pi_result piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) {
88918961}
88928962
88938963pi_result piextCommandBufferFinalize (pi_ext_command_buffer CommandBuffer) {
8894- // We need to append some signal that will indicate that command-buffer has
8964+ // We need to append signal that will indicate that command-buffer has
88958965 // finished executing.
8896- EventCreate (CommandBuffer->Context , nullptr , true ,
8897- &CommandBuffer->ExecutionEvent );
8898- ZE_CALL (
8899- zeCommandListAppendSignalEvent,
8900- (CommandBuffer->ZeCommandList , CommandBuffer->ExecutionEvent ->ZeEvent ));
8966+ ZE_CALL (zeCommandListAppendSignalEvent,
8967+ (CommandBuffer->ZeCommandList , CommandBuffer->SignalEvent ->ZeEvent ));
89018968 // Close the command list and have it ready for dispatch.
89028969 ZE_CALL (zeCommandListClose, (CommandBuffer->ZeCommandList ));
89038970 return PI_SUCCESS;
@@ -9026,17 +9093,11 @@ pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
90269093 pi_uint32 NumEventsInWaitList,
90279094 const pi_event *EventWaitList,
90289095 pi_event *Event) {
9029-
9030- // Execute command list asynchronously, as the event will be used
9031- // to track down its completion.
9032-
9033- uint32_t QueueGroupOrdinal;
9034- // TODO: Revisit forcing compute engine
9035- auto UseCopyEngine = false ;
9096+ // Use compute engine rather than copy engine
9097+ const auto UseCopyEngine = false ;
90369098 auto &QGroup = Queue->getQueueGroup (UseCopyEngine);
9037- auto &ZeCommandQueue =
9038- // ForcedCmdQueue ? *ForcedCmdQueue :
9039- QGroup.getZeQueue (&QueueGroupOrdinal);
9099+ uint32_t QueueGroupOrdinal;
9100+ auto &ZeCommandQueue = QGroup.getZeQueue (&QueueGroupOrdinal);
90409101
90419102 ze_fence_handle_t ZeFence;
90429103 ZeStruct<ze_fence_desc_t > ZeFenceDesc;
@@ -9050,25 +9111,69 @@ pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,
90509111 CommandBuffer->ZeCommandList ,
90519112 {ZeFence, false , false , ZeCommandQueue, QueueGroupOrdinal}));
90529113
9053- Queue->insertActiveBarriers (CommandListPtr, UseCopyEngine);
9054-
9114+ // Previous execution will have closed the command list, we need to reopen
9115+ // it otherwise calling `executeCommandList` will return early.
9116+ CommandListPtr->second .IsClosed = false ;
90559117 CommandListPtr->second .ZeFenceInUse = true ;
90569118
9057- // Return the command-buffer's execution event as the user visible pi_event
9058- *Event = CommandBuffer->ExecutionEvent ;
9059- (*Event)->Queue = Queue;
9060- (*Event)->RefCount .increment ();
9061- Queue->RefCount .increment ();
9119+ // Create command-list to execute before `CommandListPtr` and will signal
9120+ // when `EventWaitList` dependencies are complete.
9121+ pi_command_list_ptr_t WaitCommandList{};
9122+ if (NumEventsInWaitList) {
9123+ _pi_ze_event_list_t TmpWaitList;
9124+ if (auto Res = TmpWaitList.createAndRetainPiZeEventList (
9125+ NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine))
9126+ return Res;
90629127
9063- PI_CALL (piEventRetain (*Event));
9128+ if (auto Res = Queue->Context ->getAvailableCommandList (
9129+ Queue, WaitCommandList, false , false ))
9130+ return Res;
90649131
9065- // Previous execution will have closed the command list so we need to reopen
9066- // it.
9067- CommandListPtr->second .IsClosed = false ;
9132+ ZE_CALL (zeCommandListAppendBarrier,
9133+ (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ,
9134+ NumEventsInWaitList, TmpWaitList.ZeEventList ));
9135+ } else {
9136+ if (auto Res = Queue->Context ->getAvailableCommandList (
9137+ Queue, WaitCommandList, false , false ))
9138+ return Res;
9139+
9140+ ZE_CALL (zeCommandListAppendSignalEvent,
9141+ (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ));
9142+ }
9143+
9144+ // Execution event for this enqueue of the PI command-buffer
9145+ pi_event RetEvent{};
9146+ // Create a command-list to signal RetEvent on completion
9147+ pi_command_list_ptr_t SignalCommandList{};
9148+ if (Event) {
9149+ if (auto Res = Queue->Context ->getAvailableCommandList (
9150+ Queue, SignalCommandList, false , false ))
9151+ return Res;
9152+
9153+ if (auto Res = createEventAndAssociateQueue (
9154+ Queue, &RetEvent, PI_COMMAND_TYPE_EXT_COMMAND_BUFFER,
9155+ SignalCommandList, false ))
9156+ return Res;
9157+
9158+ ZE_CALL (zeCommandListAppendBarrier,
9159+ (SignalCommandList->first , RetEvent->ZeEvent , 1 ,
9160+ &(CommandBuffer->SignalEvent ->ZeEvent )));
9161+ }
9162+
9163+ // Execution our command-lists asynchronously
9164+ if (auto Res = Queue->executeCommandList (WaitCommandList, false , false ))
9165+ return Res;
90689166
90699167 if (auto Res = Queue->executeCommandList (CommandListPtr, false , false ))
90709168 return Res;
90719169
9170+ if (auto Res = Queue->executeCommandList (SignalCommandList, false , false ))
9171+ return Res;
9172+
9173+ if (Event) {
9174+ *Event = RetEvent;
9175+ }
9176+
90729177 return PI_SUCCESS;
90739178}
90749179
@@ -9088,8 +9193,11 @@ _pi_ext_command_buffer::~_pi_ext_command_buffer() {
90889193 if (ZeCommandList) {
90899194 ZE_CALL_NOCHECK (zeCommandListDestroy, (ZeCommandList));
90909195 }
9091- if (ExecutionEvent) {
9092- ExecutionEvent->RefCount .decrementAndTest ();
9196+ if (SignalEvent) {
9197+ SignalEvent->RefCount .decrementAndTest ();
9198+ }
9199+ if (WaitEvent) {
9200+ WaitEvent->RefCount .decrementAndTest ();
90939201 }
90949202 Context->RefCount .decrementAndTest ();
90959203}
0 commit comments