[SYCL][L0][Plugin] Call ZeCommandQueueCreate on demand by asudarsa · Pull Request #5109 · intel/llvm · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

[SYCL][L0][Plugin] Call ZeCommandQueueCreate on demand #5109

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 63 additions & 32 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1197,8 +1197,11 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
zePrint("Command list to be executed on copy engine\n");
// If available, get the copy command queue associated with
// ZeCommandList
auto ZeCopyCommandQueue =
(Index == -1) ? nullptr : ZeCopyCommandQueues[Index];
ze_command_queue_handle_t ZeCopyCommandQueue = nullptr;
if (Index != -1) {
if (auto Res = getOrCreateCopyCommandQueue(Index, ZeCopyCommandQueue))
return Res;
}
auto &ZeCommandQueue =
(UseCopyEngine) ? ZeCopyCommandQueue : ZeComputeCommandQueue;
// Scope of the lock must be till the end of the function, otherwise new mem
Expand Down Expand Up @@ -1260,6 +1263,42 @@ bool _pi_queue::isBatchingAllowed() {
return (this->QueueBatchSize > 0 && ((ZeSerialize & ZeSerializeBlock) == 0));
}

// Hands back, through 'ZeCopyCommandQueue', the Level Zero copy command queue
// stored at position 'Index' of ZeCopyCommandQueues, creating it with
// zeCommandQueueCreate on first use and caching the handle in that slot.
//
// 'ZeCopyCommandQueue' is reset to nullptr on entry, so it is only non-null
// when the function returns PI_SUCCESS. 'Index' must be a valid position in
// ZeCopyCommandQueues; this is checked with PI_ASSERT(..., PI_INVALID_VALUE).
//
// NOTE(review): nothing in this function serializes concurrent first-use
// creation of the same slot; presumably callers hold the queue lock - confirm.
pi_result _pi_queue::getOrCreateCopyCommandQueue(
    int Index, ze_command_queue_handle_t &ZeCopyCommandQueue) {
  ZeCopyCommandQueue = nullptr;

  // Make sure 'Index' is within limits
  PI_ASSERT((Index >= 0) &&
                (Index < static_cast<int>(ZeCopyCommandQueues.size())),
            PI_INVALID_VALUE);

  // Return the Ze copy command queue, if already available
  if (ZeCopyCommandQueues[Index]) {
    ZeCopyCommandQueue = ZeCopyCommandQueues[Index];
    return PI_SUCCESS;
  }

  // Ze copy command queue is not available at 'Index'. So we create it below.
  //
  // There are two kinds of copy queues: the main copy queue and link copy
  // queues. The first slot of ZeCopyCommandQueues is the main copy queue only
  // if the device has a main copy engine; otherwise the first slot is already
  // a link copy queue. This matches how getZeCopyCommandQueue picks the queue
  // group for index 0.
  const bool HasMainCopyEngine = Device->hasMainCopyEngine();
  const bool IsMainCopyQueue = (Index == 0) && HasMainCopyEngine;

  ZeStruct<ze_command_queue_desc_t> ZeCommandQueueDesc;
  ZeCommandQueueDesc.ordinal = IsMainCopyQueue
                                   ? Device->ZeMainCopyQueueGroupIndex
                                   : Device->ZeLinkCopyQueueGroupIndex;
  // ZeCommandQueueDesc.index is the index into the list of main (or link)
  // copy queues. The main copy queue always uses index 0. Link copy queues
  // are offset by one in the overall list when a main copy queue occupies
  // slot 0, and are not offset when there is no main copy engine.
  if (IsMainCopyQueue)
    ZeCommandQueueDesc.index = 0;
  else
    ZeCommandQueueDesc.index = HasMainCopyEngine ? Index - 1 : Index;

  zePrint("NOTE: Copy Engine ZeCommandQueueDesc.ordinal = %d, "
          "ZeCommandQueueDesc.index = %d\n",
          ZeCommandQueueDesc.ordinal, ZeCommandQueueDesc.index);
  ZE_CALL(zeCommandQueueCreate,
          (Context->ZeContext, Device->ZeDevice,
           &ZeCommandQueueDesc, // TODO: translate properties
           &ZeCopyCommandQueue));
  // Cache the new handle so later calls for this slot reuse it.
  ZeCopyCommandQueues[Index] = ZeCopyCommandQueue;
  return PI_SUCCESS;
}

// This function will return one of possibly multiple available copy queues.
// Currently, a round robin strategy is used.
// This function also sends back the value of CopyQueueIndex and
Expand Down Expand Up @@ -1291,7 +1330,10 @@ _pi_queue::getZeCopyCommandQueue(int *CopyQueueIndex,
if (CopyQueueGroupIndex)
*CopyQueueGroupIndex = Device->ZeMainCopyQueueGroupIndex;
zePrint("Note: CopyQueueIndex = %d\n", *CopyQueueIndex);
return ZeCopyCommandQueues[0];
ze_command_queue_handle_t ZeCopyCommandQueue = nullptr;
if (getOrCreateCopyCommandQueue(0, ZeCopyCommandQueue))
return nullptr;
return ZeCopyCommandQueue;
}

// Round robin logic is used here to access copy command queues.
Expand All @@ -1317,7 +1359,10 @@ _pi_queue::getZeCopyCommandQueue(int *CopyQueueIndex,
((*CopyQueueIndex == 0) && Device->hasMainCopyEngine())
? Device->ZeMainCopyQueueGroupIndex
: Device->ZeLinkCopyQueueGroupIndex;
return ZeCopyCommandQueues[*CopyQueueIndex];
ze_command_queue_handle_t ZeCopyCommandQueue = nullptr;
if (getOrCreateCopyCommandQueue(*CopyQueueIndex, ZeCopyCommandQueue))
return nullptr;
return ZeCopyCommandQueue;
}

pi_result _pi_queue::executeOpenCommandList() {
Expand Down Expand Up @@ -2803,39 +2848,21 @@ pi_result piQueueCreate(pi_context Context, pi_device Device,

std::vector<ze_command_queue_handle_t> ZeCopyCommandQueues;

// Create queue to main copy engine
// Create a placeholder in ZeCopyCommandQueues for a queue that will be used
// to submit commands to main copy engine. This queue is initially NULL and
// will be replaced by the Ze Command Queue which gets created just before its
// first use.
ze_command_queue_handle_t ZeMainCopyCommandQueue = nullptr;
if (Device->hasMainCopyEngine()) {
zePrint("NOTE: Main Copy Engine ZeCommandQueueDesc.ordinal = %d, "
"ZeCommandQueueDesc.index = %d\n",
Device->ZeMainCopyQueueGroupIndex, 0);
ZeCommandQueueDesc.ordinal = Device->ZeMainCopyQueueGroupIndex;
ZeCommandQueueDesc.index = 0;
ZE_CALL(zeCommandQueueCreate,
(Context->ZeContext, ZeDevice,
&ZeCommandQueueDesc, // TODO: translate properties
&ZeMainCopyCommandQueue));
// Main Copy Command Queue is pushed at start of ZeCopyCommandQueues
// vector.
ZeCopyCommandQueues.push_back(ZeMainCopyCommandQueue);
}
PI_ASSERT(Queue, PI_INVALID_QUEUE);

// Create additional queues to link copy engines and push them into
// ZeCopyCommandQueues vector.
// Create additional 'placeholder queues' to link copy engines and push them
// into ZeCopyCommandQueues.
if (Device->hasLinkCopyEngine()) {
auto ZeNumLinkCopyQueues = Device->ZeLinkCopyQueueGroupProperties.numQueues;
for (uint32_t i = 0; i < ZeNumLinkCopyQueues; ++i) {
zePrint("NOTE: Link Copy Engine ZeCommandQueueDesc.ordinal = %d, "
"ZeCommandQueueDesc.index = %d\n",
Device->ZeLinkCopyQueueGroupIndex, i);
ze_command_queue_handle_t ZeLinkCopyCommandQueue = nullptr;
ZeCommandQueueDesc.ordinal = Device->ZeLinkCopyQueueGroupIndex;
ZeCommandQueueDesc.index = i;
ZE_CALL(zeCommandQueueCreate,
(Context->ZeContext, ZeDevice,
&ZeCommandQueueDesc, // TODO: translate properties
&ZeLinkCopyCommandQueue));
ZeCopyCommandQueues.push_back(ZeLinkCopyCommandQueue);
}
}
Expand Down Expand Up @@ -2917,7 +2944,8 @@ pi_result piQueueRelease(pi_queue Queue) {
// Make sure all commands get executed.
ZE_CALL(zeHostSynchronize, (Queue->ZeComputeCommandQueue));
for (uint32_t i = 0; i < Queue->ZeCopyCommandQueues.size(); ++i) {
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
if (Queue->ZeCopyCommandQueues[i])
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
}

// Destroy all the fences created associated with this queue.
Expand Down Expand Up @@ -2958,7 +2986,8 @@ static pi_result QueueRelease(pi_queue Queue, pi_queue LockedQueue) {
if (Queue->OwnZeCommandQueue) {
ZE_CALL(zeCommandQueueDestroy, (Queue->ZeComputeCommandQueue));
for (uint32_t i = 0; i < Queue->ZeCopyCommandQueues.size(); ++i) {
ZE_CALL(zeCommandQueueDestroy, (Queue->ZeCopyCommandQueues[i]));
if (Queue->ZeCopyCommandQueues[i])
ZE_CALL(zeCommandQueueDestroy, (Queue->ZeCopyCommandQueues[i]));
}
}

Expand Down Expand Up @@ -2991,7 +3020,8 @@ pi_result piQueueFinish(pi_queue Queue) {

ZE_CALL(zeHostSynchronize, (Queue->ZeComputeCommandQueue));
for (uint32_t i = 0; i < Queue->ZeCopyCommandQueues.size(); ++i) {
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
if (Queue->ZeCopyCommandQueues[i])
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
}

return PI_SUCCESS;
Expand Down Expand Up @@ -5387,7 +5417,8 @@ pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList,

ZE_CALL(zeHostSynchronize, (Queue->ZeComputeCommandQueue));
for (uint32_t i = 0; i < Queue->ZeCopyCommandQueues.size(); ++i) {
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
if (Queue->ZeCopyCommandQueues[i])
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
}

Queue->LastCommandEvent = *Event;
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,14 @@ struct _pi_queue : _pi_object {
// link copy engines, if available.
std::vector<ze_command_queue_handle_t> ZeCopyCommandQueues;

// Looks up the Ze copy command queue stored in ZeCopyCommandQueues at
// position 'Index'.
// If a queue is already present there, it is returned through
// 'ZeCopyCommandQueue'. Otherwise a new Ze copy command queue is created,
// stored at 'Index' for later reuse, and returned through
// 'ZeCopyCommandQueue'.
// 'ZeCopyCommandQueue' is set to nullptr on entry; PI_SUCCESS is returned
// once it holds a valid queue. 'Index' is expected to be a valid position in
// ZeCopyCommandQueues.
pi_result
getOrCreateCopyCommandQueue(int Index,
ze_command_queue_handle_t &ZeCopyCommandQueue);

// One of the many available copy command queues will be used for
// submitting command lists to. This variable stores index of the last used
// copy command queue in the ZeCopyCommandQueues vector.
Expand Down
0