Skip to content

Commit 87da156

Browse files
[NFC][SYCL] More queue_impl passing by raw ptr/ref (#18714)
Same as #18712: part of a bigger refactoring of internal RT APIs to pass raw references instead of `std::shared_ptr<*_impl>`, similar to what has been implemented for `device_impl` earlier.
1 parent bda408a commit 87da156

File tree

8 files changed

+23
-25
lines changed

8 files changed

+23
-25
lines changed

sycl/source/detail/helpers.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ markBufferAsInternal(const std::shared_ptr<buffer_impl> &BufImpl) {
3838
}
3939

4040
std::tuple<const RTDeviceBinaryImage *, ur_program_handle_t>
41-
retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName,
41+
retrieveKernelBinary(queue_impl &Queue, const char *KernelName,
4242
CGExecKernel *KernelCG) {
43-
device_impl &Dev = Queue->getDeviceImpl();
43+
device_impl &Dev = Queue.getDeviceImpl();
4444
bool isNvidia = Dev.getBackend() == backend::ext_oneapi_cuda;
4545
bool isHIP = Dev.getBackend() == backend::ext_oneapi_hip;
4646
if (isNvidia || isHIP) {
@@ -59,7 +59,7 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName,
5959
if (DeviceImage == DeviceImages.end()) {
6060
return {nullptr, nullptr};
6161
}
62-
auto ContextImpl = Queue->getContextImplPtr();
62+
auto ContextImpl = Queue.getContextImplPtr();
6363
ur_program_handle_t Program =
6464
detail::ProgramManager::getInstance().createURProgram(
6565
**DeviceImage, ContextImpl, {createSyclObjFromImpl<device>(Dev)});
@@ -80,7 +80,7 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName,
8080
DeviceImage = SyclKernelImpl->getDeviceImage()->get_bin_image_ref();
8181
Program = SyclKernelImpl->getDeviceImage()->get_ur_program_ref();
8282
} else {
83-
auto ContextImpl = Queue->getContextImplPtr();
83+
auto ContextImpl = Queue.getContextImplPtr();
8484
DeviceImage = &detail::ProgramManager::getInstance().getDeviceImage(
8585
KernelName, ContextImpl, &Dev);
8686
Program = detail::ProgramManager::getInstance().createURProgram(

sycl/source/detail/helpers.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ void waitEvents(std::vector<sycl::event> DepEvents);
2727
#endif
2828

2929
std::tuple<const RTDeviceBinaryImage *, ur_program_handle_t>
30-
retrieveKernelBinary(const QueueImplPtr &, const char *KernelName,
30+
retrieveKernelBinary(queue_impl &Queue, const char *KernelName,
3131
CGExecKernel *CGKernel = nullptr);
3232
} // namespace detail
3333
} // namespace _V1

sycl/source/detail/jit_compiler.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,8 @@ translateBinaryImageFormat(ur::DeviceBinaryType Type) {
124124
}
125125
}
126126

127-
static ::jit_compiler::BinaryFormat getTargetFormat(const QueueImplPtr &Queue) {
128-
auto Backend = Queue->getDeviceImpl().getBackend();
127+
static ::jit_compiler::BinaryFormat getTargetFormat(queue_impl &Queue) {
128+
auto Backend = Queue.getDeviceImpl().getBackend();
129129
switch (Backend) {
130130
case backend::ext_oneapi_level_zero:
131131
case backend::opencl:
@@ -143,7 +143,7 @@ static ::jit_compiler::BinaryFormat getTargetFormat(const QueueImplPtr &Queue) {
143143
#endif // _WIN32
144144

145145
ur_kernel_handle_t jit_compiler::materializeSpecConstants(
146-
const QueueImplPtr &Queue, const RTDeviceBinaryImage *BinImage,
146+
queue_impl &Queue, const RTDeviceBinaryImage *BinImage,
147147
KernelNameStrRefT KernelName,
148148
const std::vector<unsigned char> &SpecConstBlob) {
149149
#ifndef _WIN32
@@ -220,8 +220,8 @@ ur_kernel_handle_t jit_compiler::materializeSpecConstants(
220220
}
221221

222222
RTDeviceBinaryImage MaterializedRTDevBinImage{&MaterializedRawDeviceImage};
223-
const auto &Context = Queue->get_context();
224-
const auto &Device = Queue->get_device();
223+
const auto &Context = Queue.get_context();
224+
const auto &Device = Queue.get_device();
225225
auto NewKernel = PM.getOrCreateMaterializedKernel(
226226
MaterializedRTDevBinImage, Context, Device, KernelName, SpecConstBlob);
227227

sycl/source/detail/jit_compiler.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,12 @@ using JITEnvVar = DynArray<char>;
3232
namespace sycl {
3333
inline namespace _V1 {
3434
namespace detail {
35-
using QueueImplPtr = std::shared_ptr<queue_impl>;
3635

3736
class jit_compiler {
3837

3938
public:
4039
ur_kernel_handle_t
41-
materializeSpecConstants(const QueueImplPtr &Queue,
40+
materializeSpecConstants(queue_impl &Queue,
4241
const RTDeviceBinaryImage *BinImage,
4342
KernelNameStrRefT KernelName,
4443
const std::vector<unsigned char> &SpecConstBlob);

sycl/source/detail/scheduler/commands.cpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2383,7 +2383,7 @@ void SetArgBasedOnType(
23832383
}
23842384

23852385
static ur_result_t SetKernelParamsAndLaunch(
2386-
const QueueImplPtr &Queue, std::vector<ArgDesc> &Args,
2386+
queue_impl &Queue, std::vector<ArgDesc> &Args,
23872387
const std::shared_ptr<device_image_impl> &DeviceImageImpl,
23882388
ur_kernel_handle_t Kernel, NDRDescT &NDRDesc,
23892389
std::vector<ur_event_handle_t> &RawEvents, detail::event_impl *OutEventImpl,
@@ -2395,8 +2395,7 @@ static ur_result_t SetKernelParamsAndLaunch(
23952395
int KernelNumArgs = 0,
23962396
detail::kernel_param_desc_t (*KernelParamDescGetter)(int) = nullptr,
23972397
bool KernelHasSpecialCaptures = true) {
2398-
assert(Queue && "Kernel submissions should have an associated queue");
2399-
const AdapterPtr &Adapter = Queue->getAdapter();
2398+
const AdapterPtr &Adapter = Queue.getAdapter();
24002399

24012400
if (SYCLConfig<SYCL_JIT_AMDGCN_PTX_KERNELS>::get()) {
24022401
std::vector<unsigned char> Empty;
@@ -2434,7 +2433,7 @@ static ur_result_t SetKernelParamsAndLaunch(
24342433
auto setFunc = [&Adapter, Kernel, &DeviceImageImpl, &getMemAllocationFunc,
24352434
&Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) {
24362435
SetArgBasedOnType(Adapter, Kernel, DeviceImageImpl, getMemAllocationFunc,
2437-
Queue->getContextImplPtr(), Arg, NextTrueIndex);
2436+
Queue.getContextImplPtr(), Arg, NextTrueIndex);
24382437
};
24392438
applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc);
24402439
}
@@ -2450,7 +2449,7 @@ static ur_result_t SetKernelParamsAndLaunch(
24502449
Kernel, ImplicitLocalArg.value(), WorkGroupMemorySize, nullptr);
24512450
}
24522451

2453-
adjustNDRangePerKernel(NDRDesc, Kernel, Queue->getDeviceImpl());
2452+
adjustNDRangePerKernel(NDRDesc, Kernel, Queue.getDeviceImpl());
24542453

24552454
// Remember this information before the range dimensions are reversed
24562455
const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0);
@@ -2464,7 +2463,7 @@ static ur_result_t SetKernelParamsAndLaunch(
24642463
LocalSize = &NDRDesc.LocalSize[0];
24652464
else {
24662465
Adapter->call<UrApiKind::urKernelGetGroupInfo>(
2467-
Kernel, Queue->getDeviceImpl().getHandleRef(),
2466+
Kernel, Queue.getDeviceImpl().getHandleRef(),
24682467
UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(RequiredWGSize),
24692468
RequiredWGSize,
24702469
/* pPropSizeRet = */ nullptr);
@@ -2509,7 +2508,7 @@ static ur_result_t SetKernelParamsAndLaunch(
25092508
ur_event_handle_t UREvent = nullptr;
25102509
ur_result_t Error =
25112510
Adapter->call_nocheck<UrApiKind::urEnqueueKernelLaunchCustomExp>(
2512-
Queue->getHandleRef(), Kernel, NDRDesc.Dims,
2511+
Queue.getHandleRef(), Kernel, NDRDesc.Dims,
25132512
HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr,
25142513
&NDRDesc.GlobalSize[0], LocalSize, property_list.size(),
25152514
property_list.data(), RawEvents.size(),
@@ -2529,7 +2528,7 @@ static ur_result_t SetKernelParamsAndLaunch(
25292528
Args...);
25302529
}
25312530
return Adapter->call_nocheck<UrApiKind::urEnqueueKernelLaunch>(Args...);
2532-
}(Queue->getHandleRef(), Kernel, NDRDesc.Dims,
2531+
}(Queue.getHandleRef(), Kernel, NDRDesc.Dims,
25332532
HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr, &NDRDesc.GlobalSize[0],
25342533
LocalSize, RawEvents.size(),
25352534
RawEvents.empty() ? nullptr : &RawEvents[0],
@@ -2780,7 +2779,7 @@ void enqueueImpKernel(
27802779
}
27812780

27822781
Error = SetKernelParamsAndLaunch(
2783-
Queue, Args, DeviceImageImpl, Kernel, NDRDesc, EventsWaitList,
2782+
*Queue, Args, DeviceImageImpl, Kernel, NDRDesc, EventsWaitList,
27842783
OutEventImpl, EliminatedArgMask, getMemAllocationFunc,
27852784
KernelIsCooperative, KernelUsesClusterLaunch, WorkGroupMemorySize,
27862785
BinImage, KernelName, KernelFuncPtr, KernelNumArgs,
@@ -3286,7 +3285,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
32863285
const RTDeviceBinaryImage *BinImage = nullptr;
32873286
if (detail::SYCLConfig<detail::SYCL_JIT_AMDGCN_PTX_KERNELS>::get()) {
32883287
std::tie(BinImage, std::ignore) =
3289-
retrieveKernelBinary(MQueue, KernelName.data());
3288+
retrieveKernelBinary(*MQueue, KernelName.data());
32903289
assert(BinImage && "Failed to obtain a binary image.");
32913290
}
32923291
enqueueImpKernel(

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -615,7 +615,7 @@ void Scheduler::cleanupAuxiliaryResources(BlockingT Blocking) {
615615
}
616616

617617
ur_kernel_handle_t Scheduler::completeSpecConstMaterialization(
618-
[[maybe_unused]] const QueueImplPtr &Queue,
618+
[[maybe_unused]] queue_impl &Queue,
619619
[[maybe_unused]] const RTDeviceBinaryImage *BinImage,
620620
[[maybe_unused]] KernelNameStrRefT KernelName,
621621
[[maybe_unused]] std::vector<unsigned char> &SpecConstBlob) {

sycl/source/detail/scheduler/scheduler.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ class Scheduler {
453453
void deferMemObjRelease(const std::shared_ptr<detail::SYCLMemObjI> &MemObj);
454454

455455
ur_kernel_handle_t completeSpecConstMaterialization(
456-
const QueueImplPtr &Queue, const RTDeviceBinaryImage *BinImage,
456+
queue_impl &Queue, const RTDeviceBinaryImage *BinImage,
457457
KernelNameStrRefT KernelName, std::vector<unsigned char> &SpecConstBlob);
458458

459459
void releaseResources(BlockingT Blocking = BlockingT::BLOCKING);

sycl/source/handler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,7 @@ event handler::finalize() {
582582
const detail::RTDeviceBinaryImage *BinImage = nullptr;
583583
if (detail::SYCLConfig<detail::SYCL_JIT_AMDGCN_PTX_KERNELS>::get()) {
584584
std::tie(BinImage, std::ignore) =
585-
detail::retrieveKernelBinary(MQueue, MKernelName.data());
585+
detail::retrieveKernelBinary(*MQueue, MKernelName.data());
586586
assert(BinImage && "Failed to obtain a binary image.");
587587
}
588588
enqueueImpKernel(

0 commit comments

Comments
 (0)