Skip to content

Commit 97b255e

Browse files
committed
[SYCL] Enable USM address spaces heneration upder opt
This patch serves two purposes: 1. It reverts shift of accessors to global buffers from __global to __global_device address space, which was considered as not a good idea until proven otherwise. Still this shift is enabled under -fsycl_enable_usm_address_spaces option for testing; 2. It fixes a mangling issue of functions that accepts arguments of multi_ptr<> type for non-SPIR targets (see: intel#2039) Signed-off-by: Dmitry Sidorov <[email protected]>
1 parent 0a8e7d3 commit 97b255e

File tree

9 files changed

+51
-11
lines changed

9 files changed

+51
-11
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1842,7 +1842,7 @@ def fintelfpga : Flag<["-"], "fintelfpga">, Group<f_Group>,
18421842
Flags<[CC1Option, CoreOption]>, HelpText<"Perform ahead of time compilation for FPGA">;
18431843
def fsycl_enable_usm_address_spaces : Flag<["-"], "fsycl-enable-usm-address-spaces">,
18441844
Group<f_Group>, Flags<[CC1Option, CoreOption]>,
1845-
HelpText<"Enable SPV_INTEL_usm_storage_classes extension">;
1845+
HelpText<"Enable USM address spaces">;
18461846
def fsycl_device_only : Flag<["-"], "fsycl-device-only">, Flags<[CoreOption]>,
18471847
HelpText<"Compile SYCL kernels for device">;
18481848
def fsycl_targets_EQ : CommaJoined<["-"], "fsycl-targets=">, Flags<[DriverOption, CC1Option, CoreOption]>,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6183,6 +6183,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
61836183
}
61846184
if (Args.hasArg(options::OPT_fsycl_unnamed_lambda))
61856185
CmdArgs.push_back("-fsycl-unnamed-lambda");
6186+
6187+
// Enable generation of USM address spaces as opt-in.
6188+
// __ENABLE_USM_ADDR_SPACE__ will be used during compilation of SYCL headers
6189+
if (getToolChain().getTriple().getSubArch() ==
6190+
llvm::Triple::SPIRSubArch_fpga &&
6191+
Args.hasArg(options::OPT_fsycl_enable_usm_address_spaces))
6192+
CmdArgs.push_back("-D__ENABLE_USM_ADDR_SPACE__");
61866193
}
61876194

61886195
if (IsHIP)

clang/test/Driver/sycl-offload.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,8 @@
668668
// CHK-TOOLS-GEN: clang-offload-wrapper{{.*}} "-o=[[OUTPUT5:.+\.bc]]" "-host=x86_64-unknown-linux-gnu" "-target=spir64_gen{{.*}}" "-kind=sycl" "[[OUTPUT4]]"
669669
// CHK-TOOLS-CPU: clang-offload-wrapper{{.*}} "-o=[[OUTPUT5:.+\.bc]]" "-host=x86_64-unknown-linux-gnu" "-target=spir64_x86_64{{.*}}" "-kind=sycl" "[[OUTPUT4]]"
670670
// CHK-TOOLS-AOT: llc{{.*}} "-filetype=obj" "-o" "[[OUTPUT6:.+\.o]]" "[[OUTPUT5]]"
671-
// CHK-TOOLS-FPGA: clang{{.*}} "-triple" "spir64_fpga-unknown-unknown-sycldevice" {{.*}} "-fsycl-int-header=[[INPUT1:.+\.h]]" "-faddrsig"
671+
// CHK-TOOLS-FPGA-USM-DISABLE: clang{{.*}} "-triple" "spir64_fpga-unknown-unknown-sycldevice" {{.*}} "-fsycl-int-header=[[INPUT1:.+\.h]]" "-faddrsig"
672+
// CHK-TOOLS-FPGA-USM-ENABLE: clang{{.*}} "-triple" "spir64_fpga-unknown-unknown-sycldevice" {{.*}} "-fsycl-int-header=[[INPUT1:.+\.h]]" "-D__ENABLE_USM_ADDR_SPACE__" "-faddrsig"
672673
// CHK-TOOLS-GEN: clang{{.*}} "-triple" "spir64_gen-unknown-unknown-sycldevice" {{.*}} "-fsycl-int-header=[[INPUT1:.+\.h]]" "-faddrsig"
673674
// CHK-TOOLS-CPU: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown-sycldevice" {{.*}} "-fsycl-int-header=[[INPUT1:.+\.h]]" "-faddrsig"
674675
// CHK-TOOLS-AOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-include" "[[INPUT1]]" {{.*}} "-o" "[[OUTPUT7:.+\.o]]"

sycl/include/CL/sycl/access/access.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,13 @@ constexpr bool modeWritesNewData(access::mode m) {
105105

106106
#ifdef __SYCL_DEVICE_ONLY__
107107
#define __OPENCL_GLOBAL_AS__ __attribute__((opencl_global))
108+
#ifdef __ENABLE_USM_ADDR_SPACE__
108109
#define __OPENCL_GLOBAL_DEVICE_AS__ __attribute__((opencl_global_device))
109110
#define __OPENCL_GLOBAL_HOST_AS__ __attribute__((opencl_global_host))
111+
#else
112+
#define __OPENCL_GLOBAL_DEVICE_AS__ __attribute__((opencl_global))
113+
#define __OPENCL_GLOBAL_HOST_AS__ __attribute__((opencl_global))
114+
#endif // __ENABLE_USM_ADDR_SPACE__
110115
#define __OPENCL_LOCAL_AS__ __attribute__((opencl_local))
111116
#define __OPENCL_CONSTANT_AS__ __attribute__((opencl_constant))
112117
#define __OPENCL_PRIVATE_AS__ __attribute__((opencl_private))
@@ -124,10 +129,12 @@ template <access::target accessTarget> struct TargetToAS {
124129
access::address_space::global_space;
125130
};
126131

132+
#ifdef __ENABLE_USM_ADDR_SPACE__
127133
template <> struct TargetToAS<access::target::global_buffer> {
128134
constexpr static access::address_space AS =
129135
access::address_space::global_device_space;
130136
};
137+
#endif // __ENABLE_USM_ADDR_SPACE__
131138

132139
template <> struct TargetToAS<access::target::local> {
133140
constexpr static access::address_space AS =
@@ -192,13 +199,15 @@ struct remove_AS<__OPENCL_GLOBAL_AS__ T> {
192199
typedef T type;
193200
};
194201

202+
#ifdef __ENABLE_USM_ADDR_SPACE__
195203
template <class T> struct remove_AS<__OPENCL_GLOBAL_DEVICE_AS__ T> {
196204
typedef T type;
197205
};
198206

199207
template <class T> struct remove_AS<__OPENCL_GLOBAL_HOST_AS__ T> {
200208
typedef T type;
201209
};
210+
#endif // __ENABLE_USM_ADDR_SPACE__
202211

203212
template <class T>
204213
struct remove_AS<__OPENCL_PRIVATE_AS__ T> {

sycl/include/CL/sycl/atomic.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ class atomic {
198198
"T and pointerT must be same size");
199199
}
200200

201+
#ifdef __ENABLE_USM_ADDR_SPACE__
201202
// Create atomic in global_space with one from global_device_space
202203
template <access::address_space _Space = addressSpace,
203204
typename = typename std::enable_if<
@@ -214,6 +215,7 @@ class atomic {
214215
atomic(atomic<T, access::address_space::global_device_space> &&RHS) {
215216
Ptr = RHS.Ptr;
216217
}
218+
#endif // __ENABLE_USM_ADDR_SPACE__
217219

218220
void store(T Operand, memory_order Order = memory_order::relaxed) {
219221
__spirv_AtomicStore(

sycl/include/CL/sycl/handler.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,11 @@ class __SYCL_EXPORT handler {
506506
access::placeholder IsPH>
507507
detail::enable_if_t<Dim == 0 && Mode == access::mode::atomic, T>
508508
readFromFirstAccElement(accessor<T, Dim, Mode, Target, IsPH> Src) const {
509+
#ifdef __ENABLE_USM_ADDR_SPACE__
509510
atomic<T, access::address_space::global_device_space> AtomicSrc = Src;
511+
#else
512+
atomic<T, access::address_space::global_space> AtomicSrc = Src;
513+
#endif // __ENABLE_USM_ADDR_SPACE__
510514
return AtomicSrc.load();
511515
}
512516

@@ -529,7 +533,11 @@ class __SYCL_EXPORT handler {
529533
access::placeholder IsPH>
530534
detail::enable_if_t<Dim == 0 && Mode == access::mode::atomic, void>
531535
writeToFirstAccElement(accessor<T, Dim, Mode, Target, IsPH> Dst, T V) const {
536+
#ifdef __ENABLE_USM_ADDR_SPACE__
532537
atomic<T, access::address_space::global_device_space> AtomicDst = Dst;
538+
#else
539+
atomic<T, access::address_space::global_space> AtomicDst = Dst;
540+
#endif // __ENABLE_USM_ADDR_SPACE__
533541
AtomicDst.store(V);
534542
}
535543

sycl/include/CL/sycl/multi_ptr.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ template <typename ElementType, access::address_space Space> class multi_ptr {
275275
return multi_ptr(m_Pointer - r);
276276
}
277277

278+
#ifdef __ENABLE_USM_ADDR_SPACE__
278279
// Explicit conversion to global_space
279280
// Only available if Space == address_space::global_device_space ||
280281
// Space == address_space::global_host_space
@@ -290,6 +291,7 @@ template <typename ElementType, access::address_space Space> class multi_ptr {
290291
return multi_ptr<ElementType, access::address_space::global_space>(
291292
reinterpret_cast<global_pointer_t>(m_Pointer));
292293
}
294+
#endif // __ENABLE_USM_ADDR_SPACE__
293295

294296
// Only if Space == global_space
295297
template <access::address_space _Space = Space,

sycl/test/check_device_code/kernel_arguments_as.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-is-device -emit-llvm %s -S -o %t.ll -I %sycl_include -Wno-sycl-strict -Xclang -verify-ignore-unexpected=note,warning -Xclang -disable-llvm-passes
2-
// RUN: FileCheck %s --input-file %t.ll
2+
// RUN: FileCheck %s --input-file %t.ll --check-prefixes=CHECK,CHECK-DISABLE
3+
// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-is-device -emit-llvm %s -S -o %t.ll -I %sycl_include -Wno-sycl-strict -Xclang -verify-ignore-unexpected=note,warning -Xclang -disable-llvm-passes -D__ENABLE_USM_ADDR_SPACE__
4+
// RUN: FileCheck %s --input-file %t.ll --check-prefixes=CHECK,CHECK-ENABLE
35
//
46
// Check the address space of the pointer in accessor class.
57
//
68
// CHECK: %struct{{.*}}AccWrapper = type { %"class{{.*}}cl::sycl::accessor" }
79
// CHECK: %"class{{.*}}cl::sycl::accessor" = type { %"class{{.*}}AccessorImplDevice", %[[UNION:.*]] }
8-
// CHECK: %[[UNION]] = type { i32 addrspace(5)* }
10+
// CHECK-DISABLE: %[[UNION]] = type { i32 addrspace(1)* }
11+
// CHECK-ENABLE: %[[UNION]] = type { i32 addrspace(5)* }
912
// CHECK: %struct{{.*}}AccWrapper = type { %"class{{.*}}cl::sycl::accessor" }
1013
// CHECK-NEXT: %"class{{.*}}cl::sycl::accessor" = type { %"class{{.*}}LocalAccessorBaseDevice", i32 addrspace(3)* }
1114
//

sycl/test/check_device_code/usm_pointers.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,29 @@
11
// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-is-device -emit-llvm %s -S -o %t.ll -I %sycl_include -Wno-sycl-strict -Xclang -verify-ignore-unexpected=note,warning -Xclang -disable-llvm-passes
2-
// RUN: FileCheck %s --input-file %t.ll
2+
// RUN: FileCheck %s --input-file %t.ll --check-prefixes=CHECK,CHECK-DISABLE
3+
// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-is-device -emit-llvm %s -S -o %t.ll -I %sycl_include -Wno-sycl-strict -Xclang -verify-ignore-unexpected=note,warning -Xclang -disable-llvm-passes -D__ENABLE_USM_ADDR_SPACE__
4+
// RUN: FileCheck %s --input-file %t.ll --check-prefixes=CHECK,CHECK-ENABLE
35
//
46
// Check the address space of the pointer in multi_ptr class
57
//
6-
// CHECK: %[[DEVPTR_T:.*]] = type { i8 addrspace(5)* }
7-
// CHECK: %[[HOSTPTR_T:.*]] = type { i8 addrspace(6)* }
8+
// CHECK-DISABLE: %[[DEVPTR_T:.*]] = type { i8 addrspace(1)* }
9+
// CHECK-DISABLE: %[[HOSTPTR_T:.*]] = type { i8 addrspace(1)* }
10+
// CHECK-ENABLE: %[[DEVPTR_T:.*]] = type { i8 addrspace(5)* }
11+
// CHECK-ENABLE: %[[HOSTPTR_T:.*]] = type { i8 addrspace(6)* }
812
//
913
// CHECK-LABEL: define {{.*}} spir_func i8 addrspace(4)* @{{.*}}multi_ptr{{.*}}
1014
// CHECK: %[[M_PTR:.*]] = getelementptr inbounds %[[DEVPTR_T]]
11-
// CHECK-NEXT: %[[DEVLOAD:[0-9]+]] = load i8 addrspace(5)*, i8 addrspace(5)* addrspace(4)* %[[M_PTR]]
12-
// CHECK-NEXT: %[[DEVCAST:[0-9]+]] = addrspacecast i8 addrspace(5)* %[[DEVLOAD]] to i8 addrspace(4)*
15+
// CHECK-DISABLE-NEXT: %[[DEVLOAD:[0-9]+]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* %[[M_PTR]]
16+
// CHECK-DISABLE-NEXT: %[[DEVCAST:[0-9]+]] = addrspacecast i8 addrspace(1)* %[[DEVLOAD]] to i8 addrspace(4)*
17+
// CHECK-ENABLE-NEXT: %[[DEVLOAD:[0-9]+]] = load i8 addrspace(5)*, i8 addrspace(5)* addrspace(4)* %[[M_PTR]]
18+
// CHECK-ENABLE-NEXT: %[[DEVCAST:[0-9]+]] = addrspacecast i8 addrspace(5)* %[[DEVLOAD]] to i8 addrspace(4)*
1319
// ret i8 addrspace(4)* %[[DEVCAST]]
1420
//
1521
// CHECK-LABEL: define {{.*}} spir_func i8 addrspace(4)* @{{.*}}multi_ptr{{.*}}
1622
// CHECK: %[[M_PTR]] = getelementptr inbounds %[[HOSTPTR_T]]
17-
// CHECK-NEXT: %[[HOSTLOAD:[0-9]+]] = load i8 addrspace(6)*, i8 addrspace(6)* addrspace(4)* %[[M_PTR]]
18-
// CHECK-NEXT: %[[HOSTCAST:[0-9]+]] = addrspacecast i8 addrspace(6)* %[[HOSTLOAD]] to i8 addrspace(4)*
23+
// CHECK-DISABLE-NEXT: %[[HOSTLOAD:[0-9]+]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* %[[M_PTR]]
24+
// CHECK-DISABLE-NEXT: %[[HOSTCAST:[0-9]+]] = addrspacecast i8 addrspace(1)* %[[HOSTLOAD]] to i8 addrspace(4)*
25+
// CHECK-ENABLE-NEXT: %[[HOSTLOAD:[0-9]+]] = load i8 addrspace(6)*, i8 addrspace(6)* addrspace(4)* %[[M_PTR]]
26+
// CHECK-ENABLE-NEXT: %[[HOSTCAST:[0-9]+]] = addrspacecast i8 addrspace(6)* %[[HOSTLOAD]] to i8 addrspace(4)*
1927
// ret i8 addrspace(4)* %[[HOSTCAST]]
2028

2129
#include <CL/sycl.hpp>

0 commit comments

Comments
 (0)