Skip to content

Commit c54c92e

Browse files
authored
[SYCL] Add global_device and global_host address spaces (#1704)
This patch introduces two new address spaces in OpenCL, global_device and global_host, which are subsets of the global address space. We want to give the user a way to tell the compiler the allocation type of a USM pointer for optimization purposes. While it is usually easy for our compiler to distinguish loads or stores that access local memory from those that access global memory, distinguishing USM pointers that access host memory from those that access device memory, or even distinguishing USM pointers that access host memory from accessors that access global memory, is currently impossible. This is because all host code has been stripped out before we reach the backend, and both accessors and USM pointers are represented in LLVM IR as pointers in the global address space in the kernel's arguments. Being able to distinguish between these types of pointers at compile time is valuable because it allows us to instantiate simpler load-store units to perform memory transactions. Signed-off-by: Dmitry Sidorov <[email protected]>
1 parent efdd6ef commit c54c92e

21 files changed

+278
-52
lines changed

clang/include/clang/AST/Type.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,12 @@ class Qualifiers {
480480
// Otherwise in OpenCLC v2.0 s6.5.5: every address space except
481481
// for __constant can be used as __generic.
482482
(A == LangAS::opencl_generic && B != LangAS::opencl_constant) ||
483+
// We also define global_device and global_host address spaces,
484+
// to distinguish global pointers allocated on host from pointers
485+
// allocated on device, which are a subset of __global.
486+
// FIXME: add a reference to spec when ready
487+
(A == LangAS::opencl_global && (B == LangAS::opencl_global_device ||
488+
B == LangAS::opencl_global_host)) ||
483489
// Consider pointer size address spaces to be equivalent to default.
484490
((isPtrSizeAddressSpace(A) || A == LangAS::Default) &&
485491
(isPtrSizeAddressSpace(B) || B == LangAS::Default));
@@ -493,7 +499,9 @@ class Qualifiers {
493499
(!hasAddressSpace() &&
494500
(other.getAddressSpace() == LangAS::opencl_private ||
495501
other.getAddressSpace() == LangAS::opencl_local ||
496-
other.getAddressSpace() == LangAS::opencl_global));
502+
other.getAddressSpace() == LangAS::opencl_global ||
503+
other.getAddressSpace() == LangAS::opencl_global_device ||
504+
other.getAddressSpace() == LangAS::opencl_global_host));
497505
}
498506

499507
/// Determines if these qualifiers compatibly include another set.

clang/include/clang/Basic/AddressSpaces.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ enum class LangAS : unsigned {
3636
opencl_constant,
3737
opencl_private,
3838
opencl_generic,
39+
opencl_global_device,
40+
opencl_global_host,
3941

4042
// CUDA specific address spaces.
4143
cuda_device,

clang/include/clang/Basic/Attr.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,16 @@ def OpenCLGlobalAddressSpace : TypeAttr {
12941294
let Documentation = [OpenCLAddressSpaceGlobalDocs];
12951295
}
12961296

1297+
def OpenCLGlobalDeviceAddressSpace : TypeAttr {
1298+
let Spellings = [Clang<"opencl_global_device">];
1299+
let Documentation = [OpenCLGlobalAddressSpacesDocs];
1300+
}
1301+
1302+
def OpenCLGlobalHostAddressSpace : TypeAttr {
1303+
let Spellings = [Clang<"opencl_global_host">];
1304+
let Documentation = [OpenCLGlobalAddressSpacesDocs];
1305+
}
1306+
12971307
def OpenCLLocalAddressSpace : TypeAttr {
12981308
let Spellings = [Keyword<"__local">, Keyword<"local">, Clang<"opencl_local">];
12991309
let Documentation = [OpenCLAddressSpaceLocalDocs];

clang/include/clang/Basic/AttrDocs.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3559,6 +3559,17 @@ scope) variables and static local variable as well.
35593559
}];
35603560
}
35613561

3562+
def OpenCLGlobalAddressSpacesDocs : Documentation {
3563+
let Category = DocOpenCLAddressSpaces;
3564+
let Heading = "[[clang::opencl_global_device]], [[clang::opencl_global_host]]";
3565+
let Content = [{
3566+
The (global_device) and (global_host) address space attributes specify that an
3567+
object is allocated in global memory on the device/host. This helps distinguish
3568+
USM pointers that access device memory and accessors that access global memory
3569+
from those that access host memory.
3570+
}];
3571+
}
3572+
35623573
def OpenCLAddressSpaceLocalDocs : Documentation {
35633574
let Category = DocOpenCLAddressSpaces;
35643575
let Heading = "__local, local, [[clang::opencl_local]]";

clang/include/clang/Sema/ParsedAttr.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,10 @@ class ParsedAttr final
606606
return LangAS::opencl_constant;
607607
case ParsedAttr::AT_OpenCLGlobalAddressSpace:
608608
return LangAS::opencl_global;
609+
case ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace:
610+
return LangAS::opencl_global_device;
611+
case ParsedAttr::AT_OpenCLGlobalHostAddressSpace:
612+
return LangAS::opencl_global_host;
609613
case ParsedAttr::AT_OpenCLLocalAddressSpace:
610614
return LangAS::opencl_local;
611615
case ParsedAttr::AT_OpenCLPrivateAddressSpace:

clang/lib/AST/ASTContext.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -919,18 +919,20 @@ static const LangASMap *getAddressSpaceMap(const TargetInfo &T,
919919
// The fake address space map must have a distinct entry for each
920920
// language-specific address space.
921921
static const unsigned FakeAddrSpaceMap[] = {
922-
0, // Default
923-
1, // opencl_global
924-
3, // opencl_local
925-
2, // opencl_constant
926-
0, // opencl_private
927-
4, // opencl_generic
928-
5, // cuda_device
929-
6, // cuda_constant
930-
7, // cuda_shared
931-
8, // ptr32_sptr
932-
9, // ptr32_uptr
933-
10 // ptr64
922+
0, // Default
923+
1, // opencl_global
924+
3, // opencl_local
925+
2, // opencl_constant
926+
0, // opencl_private
927+
4, // opencl_generic
928+
11, // opencl_global_device
929+
12, // opencl_global_host
930+
5, // cuda_device
931+
6, // cuda_constant
932+
7, // cuda_shared
933+
8, // ptr32_sptr
934+
9, // ptr32_uptr
935+
10 // ptr64
934936
};
935937
return &FakeAddrSpaceMap;
936938
} else {

clang/lib/AST/ItaniumMangle.cpp

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2387,16 +2387,39 @@ void CXXNameMangler::mangleQualifiers(Qualifiers Quals, const DependentAddressSp
23872387
switch (AS) {
23882388
default: llvm_unreachable("Not a language specific address space");
23892389
// <OpenCL-addrspace> ::= "CL" [ "global" | "local" | "constant" |
2390-
// "private"| "generic" ]
2391-
case LangAS::opencl_global: ASString = "CLglobal"; break;
2392-
case LangAS::opencl_local: ASString = "CLlocal"; break;
2393-
case LangAS::opencl_constant: ASString = "CLconstant"; break;
2394-
case LangAS::opencl_private: ASString = "CLprivate"; break;
2395-
case LangAS::opencl_generic: ASString = "CLgeneric"; break;
2390+
// "private"| "generic" | "global_device" |
2391+
// "global_host" ]
2392+
case LangAS::opencl_global:
2393+
ASString = "CLglobal";
2394+
break;
2395+
case LangAS::opencl_global_device:
2396+
ASString = "CLDevice";
2397+
break;
2398+
case LangAS::opencl_global_host:
2399+
ASString = "CLHost";
2400+
break;
2401+
case LangAS::opencl_local:
2402+
ASString = "CLlocal";
2403+
break;
2404+
case LangAS::opencl_constant:
2405+
ASString = "CLconstant";
2406+
break;
2407+
case LangAS::opencl_private:
2408+
ASString = "CLprivate";
2409+
break;
2410+
case LangAS::opencl_generic:
2411+
ASString = "CLgeneric";
2412+
break;
23962413
// <CUDA-addrspace> ::= "CU" [ "device" | "constant" | "shared" ]
2397-
case LangAS::cuda_device: ASString = "CUdevice"; break;
2398-
case LangAS::cuda_constant: ASString = "CUconstant"; break;
2399-
case LangAS::cuda_shared: ASString = "CUshared"; break;
2414+
case LangAS::cuda_device:
2415+
ASString = "CUdevice";
2416+
break;
2417+
case LangAS::cuda_constant:
2418+
ASString = "CUconstant";
2419+
break;
2420+
case LangAS::cuda_shared:
2421+
ASString = "CUshared";
2422+
break;
24002423
// <ptrsize-addrspace> ::= [ "ptr32_sptr" | "ptr32_uptr" | "ptr64" ]
24012424
case LangAS::ptr32_sptr:
24022425
ASString = "ptr32_sptr";

clang/lib/AST/MicrosoftMangle.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1823,6 +1823,12 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
18231823
case LangAS::opencl_global:
18241824
Extra.mangleSourceName("_ASCLglobal");
18251825
break;
1826+
case LangAS::opencl_global_device:
1827+
Extra.mangleSourceName("_ASCLDevice");
1828+
break;
1829+
case LangAS::opencl_global_host:
1830+
Extra.mangleSourceName("_ASCLHost");
1831+
break;
18261832
case LangAS::opencl_local:
18271833
Extra.mangleSourceName("_ASCLlocal");
18281834
break;

clang/lib/AST/TypePrinter.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,6 +1577,8 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
15771577

15781578
case attr::OpenCLPrivateAddressSpace:
15791579
case attr::OpenCLGlobalAddressSpace:
1580+
case attr::OpenCLGlobalDeviceAddressSpace:
1581+
case attr::OpenCLGlobalHostAddressSpace:
15801582
case attr::OpenCLLocalAddressSpace:
15811583
case attr::OpenCLConstantAddressSpace:
15821584
case attr::OpenCLGenericAddressSpace:
@@ -1880,6 +1882,10 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) {
18801882
return "__constant";
18811883
case LangAS::opencl_generic:
18821884
return "__generic";
1885+
case LangAS::opencl_global_device:
1886+
return "__global_device";
1887+
case LangAS::opencl_global_host:
1888+
return "__global_host";
18831889
case LangAS::cuda_device:
18841890
return "__device__";
18851891
case LangAS::cuda_constant:

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
4545
Constant, // opencl_constant
4646
Private, // opencl_private
4747
Generic, // opencl_generic
48+
Global, // opencl_global_device
49+
Global, // opencl_global_host
4850
Global, // cuda_device
4951
Constant, // cuda_constant
5052
Local, // cuda_shared
@@ -60,6 +62,8 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
6062
Constant, // opencl_constant
6163
Private, // opencl_private
6264
Generic, // opencl_generic
65+
Global, // opencl_global_device
66+
Global, // opencl_global_host
6367
Global, // cuda_device
6468
Constant, // cuda_constant
6569
Local, // cuda_shared

clang/lib/Basic/Targets/NVPTX.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ static const unsigned NVPTXAddrSpaceMap[] = {
3030
0, // opencl_private
3131
// FIXME: generic has to be added to the target
3232
0, // opencl_generic
33+
1, // opencl_global_device
34+
1, // opencl_global_host
3335
1, // cuda_device
3436
4, // cuda_constant
3537
3, // cuda_shared

clang/lib/Basic/Targets/SPIR.h

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,33 +24,37 @@ namespace clang {
2424
namespace targets {
2525

2626
static const unsigned SPIRAddrSpaceMap[] = {
27-
0, // Default
28-
1, // opencl_global
29-
3, // opencl_local
30-
2, // opencl_constant
31-
0, // opencl_private
32-
4, // opencl_generic
33-
0, // cuda_device
34-
0, // cuda_constant
35-
0, // cuda_shared
36-
0, // ptr32_sptr
37-
0, // ptr32_uptr
38-
0 // ptr64
27+
0, // Default
28+
1, // opencl_global
29+
3, // opencl_local
30+
2, // opencl_constant
31+
0, // opencl_private
32+
4, // opencl_generic
33+
11, // opencl_global_device
34+
12, // opencl_global_host
35+
0, // cuda_device
36+
0, // cuda_constant
37+
0, // cuda_shared
38+
0, // ptr32_sptr
39+
0, // ptr32_uptr
40+
0 // ptr64
3941
};
4042

4143
static const unsigned SYCLAddrSpaceMap[] = {
42-
4, // Default
43-
1, // opencl_global
44-
3, // opencl_local
45-
2, // opencl_constant
46-
0, // opencl_private
47-
4, // opencl_generic
48-
0, // cuda_device
49-
0, // cuda_constant
50-
0, // cuda_shared
51-
0, // ptr32_sptr
52-
0, // ptr32_uptr
53-
0 // ptr64
44+
4, // Default
45+
1, // opencl_global
46+
3, // opencl_local
47+
2, // opencl_constant
48+
0, // opencl_private
49+
4, // opencl_generic
50+
11, // opencl_global_device
51+
12, // opencl_global_host
52+
0, // cuda_device
53+
0, // cuda_constant
54+
0, // cuda_shared
55+
0, // ptr32_sptr
56+
0, // ptr32_uptr
57+
0 // ptr64
5458
};
5559

5660
class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo {

clang/lib/Basic/Targets/TCE.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ static const unsigned TCEOpenCLAddrSpaceMap[] = {
3535
4, // opencl_local
3636
5, // opencl_constant
3737
0, // opencl_private
38+
1, // opencl_global_device
39+
1, // opencl_global_host
3840
// FIXME: generic has to be added to the target
3941
0, // opencl_generic
4042
0, // cuda_device

clang/lib/Basic/Targets/X86.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ static const unsigned X86AddrSpaceMap[] = {
2929
0, // opencl_constant
3030
0, // opencl_private
3131
0, // opencl_generic
32+
0, // opencl_global_device
33+
0, // opencl_global_host
3234
0, // cuda_device
3335
0, // cuda_constant
3436
0, // cuda_shared

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,10 +1363,18 @@ static void removeImageAccessQualifier(std::string& TyName) {
13631363
// (basically all single AS CPUs).
13641364
static unsigned ArgInfoAddressSpace(LangAS AS) {
13651365
switch (AS) {
1366-
case LangAS::opencl_global: return 1;
1367-
case LangAS::opencl_constant: return 2;
1368-
case LangAS::opencl_local: return 3;
1369-
case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs.
1366+
case LangAS::opencl_global:
1367+
return 1;
1368+
case LangAS::opencl_constant:
1369+
return 2;
1370+
case LangAS::opencl_local:
1371+
return 3;
1372+
case LangAS::opencl_generic:
1373+
return 4; // Not in SPIR 2.0 specs.
1374+
case LangAS::opencl_global_device:
1375+
return 11;
1376+
case LangAS::opencl_global_host:
1377+
return 12;
13701378
default:
13711379
return 0; // Assume private.
13721380
}
@@ -3861,6 +3869,8 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
38613869
if (LangOpts.OpenCL) {
38623870
AddrSpace = D ? D->getType().getAddressSpace() : LangAS::opencl_global;
38633871
assert(AddrSpace == LangAS::opencl_global ||
3872+
AddrSpace == LangAS::opencl_global_device ||
3873+
AddrSpace == LangAS::opencl_global_host ||
38643874
AddrSpace == LangAS::opencl_constant ||
38653875
AddrSpace == LangAS::opencl_local ||
38663876
AddrSpace >= LangAS::FirstTargetAddressSpace);

clang/lib/Sema/SemaType.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7967,6 +7967,8 @@ static bool isAddressSpaceKind(const ParsedAttr &attr) {
79677967
return attrKind == ParsedAttr::AT_AddressSpace ||
79687968
attrKind == ParsedAttr::AT_OpenCLPrivateAddressSpace ||
79697969
attrKind == ParsedAttr::AT_OpenCLGlobalAddressSpace ||
7970+
attrKind == ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace ||
7971+
attrKind == ParsedAttr::AT_OpenCLGlobalHostAddressSpace ||
79707972
attrKind == ParsedAttr::AT_OpenCLLocalAddressSpace ||
79717973
attrKind == ParsedAttr::AT_OpenCLConstantAddressSpace ||
79727974
attrKind == ParsedAttr::AT_OpenCLGenericAddressSpace;
@@ -8048,6 +8050,8 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
80488050
break;
80498051
case ParsedAttr::AT_OpenCLPrivateAddressSpace:
80508052
case ParsedAttr::AT_OpenCLGlobalAddressSpace:
8053+
case ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace:
8054+
case ParsedAttr::AT_OpenCLGlobalHostAddressSpace:
80518055
case ParsedAttr::AT_OpenCLLocalAddressSpace:
80528056
case ParsedAttr::AT_OpenCLConstantAddressSpace:
80538057
case ParsedAttr::AT_OpenCLGenericAddressSpace:

clang/test/AST/language_address_space_attribute.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,18 @@ void langas() {
1010
// CHECK: VarDecl {{.*}} z_global '__global int *'
1111
[[clang::opencl_global]] int *z_global;
1212

13+
// CHECK: VarDecl {{.*}} x_global_device '__global_device int *'
14+
__attribute__((opencl_global_device)) int *x_global_device;
15+
16+
// CHECK: VarDecl {{.*}} z_global_device '__global_device int *'
17+
[[clang::opencl_global_device]] int *z_global_device;
18+
19+
// CHECK: VarDecl {{.*}} x_global_host '__global_host int *'
20+
__attribute__((opencl_global_host)) int *x_global_host;
21+
22+
// CHECK: VarDecl {{.*}} z_global_host '__global_host int *'
23+
[[clang::opencl_global_host]] int *z_global_host;
24+
1325
// CHECK: VarDecl {{.*}} x_local '__local int *'
1426
__attribute__((opencl_local)) int *x_local;
1527

clang/test/CodeGenCXX/mangle-address-space.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ void ocl_f0(char __private *p) { }
4343

4444
struct ocl_OpaqueType;
4545
typedef ocl_OpaqueType __global * ocl_OpaqueTypePtr;
46+
typedef ocl_OpaqueType __attribute__((opencl_global_host)) * ocl_OpaqueTypePtrH;
47+
typedef ocl_OpaqueType
48+
__attribute__((opencl_global_device)) *
49+
ocl_OpaqueTypePtrD;
4650

4751
// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f0PU8CLglobal14ocl_OpaqueType
4852
// WINOCL-LABEL: define {{.*}}void @"?ocl_f0@@YAXPEAU?$_ASCLglobal@$$CAUocl_OpaqueType@@@__clang@@@Z"
@@ -61,4 +65,12 @@ __constant float *ocl_f1(char __generic const *p) { return 0;}
6165
// CHECKOCL-LABEL: define {{.*}}float* @_Z6ocl_f2PU9CLgenericKc
6266
// WINOCL-LABEL: define {{.*}}float* @"?ocl_f2@@YAPEAU?$_ASCLgeneric@$$CAM@__clang@@QEAU?$_ASCLgeneric@$$CBD@2@@Z"
6367
__generic float *ocl_f2(__generic char const * const p) { return 0;}
68+
69+
// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f3PU6CLHost14ocl_OpaqueType
70+
// WINOCL-LABEL: define {{.*}}void @"?ocl_f3@@YAXPEAU?$_ASCLHost@$$CAUocl_OpaqueType@@@__clang@@@Z"
71+
void ocl_f3(ocl_OpaqueTypePtrH) {}
72+
73+
// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f4PU8CLDevice14ocl_OpaqueType
74+
// WINOCL-LABEL: define {{.*}}void @"?ocl_f4@@YAXPEAU?$_ASCLDevice@$$CAUocl_OpaqueType@@@__clang@@@Z"
75+
void ocl_f4(ocl_OpaqueTypePtrD) {}
6476
#endif

0 commit comments

Comments
 (0)