Skip to content

Commit 0359bba

Browse files
committed
nvptx: fix _syncthreads to use unaligned barrier
* Deprecate _syncthreads (the CUDA name) in favor of the new _barrier_sync (NVPTX name barrier.sync).
* The barrier.sync instruction is equivalent to barrier.sync.aligned prior to sm_70, and will lead to errors/deadlock if passes (such as MIR JumpThreading) lose the aligned property. rust-lang/rust#137086 https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar
* Since MIR does not currently have a way to apply something like LLVM's convergent attribute (and because convergent does not preserve alignment, which can be broken by inlining), we cannot prevent loss of alignment, and thus we require target feature sm_70. https://llvm.org/docs/ConvergentOperations.html
WIP: compile-time dependency on ptx60
1 parent 303f0dd commit 0359bba

File tree

1 file changed

+45
-3
lines changed
  • crates/core_arch/src/nvptx

1 file changed

+45
-3
lines changed

crates/core_arch/src/nvptx/mod.rs

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ pub use packed::*;
2020

2121
#[allow(improper_ctypes)]
2222
unsafe extern "C" {
23-
#[link_name = "llvm.nvvm.barrier0"]
24-
fn syncthreads() -> ();
23+
#[link_name = "llvm.nvvm.barrier.sync"]
24+
fn barrier_sync(_: u32) -> ();
2525
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
2626
fn block_dim_x() -> i32;
2727
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
@@ -49,10 +49,52 @@ unsafe extern "C" {
4949
}
5050

5151
/// Synchronizes all threads in the block.
///
/// The argument `a` is a logical barrier resource with value `0` through `15`.
///
/// This does not require textual alignment, so the following code is valid.
///
/// ```ignore
/// if tid % 2 == 0 {
///     shared[tid] *= 2;
///     _barrier_sync(0);
///     myval += shared[tid + 1];
/// } else {
///     shared[tid] *= 4;
///     _barrier_sync(0);
/// }
/// ```
///
/// This intrinsic has different execution semantics prior to `sm_70`, and thus
/// it requires the `sm_70` target feature for correct behavior. The instruction
/// was introduced in PTX 6.0, so its use has a compile-time dependency on the
/// `ptx60` target feature.
///
/// TODO: The more restrictive "aligned" semantics of
/// `llvm.nvvm.barrier.sync.aligned` are [currently
/// miscompiled](https://github.com/rust-lang/rust/issues/137086) due to MIR
/// JumpThreading and lack of `convergent` attribute propagated to LLVM. Once
/// resolved, a `_barrier_sync_aligned` intrinsic can be exposed at all target
/// features.
///
/// # Safety
///
/// * The code must execute on a device that supports `sm_70`. On earlier
///   architectures the instruction behaves like the aligned barrier, and a
///   barrier reached from divergent control flow (as in the example above) can
///   produce errors or deadlock.
/// * `a` is expected to name a barrier resource in `0..=15`; the behavior for
///   other values is not established here and should be checked against the
///   PTX ISA documentation for `barrier.sync`.
#[inline]
#[cfg(target_feature = "ptx60")]
#[target_feature(enable = "sm_70", enable = "ptx60")]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
pub unsafe fn _barrier_sync(a: u32) {
    // SAFETY: the caller upholds the contract documented under `# Safety`; this
    // wrapper only forwards `a` to the `llvm.nvvm.barrier.sync` intrinsic.
    unsafe { barrier_sync(a) }
}
87+
88+
/// Synchronizes all threads in the block.
///
/// Deprecated alias for [`_barrier_sync`]: this is exactly `_barrier_sync(0)`,
/// i.e. a synchronization on logical barrier resource `0`.
///
/// # Safety
///
/// The code must execute on a device that supports `sm_70`. On earlier
/// architectures the underlying `barrier.sync` instruction behaves like the
/// aligned barrier, and reaching it from divergent control flow can produce
/// errors or deadlock.
#[inline]
#[cfg(target_feature = "ptx60")]
#[target_feature(enable = "sm_70", enable = "ptx60")]
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
#[deprecated(since = "1.88.0", note = "use _barrier_sync(0)")]
pub unsafe fn _syncthreads() {
    // SAFETY: the caller upholds the contract documented under `# Safety`, and
    // barrier resource `0` lies within the documented `0` through `15` range of
    // `_barrier_sync`.
    unsafe { _barrier_sync(0) }
}
5799

58100
/// x-th thread-block dimension.

0 commit comments

Comments
 (0)