Skip to content

Commit 9ab44b4

Browse files
authored
Move x86-specific types to the vendor module (rust-lang#293)
I believe we're reserving the `simd` module for exclusively the portable types and their operations, so this commit moves the various x86-specific types from the portable modules to the `x86` module. Along the way this also adds some doc blocks for all the existing x86 types.
1 parent f67eb4e commit 9ab44b4

File tree

7 files changed

+298
-43
lines changed

7 files changed

+298
-43
lines changed

coresimd/src/v128.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,6 @@ define_impl! {
4242
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
4343
}
4444

45-
define_ty_doc! {
46-
__m128i, i64, i64 |
47-
/// 128-bit wide signed integer vector type
48-
}
49-
5045
define_from!(
5146
u64x2,
5247
i64x2,

coresimd/src/v256.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,6 @@ define_impl! {
6666
x24, x25, x26, x27, x28, x29, x30, x31
6767
}
6868

69-
define_ty_doc! {
70-
__m256i,
71-
i64, i64, i64, i64 |
72-
/// 256-bit wide signed integer vector type
73-
}
74-
7569
define_from!(
7670
u64x4,
7771
i64x4,

coresimd/src/v64.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,6 @@ define_impl! { u8x8, u8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 }
2929
define_ty! { i8x8, i8, i8, i8, i8, i8, i8, i8, i8 }
3030
define_impl! { i8x8, i8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 }
3131

32-
// On `x86` corresponds to llvm's `x86_mmx` type.
33-
define_ty_doc! {
34-
__m64, i64 |
35-
/// 64-bit wide integer vector type.
36-
}
37-
3832
define_from!(u32x2, i32x2, u16x4, i16x4, u8x8, i8x8);
3933
define_from!(i32x2, u32x2, u16x4, i16x4, u8x8, i8x8);
4034
define_from!(u16x4, u32x2, i32x2, i16x4, u8x8, i8x8);

coresimd/src/x86/i586/sse.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use core::ptr;
66
use simd_llvm::*;
77
use v128::*;
88
use v64::*;
9-
use x86::__m128;
9+
use x86::*;
1010

1111
#[cfg(test)]
1212
use stdsimd_test::assert_instr;
@@ -1705,8 +1705,8 @@ mod tests {
17051705
use std::mem::transmute;
17061706
use std::f32::NAN;
17071707

1708-
use v128::u32x4;
1709-
use v64::{i8x8, __m64};
1708+
use v128::*;
1709+
use v64::*;
17101710
use x86::*;
17111711
use stdsimd_test::simd_test;
17121712
use test::black_box; // Used to inhibit constant-folding.

coresimd/src/x86/i686/mmx.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
1010
1111
use v64::*;
12+
use x86::*;
1213
use core::mem;
1314

1415
#[cfg(test)]

coresimd/src/x86/i686/ssse3.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#[cfg(test)]
44
use stdsimd_test::assert_instr;
55

6-
use v64::*;
6+
use x86::*;
77

88
/// Compute the absolute value of packed 8-bit integers in `a` and
99
/// return the unsigned results.

coresimd/src/x86/mod.rs

Lines changed: 293 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,300 @@ use core::mem;
55
#[macro_use]
66
mod macros;
77

8-
#[repr(simd)]
9-
#[derive(Clone, Copy, Debug)]
10-
#[allow(non_camel_case_types)]
11-
pub struct __m128(f32, f32, f32, f32);
12-
13-
#[repr(simd)]
14-
#[derive(Clone, Copy, Debug)]
15-
#[allow(non_camel_case_types)]
16-
pub struct __m128d(f64, f64);
17-
18-
#[repr(simd)]
19-
#[derive(Clone, Copy, Debug)]
20-
#[allow(non_camel_case_types)]
21-
pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32);
22-
23-
#[repr(simd)]
24-
#[derive(Clone, Copy, Debug)]
25-
#[allow(non_camel_case_types)]
26-
pub struct __m256d(f64, f64, f64, f64);
8+
macro_rules! types {
9+
($(
10+
$(#[$doc:meta])*
11+
pub struct $name:ident($($fields:tt)*);
12+
)*) => ($(
13+
$(#[$doc])*
14+
#[derive(Copy, Debug)]
15+
#[allow(non_camel_case_types)]
16+
#[repr(simd)]
17+
pub struct $name($($fields)*);
18+
19+
impl Clone for $name {
20+
#[inline(always)] // currently needed for correctness
21+
fn clone(&self) -> $name {
22+
*self
23+
}
24+
}
25+
)*)
26+
}
2727

28-
pub use v128::__m128i;
29-
pub use v256::__m256i;
30-
pub use v64::__m64;
28+
types! {
29+
/// 64-bit wide integer vector type, x86-specific
30+
///
31+
/// This type is the same as the `__m64` type defined by Intel,
32+
/// representing a 64-bit SIMD register. Usage of this type typically
33+
/// corresponds to the `mmx` target feature.
34+
///
35+
/// Internally this type may be viewed as:
36+
///
37+
/// * `i8x8` - eight `i8` variables packed together
38+
/// * `i16x4` - four `i16` variables packed together
39+
/// * `i32x2` - two `i32` variables packed together
40+
///
41+
/// (as well as unsigned versions). Each intrinsic may interpret the
42+
/// internal bits differently, check the documentation of the intrinsic
43+
/// to see how it's being used.
44+
///
45+
/// Note that this means that an instance of `__m64` typically just means
46+
/// a "bag of bits" which is left up to interpretation at the point of use.
47+
///
48+
/// Most intrinsics using `__m64` are prefixed with `_mm_` and the
49+
/// integer types tend to correspond to suffixes like "pi8" or "pi32" (not
50+
/// to be confused with "epiXX", used for `__m128i`).
51+
///
52+
/// # Examples
53+
///
54+
/// ```
55+
/// # #![feature(cfg_target_feature, target_feature)]
56+
/// # #[macro_use]
57+
/// # extern crate stdsimd;
58+
/// # fn main() {
59+
/// # #[target_feature(enable = "mmx")]
60+
/// # unsafe fn foo() {
61+
/// use stdsimd::vendor::*;
62+
///
63+
/// let all_bytes_zero = _mm_setzero_si64();
64+
/// let all_bytes_one = _mm_set1_pi8(1);
65+
/// let two_i32 = _mm_set_pi32(1, 2);
66+
/// # }
67+
/// # if cfg_feature_enabled!("mmx") { unsafe { foo() } }
68+
/// # }
69+
/// ```
70+
#[derive(PartialEq)]
71+
pub struct __m64(i64);
72+
73+
/// 128-bit wide integer vector type, x86-specific
74+
///
75+
/// This type is the same as the `__m128i` type defined by Intel,
76+
/// representing a 128-bit SIMD register. Usage of this type typically
77+
/// corresponds to the `sse` and up target features for x86/x86_64.
78+
///
79+
/// Internally this type may be viewed as:
80+
///
81+
/// * `i8x16` - sixteen `i8` variables packed together
82+
/// * `i16x8` - eight `i16` variables packed together
83+
/// * `i32x4` - four `i32` variables packed together
84+
/// * `i64x2` - two `i64` variables packed together
85+
///
86+
/// (as well as unsigned versions). Each intrinsic may interpret the
87+
/// internal bits differently, check the documentation of the intrinsic
88+
/// to see how it's being used.
89+
///
90+
/// Note that this means that an instance of `__m128i` typically just means
91+
/// a "bag of bits" which is left up to interpretation at the point of use.
92+
///
93+
/// Most intrinsics using `__m128i` are prefixed with `_mm_` and the
94+
/// integer types tend to correspond to suffixes like "epi8" or "epi32".
95+
///
96+
/// # Examples
97+
///
98+
/// ```
99+
/// # #![feature(cfg_target_feature, target_feature)]
100+
/// # #[macro_use]
101+
/// # extern crate stdsimd;
102+
/// # fn main() {
103+
/// # #[target_feature(enable = "sse2")]
104+
/// # unsafe fn foo() {
105+
/// use stdsimd::vendor::*;
106+
///
107+
/// let all_bytes_zero = _mm_setzero_si128();
108+
/// let all_bytes_one = _mm_set1_epi8(1);
109+
/// let four_i32 = _mm_set_epi32(1, 2, 3, 4);
110+
/// # }
111+
/// # if cfg_feature_enabled!("sse2") { unsafe { foo() } }
112+
/// # }
113+
/// ```
114+
#[derive(PartialEq)]
115+
pub struct __m128i(i64, i64);
116+
117+
/// 128-bit wide set of four `f32` types, x86-specific
118+
///
119+
/// This type is the same as the `__m128` type defined by Intel,
120+
/// representing a 128-bit SIMD register which internally consists of
121+
/// four packed `f32` instances. Usage of this type typically corresponds
122+
/// to the `sse` and up target features for x86/x86_64.
123+
///
124+
/// Note that unlike `__m128i`, the integer version of the 128-bit
125+
/// registers, this `__m128` type has *one* interpretation. Each instance
126+
/// of `__m128` always corresponds to `f32x4`, or four `f32` types packed
127+
/// together.
128+
///
129+
/// Most intrinsics using `__m128` are prefixed with `_mm_` and are
130+
/// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with
131+
/// "pd" which is used for `__m128d`.
132+
///
133+
/// # Examples
134+
///
135+
/// ```
136+
/// # #![feature(cfg_target_feature, target_feature)]
137+
/// # #[macro_use]
138+
/// # extern crate stdsimd;
139+
/// # fn main() {
140+
/// # #[target_feature(enable = "sse")]
141+
/// # unsafe fn foo() {
142+
/// use stdsimd::vendor::*;
143+
///
144+
/// let four_zeros = _mm_setzero_ps();
145+
/// let four_ones = _mm_set1_ps(1.0);
146+
/// let four_floats = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
147+
/// # }
148+
/// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
149+
/// # }
150+
/// ```
151+
pub struct __m128(f32, f32, f32, f32);
152+
153+
/// 128-bit wide set of two `f64` types, x86-specific
154+
///
155+
/// This type is the same as the `__m128d` type defined by Intel,
156+
/// representing a 128-bit SIMD register which internally consists of
157+
/// two packed `f64` instances. Usage of this type typically corresponds
158+
/// to the `sse` and up target features for x86/x86_64.
159+
///
160+
/// Note that unlike `__m128i`, the integer version of the 128-bit
161+
/// registers, this `__m128d` type has *one* interpretation. Each instance
162+
/// of `__m128d` always corresponds to `f64x2`, or two `f64` types packed
163+
/// together.
164+
///
165+
/// Most intrinsics using `__m128d` are prefixed with `_mm_` and are
166+
/// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with
167+
/// "ps" which is used for `__m128`.
168+
///
169+
/// # Examples
170+
///
171+
/// ```
172+
/// # #![feature(cfg_target_feature, target_feature)]
173+
/// # #[macro_use]
174+
/// # extern crate stdsimd;
175+
/// # fn main() {
176+
/// # #[target_feature(enable = "sse2")]
177+
/// # unsafe fn foo() {
178+
/// use stdsimd::vendor::*;
179+
///
180+
/// let two_zeros = _mm_setzero_pd();
181+
/// let two_ones = _mm_set1_pd(1.0);
182+
/// let two_floats = _mm_set_pd(1.0, 2.0);
183+
/// # }
184+
/// # if cfg_feature_enabled!("sse2") { unsafe { foo() } }
185+
/// # }
186+
/// ```
187+
pub struct __m128d(f64, f64);
188+
189+
/// 256-bit wide integer vector type, x86-specific
190+
///
191+
/// This type is the same as the `__m256i` type defined by Intel,
192+
/// representing a 256-bit SIMD register. Usage of this type typically
193+
/// corresponds to the `avx` and up target features for x86/x86_64.
194+
///
195+
/// Internally this type may be viewed as:
196+
///
197+
/// * `i8x32` - thirty-two `i8` variables packed together
198+
/// * `i16x16` - sixteen `i16` variables packed together
199+
/// * `i32x8` - eight `i32` variables packed together
200+
/// * `i64x4` - four `i64` variables packed together
201+
///
202+
/// (as well as unsigned versions). Each intrinsic may interpret the
203+
/// internal bits differently, check the documentation of the intrinsic
204+
/// to see how it's being used.
205+
///
206+
/// Note that this means that an instance of `__m256i` typically just means
207+
/// a "bag of bits" which is left up to interpretation at the point of use.
208+
///
209+
/// # Examples
210+
///
211+
/// ```
212+
/// # #![feature(cfg_target_feature, target_feature)]
213+
/// # #[macro_use]
214+
/// # extern crate stdsimd;
215+
/// # fn main() {
216+
/// # #[target_feature(enable = "avx")]
217+
/// # unsafe fn foo() {
218+
/// use stdsimd::vendor::*;
219+
///
220+
/// let all_bytes_zero = _mm256_setzero_si256();
221+
/// let all_bytes_one = _mm256_set1_epi8(1);
222+
/// let eight_i32 = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
223+
/// # }
224+
/// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
225+
/// # }
226+
/// ```
227+
#[derive(PartialEq)]
228+
pub struct __m256i(i64, i64, i64, i64);
229+
230+
/// 256-bit wide set of eight `f32` types, x86-specific
231+
///
232+
/// This type is the same as the `__m256` type defined by Intel,
233+
/// representing a 256-bit SIMD register which internally consists of
234+
/// eight packed `f32` instances. Usage of this type typically corresponds
235+
/// to the `avx` and up target features for x86/x86_64.
236+
///
237+
/// Note that unlike `__m256i`, the integer version of the 256-bit
238+
/// registers, this `__m256` type has *one* interpretation. Each instance
239+
/// of `__m256` always corresponds to `f32x8`, or eight `f32` types packed
240+
/// together.
241+
///
242+
/// Most intrinsics using `__m256` are prefixed with `_mm256_` and are
243+
/// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with
244+
/// "pd" which is used for `__m256d`.
245+
///
246+
/// # Examples
247+
///
248+
/// ```
249+
/// # #![feature(cfg_target_feature, target_feature)]
250+
/// # #[macro_use]
251+
/// # extern crate stdsimd;
252+
/// # fn main() {
253+
/// # #[target_feature(enable = "avx")]
254+
/// # unsafe fn foo() {
255+
/// use stdsimd::vendor::*;
256+
///
257+
/// let eight_zeros = _mm256_setzero_ps();
258+
/// let eight_ones = _mm256_set1_ps(1.0);
259+
/// let eight_floats = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
260+
/// # }
261+
/// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
262+
/// # }
263+
/// ```
264+
pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32);
265+
266+
/// 256-bit wide set of four `f64` types, x86-specific
267+
///
268+
/// This type is the same as the `__m256d` type defined by Intel,
269+
/// representing a 256-bit SIMD register which internally consists of
270+
/// four packed `f64` instances. Usage of this type typically corresponds
271+
/// to the `avx` and up target features for x86/x86_64.
272+
///
273+
/// Note that unlike `__m256i`, the integer version of the 256-bit
274+
/// registers, this `__m256d` type has *one* interpretation. Each instance
275+
/// of `__m256d` always corresponds to `f64x4`, or four `f64` types packed
276+
/// together.
277+
///
278+
/// Most intrinsics using `__m256d` are prefixed with `_mm256_` and are
279+
/// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with
280+
/// "ps" which is used for `__m256`.
281+
///
282+
/// # Examples
283+
///
284+
/// ```
285+
/// # #![feature(cfg_target_feature, target_feature)]
286+
/// # #[macro_use]
287+
/// # extern crate stdsimd;
288+
/// # fn main() {
289+
/// # #[target_feature(enable = "avx")]
290+
/// # unsafe fn foo() {
291+
/// use stdsimd::vendor::*;
292+
///
293+
/// let four_zeros = _mm256_setzero_pd();
294+
/// let four_ones = _mm256_set1_pd(1.0);
295+
/// let four_floats = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
296+
/// # }
297+
/// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
298+
/// # }
299+
/// ```
300+
pub struct __m256d(f64, f64, f64, f64);
301+
}
31302

32303
#[cfg(test)]
33304
mod test;

0 commit comments

Comments
 (0)