Move x86-specific types to the vendor module (rust-lang#293)

alexcrichton · web-flow · commit 9ab44b4f09db · 2018-01-19T21:20:44.000-06:00
I believe we're reserving the `simd` module for exclusively the portable types
and their operations, so this commit moves the various x86-specific types from
the portable modules to the `x86` module. Along the way this also adds some doc
blocks for all the existing x86 types.
diff --git a/coresimd/src/v128.rs b/coresimd/src/v128.rs
@@ -42,11 +42,6 @@ define_impl! {
     x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
 }
 
-define_ty_doc! {
-    __m128i, i64, i64 |
-    /// 128-bit wide signed integer vector type
-}
-
 define_from!(
     u64x2,
     i64x2,
diff --git a/coresimd/src/v256.rs b/coresimd/src/v256.rs
@@ -66,12 +66,6 @@ define_impl! {
     x24, x25, x26, x27, x28, x29, x30, x31
 }
 
-define_ty_doc! {
-    __m256i,
-    i64, i64, i64, i64 |
-    /// 256-bit wide signed integer vector type
-}
-
 define_from!(
     u64x4,
     i64x4,
diff --git a/coresimd/src/v64.rs b/coresimd/src/v64.rs
@@ -29,12 +29,6 @@ define_impl! { u8x8, u8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 }
 define_ty! { i8x8, i8, i8, i8, i8, i8, i8, i8, i8 }
 define_impl! { i8x8, i8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 }
 
-// On `x86` corresponds to llvm's `x86_mmx` type.
-define_ty_doc! {
-    __m64, i64 |
-    /// 64-bit wide integer vector type.
-}
-
 define_from!(u32x2, i32x2, u16x4, i16x4, u8x8, i8x8);
 define_from!(i32x2, u32x2, u16x4, i16x4, u8x8, i8x8);
 define_from!(u16x4, u32x2, i32x2, i16x4, u8x8, i8x8);
diff --git a/coresimd/src/x86/i586/sse.rs b/coresimd/src/x86/i586/sse.rs
@@ -6,7 +6,7 @@ use core::ptr;
 use simd_llvm::*;
 use v128::*;
 use v64::*;
-use x86::__m128;
+use x86::*;
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -1705,8 +1705,8 @@ mod tests {
     use std::mem::transmute;
     use std::f32::NAN;
 
-    use v128::u32x4;
-    use v64::{i8x8, __m64};
+    use v128::*;
+    use v64::*;
     use x86::*;
     use stdsimd_test::simd_test;
     use test::black_box; // Used to inhibit constant-folding.
diff --git a/coresimd/src/x86/i686/mmx.rs b/coresimd/src/x86/i686/mmx.rs
@@ -9,6 +9,7 @@
 //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 
 use v64::*;
+use x86::*;
 use core::mem;
 
 #[cfg(test)]
diff --git a/coresimd/src/x86/i686/ssse3.rs b/coresimd/src/x86/i686/ssse3.rs
@@ -3,7 +3,7 @@
 #[cfg(test)]
 use stdsimd_test::assert_instr;
 
-use v64::*;
+use x86::*;
 
 /// Compute the absolute value of packed 8-bit integers in `a` and
 /// return the unsigned results.
diff --git a/coresimd/src/x86/mod.rs b/coresimd/src/x86/mod.rs
@@ -5,29 +5,300 @@ use core::mem;
 #[macro_use]
 mod macros;
 
-#[repr(simd)]
-#[derive(Clone, Copy, Debug)]
-#[allow(non_camel_case_types)]
-pub struct __m128(f32, f32, f32, f32);
-
-#[repr(simd)]
-#[derive(Clone, Copy, Debug)]
-#[allow(non_camel_case_types)]
-pub struct __m128d(f64, f64);
-
-#[repr(simd)]
-#[derive(Clone, Copy, Debug)]
-#[allow(non_camel_case_types)]
-pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32);
-
-#[repr(simd)]
-#[derive(Clone, Copy, Debug)]
-#[allow(non_camel_case_types)]
-pub struct __m256d(f64, f64, f64, f64);
+macro_rules! types {
+    ($(
+        $(#[$doc:meta])*
+        pub struct $name:ident($($fields:tt)*);
+    )*) => ($(
+        $(#[$doc])*
+        #[derive(Copy, Debug)]
+        #[allow(non_camel_case_types)]
+        #[repr(simd)]
+        pub struct $name($($fields)*);
+
+        impl Clone for $name {
+            #[inline(always)] // currently needed for correctness
+            fn clone(&self) -> $name {
+                *self
+            }
+        }
+    )*)
+}
 
-pub use v128::__m128i;
-pub use v256::__m256i;
-pub use v64::__m64;
+types! {
+    /// 64-bit wide integer vector type, x86-specific
+    ///
+    /// This type is the same as the `__m64` type defined by Intel,
+    /// representing a 64-bit SIMD register. Usage of this type typically
+    /// corresponds to the `mmx` target feature.
+    ///
+    /// Internally this type may be viewed as:
+    ///
+    /// * `i8x8` - eight `i8` variables packed together
+    /// * `i16x4` - four `i16` variables packed together
+    /// * `i32x2` - two `i32` variables packed together
+    ///
+    /// (as well as unsigned versions). Each intrinsic may interpret the
+    /// internal bits differently, check the documentation of the intrinsic
+    /// to see how it's being used.
+    ///
+    /// Note that this means that an instance of `__m64` typically just means
+    /// a "bag of bits" which is left up to interpretation at the point of use.
+    ///
+    /// Most intrinsics using `__m64` are prefixed with `_mm_` and the
+    /// integer types tend to correspond to suffixes like "pi8" or "pi32" (not
+    /// to be confused with "epiXX", used for `__m128i`).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(cfg_target_feature, target_feature)]
+    /// # #[macro_use]
+    /// # extern crate stdsimd;
+    /// # fn main() {
+    /// # #[target_feature(enable = "mmx")]
+    /// # unsafe fn foo() {
+    /// use stdsimd::vendor::*;
+    ///
+    /// let all_bytes_zero = _mm_setzero_si64();
+    /// let all_bytes_one = _mm_set1_pi8(1);
+    /// let two_i32 = _mm_set_pi32(1, 2);
+    /// # }
+    /// # if cfg_feature_enabled!("mmx") { unsafe { foo() } }
+    /// # }
+    /// ```
+    #[derive(PartialEq)]
+    pub struct __m64(i64);
+
+    /// 128-bit wide integer vector type, x86-specific
+    ///
+    /// This type is the same as the `__m128i` type defined by Intel,
+    /// representing a 128-bit SIMD register. Usage of this type typically
+    /// corresponds to the `sse` and up target features for x86/x86_64.
+    ///
+    /// Internally this type may be viewed as:
+    ///
+    /// * `i8x16` - sixteen `i8` variables packed together
+    /// * `i16x8` - eight `i16` variables packed together
+    /// * `i32x4` - four `i32` variables packed together
+    /// * `i64x2` - two `i64` variables packed together
+    ///
+    /// (as well as unsigned versions). Each intrinsic may interpret the
+    /// internal bits differently, check the documentation of the intrinsic
+    /// to see how it's being used.
+    ///
+    /// Note that this means that an instance of `__m128i` typically just means
+    /// a "bag of bits" which is left up to interpretation at the point of use.
+    ///
+    /// Most intrinsics using `__m128i` are prefixed with `_mm_` and the
+    /// integer types tend to correspond to suffixes like "epi8" or "epi32".
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(cfg_target_feature, target_feature)]
+    /// # #[macro_use]
+    /// # extern crate stdsimd;
+    /// # fn main() {
+    /// # #[target_feature(enable = "sse2")]
+    /// # unsafe fn foo() {
+    /// use stdsimd::vendor::*;
+    ///
+    /// let all_bytes_zero = _mm_setzero_si128();
+    /// let all_bytes_one = _mm_set1_epi8(1);
+    /// let four_i32 = _mm_set_epi32(1, 2, 3, 4);
+    /// # }
+    /// # if cfg_feature_enabled!("sse2") { unsafe { foo() } }
+    /// # }
+    /// ```
+    #[derive(PartialEq)]
+    pub struct __m128i(i64, i64);
+
+    /// 128-bit wide set of four `f32` types, x86-specific
+    ///
+    /// This type is the same as the `__m128` type defined by Intel,
+    /// representing a 128-bit SIMD register which internally consists of
+    /// four packed `f32` instances. Usage of this type typically corresponds
+    /// to the `sse` and up target features for x86/x86_64.
+    ///
+    /// Note that unlike `__m128i`, the integer version of the 128-bit
+    /// registers, this `__m128` type has *one* interpretation. Each instance
+    /// of `__m128` always corresponds to `f32x4`, or four `f32` types packed
+    /// together.
+    ///
+    /// Most intrinsics using `__m128` are prefixed with `_mm_` and are
+    /// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with
+    /// "pd" which is used for `__m128d`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(cfg_target_feature, target_feature)]
+    /// # #[macro_use]
+    /// # extern crate stdsimd;
+    /// # fn main() {
+    /// # #[target_feature(enable = "sse")]
+    /// # unsafe fn foo() {
+    /// use stdsimd::vendor::*;
+    ///
+    /// let four_zeros = _mm_setzero_ps();
+    /// let four_ones = _mm_set1_ps(1.0);
+    /// let four_floats = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
+    /// # }
+    /// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
+    /// # }
+    /// ```
+    pub struct __m128(f32, f32, f32, f32);
+
+    /// 128-bit wide set of two `f64` types, x86-specific
+    ///
+    /// This type is the same as the `__m128d` type defined by Intel,
+    /// representing a 128-bit SIMD register which internally consists of
+    /// two packed `f64` instances. Usage of this type typically corresponds
+    /// to the `sse` and up target features for x86/x86_64.
+    ///
+    /// Note that unlike `__m128i`, the integer version of the 128-bit
+    /// registers, this `__m128d` type has *one* interpretation. Each instance
+    /// of `__m128d` always corresponds to `f64x2`, or two `f64` types packed
+    /// together.
+    ///
+    /// Most intrinsics using `__m128d` are prefixed with `_mm_` and are
+    /// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with
+    /// "ps" which is used for `__m128`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(cfg_target_feature, target_feature)]
+    /// # #[macro_use]
+    /// # extern crate stdsimd;
+    /// # fn main() {
+    /// # #[target_feature(enable = "sse2")]
+    /// # unsafe fn foo() {
+    /// use stdsimd::vendor::*;
+    ///
+    /// let two_zeros = _mm_setzero_pd();
+    /// let two_ones = _mm_set1_pd(1.0);
+    /// let two_floats = _mm_set_pd(1.0, 2.0);
+    /// # }
+    /// # if cfg_feature_enabled!("sse2") { unsafe { foo() } }
+    /// # }
+    /// ```
+    pub struct __m128d(f64, f64);
+
+    /// 256-bit wide integer vector type, x86-specific
+    ///
+    /// This type is the same as the `__m256i` type defined by Intel,
+    /// representing a 256-bit SIMD register. Usage of this type typically
+    /// corresponds to the `avx` and up target features for x86/x86_64.
+    ///
+    /// Internally this type may be viewed as:
+    ///
+    /// * `i8x32` - thirty-two `i8` variables packed together
+    /// * `i16x16` - sixteen `i16` variables packed together
+    /// * `i32x8` - eight `i32` variables packed together
+    /// * `i64x4` - four `i64` variables packed together
+    ///
+    /// (as well as unsigned versions). Each intrinsic may interpret the
+    /// internal bits differently, check the documentation of the intrinsic
+    /// to see how it's being used.
+    ///
+    /// Note that this means that an instance of `__m256i` typically just means
+    /// a "bag of bits" which is left up to interpretation at the point of use.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(cfg_target_feature, target_feature)]
+    /// # #[macro_use]
+    /// # extern crate stdsimd;
+    /// # fn main() {
+    /// # #[target_feature(enable = "avx")]
+    /// # unsafe fn foo() {
+    /// use stdsimd::vendor::*;
+    ///
+    /// let all_bytes_zero = _mm256_setzero_si256();
+    /// let all_bytes_one = _mm256_set1_epi8(1);
+    /// let eight_i32 = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+    /// # }
+    /// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
+    /// # }
+    /// ```
+    #[derive(PartialEq)]
+    pub struct __m256i(i64, i64, i64, i64);
+
+    /// 256-bit wide set of eight `f32` types, x86-specific
+    ///
+    /// This type is the same as the `__m256` type defined by Intel,
+    /// representing a 256-bit SIMD register which internally consists of
+    /// eight packed `f32` instances. Usage of this type typically corresponds
+    /// to the `avx` and up target features for x86/x86_64.
+    ///
+    /// Note that unlike `__m256i`, the integer version of the 256-bit
+    /// registers, this `__m256` type has *one* interpretation. Each instance
+    /// of `__m256` always corresponds to `f32x8`, or eight `f32` types packed
+    /// together.
+    ///
+    /// Most intrinsics using `__m256` are prefixed with `_mm256_` and are
+    /// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with
+    /// "pd" which is used for `__m256d`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(cfg_target_feature, target_feature)]
+    /// # #[macro_use]
+    /// # extern crate stdsimd;
+    /// # fn main() {
+    /// # #[target_feature(enable = "avx")]
+    /// # unsafe fn foo() {
+    /// use stdsimd::vendor::*;
+    ///
+    /// let eight_zeros = _mm256_setzero_ps();
+    /// let eight_ones = _mm256_set1_ps(1.0);
+    /// let eight_floats = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+    /// # }
+    /// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
+    /// # }
+    /// ```
+    pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32);
+
+    /// 256-bit wide set of four `f64` types, x86-specific
+    ///
+    /// This type is the same as the `__m256d` type defined by Intel,
+    /// representing a 256-bit SIMD register which internally consists of
+    /// four packed `f64` instances. Usage of this type typically corresponds
+    /// to the `avx` and up target features for x86/x86_64.
+    ///
+    /// Note that unlike `__m256i`, the integer version of the 256-bit
+    /// registers, this `__m256d` type has *one* interpretation. Each instance
+    /// of `__m256d` always corresponds to `f64x4`, or four `f64` types packed
+    /// together.
+    ///
+    /// Most intrinsics using `__m256d` are prefixed with `_mm256_` and are
+    /// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with
+    /// "ps" which is used for `__m256`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(cfg_target_feature, target_feature)]
+    /// # #[macro_use]
+    /// # extern crate stdsimd;
+    /// # fn main() {
+    /// # #[target_feature(enable = "avx")]
+    /// # unsafe fn foo() {
+    /// use stdsimd::vendor::*;
+    ///
+    /// let four_zeros = _mm256_setzero_pd();
+    /// let four_ones = _mm256_set1_pd(1.0);
+    /// let four_floats = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
+    /// # }
+    /// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
+    /// # }
+    /// ```
+    pub struct __m256d(f64, f64, f64, f64);
+}
 
 #[cfg(test)]
 mod test;