diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index b9650cab50..82b4e8fc2a 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -1153,6 +1153,454 @@ pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t simd_shuffle4(a, vcvtx_f32_f64(b), [0, 1, 2, 3]) } +/// Floating-point convert to signed fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtzs))] +pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t { + simd_cast(a) +} + +/// Floating-point convert to signed fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtzs))] +pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t { + simd_cast(a) +} + +/// Floating-point convert to unsigned fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtzu))] +pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t { + simd_cast(a) +} + +/// Floating-point convert to unsigned fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtzu))] +pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t { + simd_cast(a) +} + +/// Floating-point convert to signed integer, rounding to nearest with ties to away +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtas))] +pub unsafe fn vcvta_s32_f32(a: float32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v2i32.v2f32")] + fn vcvta_s32_f32_(a: float32x2_t) -> int32x2_t; + } + vcvta_s32_f32_(a) +} + +/// Floating-point convert to signed integer, rounding to nearest with ties to away +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtas))] +pub unsafe fn vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v4i32.v4f32")] + fn vcvtaq_s32_f32_(a: float32x4_t) -> int32x4_t; + } + vcvtaq_s32_f32_(a) +} + +/// Floating-point convert to signed integer, rounding to nearest with ties to away +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtas))] +pub unsafe fn vcvta_s64_f64(a: float64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v1i64.v1f64")] + fn vcvta_s64_f64_(a: float64x1_t) -> int64x1_t; + } + vcvta_s64_f64_(a) +} + +/// Floating-point convert to signed integer, rounding to nearest with ties to away +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtas))] +pub unsafe fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v2i64.v2f64")] + fn vcvtaq_s64_f64_(a: float64x2_t) -> int64x2_t; + } + vcvtaq_s64_f64_(a) +} + +/// Floating-point convert to signed integer, rounding to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtns))] +pub unsafe fn vcvtn_s32_f32(a: float32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v2i32.v2f32")] + fn vcvtn_s32_f32_(a: 
float32x2_t) -> int32x2_t; + } + vcvtn_s32_f32_(a) +} + +/// Floating-point convert to signed integer, rounding to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtns))] +pub unsafe fn vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v4i32.v4f32")] + fn vcvtnq_s32_f32_(a: float32x4_t) -> int32x4_t; + } + vcvtnq_s32_f32_(a) +} + +/// Floating-point convert to signed integer, rounding to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtns))] +pub unsafe fn vcvtn_s64_f64(a: float64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v1i64.v1f64")] + fn vcvtn_s64_f64_(a: float64x1_t) -> int64x1_t; + } + vcvtn_s64_f64_(a) +} + +/// Floating-point convert to signed integer, rounding to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtns))] +pub unsafe fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v2i64.v2f64")] + fn vcvtnq_s64_f64_(a: float64x2_t) -> int64x2_t; + } + vcvtnq_s64_f64_(a) +} + +/// Floating-point convert to signed integer, rounding toward minus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtms))] +pub unsafe fn vcvtm_s32_f32(a: float32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v2i32.v2f32")] + fn vcvtm_s32_f32_(a: float32x2_t) -> int32x2_t; + } + vcvtm_s32_f32_(a) +} + +/// Floating-point convert to signed integer, rounding toward minus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtms))] +pub unsafe fn vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v4i32.v4f32")] + fn vcvtmq_s32_f32_(a: float32x4_t) -> int32x4_t; + } + vcvtmq_s32_f32_(a) +} + +/// Floating-point convert to signed integer, rounding toward minus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtms))] +pub unsafe fn vcvtm_s64_f64(a: float64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v1i64.v1f64")] + fn vcvtm_s64_f64_(a: float64x1_t) -> int64x1_t; + } + vcvtm_s64_f64_(a) +} + +/// Floating-point convert to signed integer, rounding toward minus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtms))] +pub unsafe fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v2i64.v2f64")] + fn vcvtmq_s64_f64_(a: float64x2_t) -> int64x2_t; + } + vcvtmq_s64_f64_(a) +} + +/// Floating-point convert to signed integer, rounding toward plus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtps))] +pub unsafe fn vcvtp_s32_f32(a: float32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v2i32.v2f32")] + fn vcvtp_s32_f32_(a: 
float32x2_t) -> int32x2_t; + } + vcvtp_s32_f32_(a) +} + +/// Floating-point convert to signed integer, rounding toward plus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtps))] +pub unsafe fn vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v4i32.v4f32")] + fn vcvtpq_s32_f32_(a: float32x4_t) -> int32x4_t; + } + vcvtpq_s32_f32_(a) +} + +/// Floating-point convert to signed integer, rounding toward plus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtps))] +pub unsafe fn vcvtp_s64_f64(a: float64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v1i64.v1f64")] + fn vcvtp_s64_f64_(a: float64x1_t) -> int64x1_t; + } + vcvtp_s64_f64_(a) +} + +/// Floating-point convert to signed integer, rounding toward plus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtps))] +pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v2i64.v2f64")] + fn vcvtpq_s64_f64_(a: float64x2_t) -> int64x2_t; + } + vcvtpq_s64_f64_(a) +} + +/// Floating-point convert to unsigned integer, rounding to nearest with ties to away +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtau))] +pub unsafe fn vcvta_u32_f32(a: float32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v2i32.v2f32")] + fn vcvta_u32_f32_(a: float32x2_t) -> uint32x2_t; + } + vcvta_u32_f32_(a) +} + +/// Floating-point convert to unsigned integer, rounding to nearest with ties to away +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtau))] +pub unsafe fn vcvtaq_u32_f32(a: float32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v4i32.v4f32")] + fn vcvtaq_u32_f32_(a: float32x4_t) -> uint32x4_t; + } + vcvtaq_u32_f32_(a) +} + +/// Floating-point convert to unsigned integer, rounding to nearest with ties to away +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtau))] +pub unsafe fn vcvta_u64_f64(a: float64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v1i64.v1f64")] + fn vcvta_u64_f64_(a: float64x1_t) -> uint64x1_t; + } + vcvta_u64_f64_(a) +} + +/// Floating-point convert to unsigned integer, rounding to nearest with ties to away +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtau))] +pub unsafe fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v2i64.v2f64")] + fn vcvtaq_u64_f64_(a: float64x2_t) -> uint64x2_t; + } + vcvtaq_u64_f64_(a) +} + +/// Floating-point convert to unsigned integer, rounding to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtnu))] +pub unsafe fn vcvtn_u32_f32(a: float32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v2i32.v2f32")] + 
fn vcvtn_u32_f32_(a: float32x2_t) -> uint32x2_t; + } + vcvtn_u32_f32_(a) +} + +/// Floating-point convert to unsigned integer, rounding to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtnu))] +pub unsafe fn vcvtnq_u32_f32(a: float32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v4i32.v4f32")] + fn vcvtnq_u32_f32_(a: float32x4_t) -> uint32x4_t; + } + vcvtnq_u32_f32_(a) +} + +/// Floating-point convert to unsigned integer, rounding to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtnu))] +pub unsafe fn vcvtn_u64_f64(a: float64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v1i64.v1f64")] + fn vcvtn_u64_f64_(a: float64x1_t) -> uint64x1_t; + } + vcvtn_u64_f64_(a) +} + +/// Floating-point convert to unsigned integer, rounding to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtnu))] +pub unsafe fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v2i64.v2f64")] + fn vcvtnq_u64_f64_(a: float64x2_t) -> uint64x2_t; + } + vcvtnq_u64_f64_(a) +} + +/// Floating-point convert to unsigned integer, rounding toward minus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtmu))] +pub unsafe fn vcvtm_u32_f32(a: float32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v2i32.v2f32")] + fn vcvtm_u32_f32_(a: float32x2_t) -> uint32x2_t; + } + vcvtm_u32_f32_(a) +} + +/// Floating-point convert to unsigned integer, rounding toward minus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtmu))] +pub unsafe fn vcvtmq_u32_f32(a: float32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v4i32.v4f32")] + fn vcvtmq_u32_f32_(a: float32x4_t) -> uint32x4_t; + } + vcvtmq_u32_f32_(a) +} + +/// Floating-point convert to unsigned integer, rounding toward minus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtmu))] +pub unsafe fn vcvtm_u64_f64(a: float64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v1i64.v1f64")] + fn vcvtm_u64_f64_(a: float64x1_t) -> uint64x1_t; + } + vcvtm_u64_f64_(a) +} + +/// Floating-point convert to unsigned integer, rounding toward minus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtmu))] +pub unsafe fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v2i64.v2f64")] + fn vcvtmq_u64_f64_(a: float64x2_t) -> uint64x2_t; + } + vcvtmq_u64_f64_(a) +} + +/// Floating-point convert to unsigned integer, rounding toward plus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtpu))] +pub unsafe fn vcvtp_u32_f32(a: float32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = 
"llvm.aarch64.neon.fcvtpu.v2i32.v2f32")] + fn vcvtp_u32_f32_(a: float32x2_t) -> uint32x2_t; + } + vcvtp_u32_f32_(a) +} + +/// Floating-point convert to unsigned integer, rounding toward plus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtpu))] +pub unsafe fn vcvtpq_u32_f32(a: float32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v4i32.v4f32")] + fn vcvtpq_u32_f32_(a: float32x4_t) -> uint32x4_t; + } + vcvtpq_u32_f32_(a) +} + +/// Floating-point convert to unsigned integer, rounding toward plus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtpu))] +pub unsafe fn vcvtp_u64_f64(a: float64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v1i64.v1f64")] + fn vcvtp_u64_f64_(a: float64x1_t) -> uint64x1_t; + } + vcvtp_u64_f64_(a) +} + +/// Floating-point convert to unsigned integer, rounding toward plus infinity +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtpu))] +pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v2i64.v2f64")] + fn vcvtpq_u64_f64_(a: float64x2_t) -> uint64x2_t; + } + vcvtpq_u64_f64_(a) +} + /// Multiply #[inline] #[target_feature(enable = "neon")] @@ -2470,6 +2918,294 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_s64_f64() { + let a: f64 = -1.1; + let e: i64x1 = i64x1::new(-1); + let r: i64x1 = transmute(vcvt_s64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_s64_f64() { + let a: f64x2 = f64x2::new(-1.1, 2.1); + let e: i64x2 = i64x2::new(-1, 2); + let r: i64x2 = transmute(vcvtq_s64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_u64_f64() { + let a: f64 = 1.1; + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vcvt_u64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_u64_f64() { + let a: f64x2 = f64x2::new(1.1, 2.1); + let e: u64x2 = u64x2::new(1, 2); + let r: u64x2 = transmute(vcvtq_u64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvta_s32_f32() { + let a: f32x2 = f32x2::new(-1.1, 2.1); + let e: i32x2 = i32x2::new(-1, 2); + let r: i32x2 = transmute(vcvta_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtaq_s32_f32() { + let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); + let e: i32x4 = i32x4::new(-1, 2, -3, 4); + let r: i32x4 = transmute(vcvtaq_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvta_s64_f64() { + let a: f64 = -1.1; + let e: i64x1 = i64x1::new(-1); + let r: i64x1 = transmute(vcvta_s64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtaq_s64_f64() { + let a: f64x2 = f64x2::new(-1.1, 2.1); + let e: i64x2 = i64x2::new(-1, 2); + let r: i64x2 = transmute(vcvtaq_s64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtn_s32_f32() { + let a: f32x2 = f32x2::new(-1.5, 2.1); + let e: i32x2 = i32x2::new(-2, 2); + let r: i32x2 = transmute(vcvtn_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + 
#[simd_test(enable = "neon")] + unsafe fn test_vcvtnq_s32_f32() { + let a: f32x4 = f32x4::new(-1.5, 2.1, -2.9, 3.9); + let e: i32x4 = i32x4::new(-2, 2, -3, 4); + let r: i32x4 = transmute(vcvtnq_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtn_s64_f64() { + let a: f64 = -1.5; + let e: i64x1 = i64x1::new(-2); + let r: i64x1 = transmute(vcvtn_s64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtnq_s64_f64() { + let a: f64x2 = f64x2::new(-1.5, 2.1); + let e: i64x2 = i64x2::new(-2, 2); + let r: i64x2 = transmute(vcvtnq_s64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtm_s32_f32() { + let a: f32x2 = f32x2::new(-1.1, 2.1); + let e: i32x2 = i32x2::new(-2, 2); + let r: i32x2 = transmute(vcvtm_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtmq_s32_f32() { + let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); + let e: i32x4 = i32x4::new(-2, 2, -3, 3); + let r: i32x4 = transmute(vcvtmq_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtm_s64_f64() { + let a: f64 = -1.1; + let e: i64x1 = i64x1::new(-2); + let r: i64x1 = transmute(vcvtm_s64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtmq_s64_f64() { + let a: f64x2 = f64x2::new(-1.1, 2.1); + let e: i64x2 = i64x2::new(-2, 2); + let r: i64x2 = transmute(vcvtmq_s64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtp_s32_f32() { + let a: f32x2 = f32x2::new(-1.1, 2.1); + let e: i32x2 = i32x2::new(-1, 3); + let r: i32x2 = transmute(vcvtp_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtpq_s32_f32() { + let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); + let e: i32x4 = i32x4::new(-1, 3, -2, 4); + let r: i32x4 = transmute(vcvtpq_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtp_s64_f64() { + let a: f64 = -1.1; + let e: i64x1 = i64x1::new(-1); + let r: i64x1 = transmute(vcvtp_s64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtpq_s64_f64() { + let a: f64x2 = f64x2::new(-1.1, 2.1); + let e: i64x2 = i64x2::new(-1, 3); + let r: i64x2 = transmute(vcvtpq_s64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvta_u32_f32() { + let a: f32x2 = f32x2::new(1.1, 2.1); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vcvta_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtaq_u32_f32() { + let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); + let e: u32x4 = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vcvtaq_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvta_u64_f64() { + let a: f64 = 1.1; + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vcvta_u64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtaq_u64_f64() { + let a: f64x2 = f64x2::new(1.1, 2.1); + let e: u64x2 = u64x2::new(1, 2); + let r: u64x2 = transmute(vcvtaq_u64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtn_u32_f32() { + let a: f32x2 = f32x2::new(1.5, 2.1); + let e: u32x2 = u32x2::new(2, 2); + let r: u32x2 = 
transmute(vcvtn_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtnq_u32_f32() { + let a: f32x4 = f32x4::new(1.5, 2.1, 2.9, 3.9); + let e: u32x4 = u32x4::new(2, 2, 3, 4); + let r: u32x4 = transmute(vcvtnq_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtn_u64_f64() { + let a: f64 = 1.5; + let e: u64x1 = u64x1::new(2); + let r: u64x1 = transmute(vcvtn_u64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtnq_u64_f64() { + let a: f64x2 = f64x2::new(1.5, 2.1); + let e: u64x2 = u64x2::new(2, 2); + let r: u64x2 = transmute(vcvtnq_u64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtm_u32_f32() { + let a: f32x2 = f32x2::new(1.1, 2.1); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vcvtm_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtmq_u32_f32() { + let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); + let e: u32x4 = u32x4::new(1, 2, 2, 3); + let r: u32x4 = transmute(vcvtmq_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtm_u64_f64() { + let a: f64 = 1.1; + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vcvtm_u64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtmq_u64_f64() { + let a: f64x2 = f64x2::new(1.1, 2.1); + let e: u64x2 = u64x2::new(1, 2); + let r: u64x2 = transmute(vcvtmq_u64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtp_u32_f32() { + let a: f32x2 = f32x2::new(1.1, 2.1); + let e: u32x2 = u32x2::new(2, 3); + let r: u32x2 = transmute(vcvtp_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtpq_u32_f32() { + let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); + let e: u32x4 = u32x4::new(2, 3, 3, 4); + let r: u32x4 = transmute(vcvtpq_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtp_u64_f64() { + let a: f64 = 1.1; + let e: u64x1 = u64x1::new(2); + let r: u64x1 = transmute(vcvtp_u64_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtpq_u64_f64() { + let a: f64x2 = f64x2::new(1.1, 2.1); + let e: u64x2 = u64x2::new(2, 3); + let r: u64x2 = transmute(vcvtpq_u64_f64(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmul_f64() { let a: f64 = 1.0; diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 207d4b5ed7..652cd6e9da 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -314,11 +314,6 @@ extern "C" { c: uint8x16_t, ) -> int8x16_t; - #[link_name = "llvm.aarch64.neon.fcvtzu.v4i32.v4f32"] - fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t; - #[link_name = "llvm.aarch64.neon.fcvtzs.v4i32.v4f32"] - fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t; - #[link_name = "llvm.aarch64.neon.vsli.v8i8"] fn vsli_n_s8_(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t; #[link_name = "llvm.aarch64.neon.vsli.v16i8"] @@ -2364,21 +2359,6 @@ pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> pol )) } -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzs))] -pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t { - vcvtq_s32_f32_(a) -} - -/// 
Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzu))] -pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { - vcvtq_u32_f32_(a) -} - /// Shift Left and Insert (immediate) #[inline] #[target_feature(enable = "neon")] @@ -2749,42 +2729,6 @@ mod tests { use std::mem::transmute; use stdarch_test::simd_test; - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_s32_f32() { - let f = f32x4::new(-1., 2., 3., 4.); - let e = i32x4::new(-1, 2, 3, 4); - let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f))); - assert_eq!(r, e); - - let f = f32x4::new(10e37, 2., 3., 4.); - let e = i32x4::new(0x7fffffff, 2, 3, 4); - let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f))); - assert_eq!(r, e); - - let f = f32x4::new(-10e37, 2., 3., 4.); - let e = i32x4::new(-0x80000000, 2, 3, 4); - let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_u32_f32() { - let f = f32x4::new(1., 2., 3., 4.); - let e = u32x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f))); - assert_eq!(r, e); - - let f = f32x4::new(-1., 2., 3., 4.); - let e = u32x4::new(0, 2, 3, 4); - let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f))); - assert_eq!(r, e); - - let f = f32x4::new(10e37, 2., 3., 4.); - let e = u32x4::new(0xffffffff, 2, 3, 4); - let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vuqadd_s8() { let a = i8x8::new(i8::MIN, -3, -2, -1, 0, 1, 2, i8::MAX); diff --git a/crates/core_arch/src/arm/neon/generated.rs b/crates/core_arch/src/arm/neon/generated.rs index e57b67adfc..e395d511ea 100644 --- a/crates/core_arch/src/arm/neon/generated.rs +++ b/crates/core_arch/src/arm/neon/generated.rs @@ -1941,6 +1941,46 @@ pub unsafe fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { vcageq_f32(b, a) } +/// Floating-point convert to signed fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))] +pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t { + simd_cast(a) +} + +/// Floating-point convert to signed fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))] +pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t { + simd_cast(a) +} + +/// Floating-point convert to unsigned fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))] +pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t { + simd_cast(a) +} + +/// Floating-point convert to unsigned fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))] +pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> 
uint32x4_t { + simd_cast(a) +} + /// Saturating subtract #[inline] #[target_feature(enable = "neon")] @@ -5485,6 +5525,38 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_s32_f32() { + let a: f32x2 = f32x2::new(-1.1, 2.1); + let e: i32x2 = i32x2::new(-1, 2); + let r: i32x2 = transmute(vcvt_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_s32_f32() { + let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); + let e: i32x4 = i32x4::new(-1, 2, -2, 3); + let r: i32x4 = transmute(vcvtq_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_u32_f32() { + let a: f32x2 = f32x2::new(1.1, 2.1); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vcvt_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_u32_f32() { + let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); + let e: u32x4 = u32x4::new(1, 2, 2, 3); + let r: u32x4 = transmute(vcvtq_u32_f32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqsub_u8() { let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index a50f3956ac..a30c823186 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -652,6 +652,112 @@ validate -1.0, 2.0, -3.0, 4.0 aarch64 = fcvtxn generate float32x2_t:float64x2_t:float32x4_t +/// Floating-point convert to signed fixed-point, rounding toward zero +name = vcvt +double-suffixes +fn = simd_cast +a = -1.1, 2.1, -2.9, 3.9 +validate -1, 2, -2, 3 + +aarch64 = fcvtzs +generate float64x1_t:int64x1_t, float64x2_t:int64x2_t + +arm = vcvt +generate float32x2_t:int32x2_t, float32x4_t:int32x4_t + +/// Floating-point convert to unsigned fixed-point, rounding toward zero +name = vcvt +double-suffixes +fn = simd_cast +a = 1.1, 2.1, 2.9, 3.9 +validate 1, 2, 2, 3 + +aarch64 = fcvtzu +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vcvt +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +/// Floating-point convert to signed integer, rounding to nearest with ties to away +name = vcvta +double-suffixes +a = -1.1, 2.1, -2.9, 3.9 +validate -1, 2, -3, 4 + +aarch64 = fcvtas +link-aarch64 = fcvtas._EXT2_._EXT_ +generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t + +/// Floating-point convert to signed integer, rounding to nearest with ties to even +name = vcvtn +double-suffixes +a = -1.5, 2.1, -2.9, 3.9 +validate -2, 2, -3, 4 + +aarch64 = fcvtns +link-aarch64 = fcvtns._EXT2_._EXT_ +generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t + +/// Floating-point convert to signed integer, rounding toward minus infinity +name = vcvtm +double-suffixes +a = -1.1, 2.1, -2.9, 3.9 +validate -2, 2, -3, 3 + +aarch64 = fcvtms +link-aarch64 = fcvtms._EXT2_._EXT_ +generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t + +/// Floating-point convert to signed integer, rounding toward plus infinity +name = vcvtp +double-suffixes +a = -1.1, 2.1, -2.9, 3.9 +validate -1, 3, -2, 4 + +aarch64 = fcvtps +link-aarch64 = fcvtps._EXT2_._EXT_ +generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t + +/// Floating-point convert to unsigned integer, rounding to nearest with ties to away +name = vcvta +double-suffixes +a = 1.1, 2.1, 2.9, 3.9 +validate 
1, 2, 3, 4 + +aarch64 = fcvtau +link-aarch64 = fcvtau._EXT2_._EXT_ +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +/// Floating-point convert to unsigned integer, rounding to nearest with ties to even +name = vcvtn +double-suffixes +a = 1.5, 2.1, 2.9, 3.9 +validate 2, 2, 3, 4 + +aarch64 = fcvtnu +link-aarch64 = fcvtnu._EXT2_._EXT_ +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +/// Floating-point convert to unsigned integer, rounding toward minus infinity +name = vcvtm +double-suffixes +a = 1.1, 2.1, 2.9, 3.9 +validate 1, 2, 2, 3 + +aarch64 = fcvtmu +link-aarch64 = fcvtmu._EXT2_._EXT_ +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +/// Floating-point convert to unsigned integer, rounding toward plus infinity +name = vcvtp +double-suffixes +a = 1.1, 2.1, 2.9, 3.9 +validate 2, 3, 3, 4 + +aarch64 = fcvtpu +link-aarch64 = fcvtpu._EXT2_._EXT_ +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + /// Saturating subtract name = vqsub a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index 90ec610d9b..fd4eedf8c0 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -170,6 +170,10 @@ fn type_to_double_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> &'a str { ("int32x4_t", "float32x4_t") => "q_s32_f32", ("int64x1_t", "float64x1_t") => "_s64_f64", ("int64x2_t", "float64x2_t") => "q_s64_f64", + ("uint32x2_t", "float32x2_t") => "_u32_f32", + ("uint32x4_t", "float32x4_t") => "q_u32_f32", + ("uint64x1_t", "float64x1_t") => "_u64_f64", + ("uint64x2_t", "float64x2_t") => "q_u64_f64", (_, _) => panic!("unknown type: {}, {}", out_t, in_t), } }
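
Note (illustrative, not part of the change above): the intrinsics added here differ only in the rounding mode applied before the float-to-integer conversion. Below is a minimal sketch of the expected behaviour, assuming an AArch64 target where these functions are exposed through core::arch::aarch64; the helper name rounding_modes_demo is hypothetical.

// NEON is baseline on AArch64, so the target_feature requirement of these
// intrinsics is always satisfied at runtime; the calls still need `unsafe`.
#[cfg(target_arch = "aarch64")]
unsafe fn rounding_modes_demo() {
    use core::arch::aarch64::*;

    // Example lanes, matching the generated vcvtnq_s32_f32 test: [-1.5, 2.1, -2.9, 3.9]
    let a: float32x4_t = core::mem::transmute([-1.5f32, 2.1, -2.9, 3.9]);

    let _toward_zero: int32x4_t = vcvtq_s32_f32(a);  // fcvtzs -> [-1, 2, -2, 3]
    let _ties_away: int32x4_t = vcvtaq_s32_f32(a);   // fcvtas -> [-2, 2, -3, 4]
    let _ties_even: int32x4_t = vcvtnq_s32_f32(a);   // fcvtns -> [-2, 2, -3, 4]
    let _floor: int32x4_t = vcvtmq_s32_f32(a);       // fcvtms -> [-2, 2, -3, 3]
    let _ceil: int32x4_t = vcvtpq_s32_f32(a);        // fcvtps -> [-1, 3, -2, 4]
}

The unsigned variants (vcvtaq_u32_f32 and friends) follow the same pattern but lower to the fcvtau/fcvtnu/fcvtmu/fcvtpu instructions, as exercised by the generated tests above.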