diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index cadfc38300..7e4b33b2f0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -77,7 +77,7 @@ jobs:
           - mips64-unknown-linux-gnuabi64
           - mips64el-unknown-linux-gnuabi64
           - s390x-unknown-linux-gnu
-          - wasm32-wasi
+          #- wasm32-wasi
           - i586-unknown-linux-gnu
           - x86_64-linux-android
           - arm-linux-androideabi
@@ -129,8 +129,8 @@ jobs:
             disable_assert_instr: true
           - target: s390x-unknown-linux-gnu
             os: ubuntu-latest
-          - target: wasm32-wasi
-            os: ubuntu-latest
+          #- target: wasm32-wasi
+          #  os: ubuntu-latest
           - target: aarch64-unknown-linux-gnu
             os: ubuntu-latest
           - target: x86_64-apple-darwin
diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs
index 66da36463a..0c73e5935d 100644
--- a/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/crates/core_arch/src/aarch64/neon/mod.rs
@@ -82,8 +82,41 @@ extern "C" {
     #[link_name = "llvm.aarch64.neon.pmull64"]
     fn vmull_p64_(a: i64, b: i64) -> int8x16_t;
 
+    #[link_name = "llvm.aarch64.neon.addp.v8i16"]
+    fn vpaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    #[link_name = "llvm.aarch64.neon.addp.v4i32"]
+    fn vpaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     #[link_name = "llvm.aarch64.neon.addp.v16i8"]
-    fn vpaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+    fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+
+    #[link_name = "llvm.aarch64.neon.saddv.i32.v4i16"]
+    fn vaddv_s16_(a: int16x4_t) -> i16;
+    #[link_name = "llvm.aarch64.neon.saddv.i32.v2i32"]
+    fn vaddv_s32_(a: int32x2_t) -> i32;
+    #[link_name = "llvm.aarch64.neon.saddv.i32.v8i8"]
+    fn vaddv_s8_(a: int8x8_t) -> i8;
+    #[link_name = "llvm.aarch64.neon.uaddv.i32.v4i16"]
+    fn vaddv_u16_(a: uint16x4_t) -> u16;
+    #[link_name = "llvm.aarch64.neon.uaddv.i32.v2i32"]
+    fn vaddv_u32_(a: uint32x2_t) -> u32;
+    #[link_name = "llvm.aarch64.neon.uaddv.i32.v8i8"]
+    fn vaddv_u8_(a: uint8x8_t) -> u8;
+    #[link_name = "llvm.aarch64.neon.saddv.i32.v8i16"]
+    fn vaddvq_s16_(a: int16x8_t) -> i16;
+    #[link_name = "llvm.aarch64.neon.saddv.i32.v4i32"]
+    fn vaddvq_s32_(a: int32x4_t) -> i32;
+    #[link_name = "llvm.aarch64.neon.saddv.i32.v16i8"]
+    fn vaddvq_s8_(a: int8x16_t) -> i8;
+    #[link_name = "llvm.aarch64.neon.uaddv.i32.v8i16"]
+    fn vaddvq_u16_(a: uint16x8_t) -> u16;
+    #[link_name = "llvm.aarch64.neon.uaddv.i32.v4i32"]
+    fn vaddvq_u32_(a: uint32x4_t) -> u32;
+    #[link_name = "llvm.aarch64.neon.uaddv.i32.v16i8"]
+    fn vaddvq_u8_(a: uint8x16_t) -> u8;
+    #[link_name = "llvm.aarch64.neon.saddv.i64.v2i64"]
+    fn vaddvq_s64_(a: int64x2_t) -> i64;
+    #[link_name = "llvm.aarch64.neon.uaddv.i64.v2i64"]
+    fn vaddvq_u64_(a: uint64x2_t) -> u64;
 
     #[link_name = "llvm.aarch64.neon.smaxv.i8.v8i8"]
     fn vmaxv_s8_(a: int8x8_t) -> i8;
@@ -276,12 +309,160 @@ pub unsafe fn vabsq_s64(a: int64x2_t) -> int64x2_t {
     vabsq_s64_(a)
 }
 
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    vpaddq_s16_(a, b)
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    transmute(vpaddq_s16_(transmute(a), transmute(b)))
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    vpaddq_s32_(a, b)
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    transmute(vpaddq_s32_(transmute(a), transmute(b)))
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    vpaddq_s8_(a, b)
+}
 /// Add pairwise
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(addp))]
 pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    vpaddq_u8_(a, b)
+    transmute(vpaddq_s8_(transmute(a), transmute(b)))
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddd_s64(a: int64x2_t) -> i64 {
+    transmute(vaddvq_u64_(transmute(a)))
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddd_u64(a: uint64x2_t) -> u64 {
+    transmute(vaddvq_u64_(transmute(a)))
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddv_s16(a: int16x4_t) -> i16 {
+    vaddv_s16_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vaddv_s32(a: int32x2_t) -> i32 {
+    vaddv_s32_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddv_s8(a: int8x8_t) -> i8 {
+    vaddv_s8_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddv_u16(a: uint16x4_t) -> u16 {
+    vaddv_u16_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vaddv_u32(a: uint32x2_t) -> u32 {
+    vaddv_u32_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddv_u8(a: uint8x8_t) -> u8 {
+    vaddv_u8_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_s16(a: int16x8_t) -> i16 {
+    vaddvq_s16_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_s32(a: int32x4_t) -> i32 {
+    vaddvq_s32_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_s8(a: int8x16_t) -> i8 {
+    vaddvq_s8_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_u16(a: uint16x8_t) -> u16 {
+    vaddvq_u16_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_u32(a: uint32x4_t) -> u32 {
+    vaddvq_u32_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_u8(a: uint8x16_t) -> u8 {
+    vaddvq_u8_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vaddvq_s64(a: int64x2_t) -> i64 {
+    vaddvq_s64_(a)
+}
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vaddvq_u64(a: uint64x2_t) -> u64 {
+    vaddvq_u64_(a)
 }
 
 /// Polynomial multiply long
@@ -308,6 +489,22 @@ pub unsafe fn vaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     simd_add(a, b)
 }
 
+/// Vector add.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(add))]
+pub unsafe fn vadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    simd_add(a, b)
+}
+
+/// Vector add.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(add))]
+pub unsafe fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    simd_add(a, b)
+}
+
 /// Vector add.
 #[inline]
 #[target_feature(enable = "neon")]
@@ -1610,6 +1807,46 @@ mod tests {
     use std::mem::transmute;
     use stdarch_test::simd_test;
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpaddq_s16() {
+        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = i16x8::new(0, -1, -2, -3, -4, -5, -6, -7);
+        let r: i16x8 = transmute(vpaddq_s16(transmute(a), transmute(b)));
+        let e = i16x8::new(3, 7, 11, 15, -1, -5, -9, -13);
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpaddq_s32() {
+        let a = i32x4::new(1, 2, 3, 4);
+        let b = i32x4::new(0, -1, -2, -3);
+        let r: i32x4 = transmute(vpaddq_s32(transmute(a), transmute(b)));
+        let e = i32x4::new(3, 7, -1, -5);
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpaddq_s8() {
+        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b = i8x16::new(0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15);
+        let r: i8x16 = transmute(vpaddq_s8(transmute(a), transmute(b)));
+        let e = i8x16::new(3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29);
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpaddq_u16() {
+        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = u16x8::new(17, 18, 19, 20, 20, 21, 22, 23);
+        let r: u16x8 = transmute(vpaddq_u16(transmute(a), transmute(b)));
+        let e = u16x8::new(1, 5, 9, 13, 35, 39, 41, 45);
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpaddq_u32() {
+        let a = u32x4::new(0, 1, 2, 3);
+        let b = u32x4::new(17, 18, 19, 20);
+        let r: u32x4 = transmute(vpaddq_u32(transmute(a), transmute(b)));
+        let e = u32x4::new(1, 5, 35, 39);
+        assert_eq!(r, e);
+    }
     #[simd_test(enable = "neon")]
     unsafe fn test_vpaddq_u8() {
         let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -1620,6 +1857,20 @@ mod tests {
         let e: i8x16 = transmute(vpaddq_u8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpaddd_s64() {
+        let a = i64x2::new(2, -3);
+        let r: i64 = transmute(vpaddd_s64(transmute(a)));
+        let e = -1_i64;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpaddd_u64() {
+        let a = i64x2::new(2, 3);
+        let r: u64 = transmute(vpaddd_u64(transmute(a)));
+        let e = 5_u64;
+        assert_eq!(r, e);
+    }
 
     #[simd_test(enable = "neon")]
     unsafe fn test_vmull_p64() {
@@ -1669,6 +1920,24 @@ mod tests {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vadd_s64() {
+        let a = 1_i64;
+        let b = 8_i64;
+        let e = 9_i64;
+        let r: i64 = transmute(vadd_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vadd_u64() {
+        let a = 1_u64;
+        let b = 8_u64;
+        let e = 9_u64;
+        let r: u64 = transmute(vadd_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vaddd_s64() {
         let a = 1_i64;
@@ -2560,6 +2829,105 @@ mod tests {
         let e = i64x2::new(i64::MIN, i64::MAX);
         assert_eq!(r, e);
     }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddv_s16() {
+        let a = i16x4::new(1, 2, 3, -4);
+        let r: i16 = transmute(vaddv_s16(transmute(a)));
+        let e = 2_i16;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddv_u16() {
+        let a = u16x4::new(1, 2, 3, 4);
+        let r: u16 = transmute(vaddv_u16(transmute(a)));
+        let e = 10_u16;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddv_s32() {
+        let a = i32x2::new(1, -2);
+        let r: i32 = transmute(vaddv_s32(transmute(a)));
+        let e = -1_i32;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddv_u32() {
+        let a = u32x2::new(1, 2);
+        let r: u32 = transmute(vaddv_u32(transmute(a)));
+        let e = 3_u32;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddv_s8() {
+        let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, -8);
+        let r: i8 = transmute(vaddv_s8(transmute(a)));
+        let e = 20_i8;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddv_u8() {
+        let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r: u8 = transmute(vaddv_u8(transmute(a)));
+        let e = 36_u8;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddvq_s16() {
+        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, -8);
+        let r: i16 = transmute(vaddvq_s16(transmute(a)));
+        let e = 20_i16;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddvq_u16() {
+        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r: u16 = transmute(vaddvq_u16(transmute(a)));
+        let e = 36_u16;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddvq_s32() {
+        let a = i32x4::new(1, 2, 3, -4);
+        let r: i32 = transmute(vaddvq_s32(transmute(a)));
+        let e = 2_i32;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddvq_u32() {
+        let a = u32x4::new(1, 2, 3, 4);
+        let r: u32 = transmute(vaddvq_u32(transmute(a)));
+        let e = 10_u32;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddvq_s8() {
+        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -16);
+        let r: i8 = transmute(vaddvq_s8(transmute(a)));
+        let e = 104_i8;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddvq_u8() {
+        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r: u8 = transmute(vaddvq_u8(transmute(a)));
+        let e = 136_u8;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddvq_s64() {
+        let a = i64x2::new(1, -2);
+        let r: i64 = transmute(vaddvq_s64(transmute(a)));
+        let e = -1_i64;
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddvq_u64() {
+        let a = u64x2::new(1, 2);
+        let r: u64 = transmute(vaddvq_u64(transmute(a)));
+        let e = 3_u64;
+        assert_eq!(r, e);
+    }
 }
 
 #[cfg(test)]
diff --git a/crates/core_arch/src/arm/neon/mod.rs b/crates/core_arch/src/arm/neon/mod.rs
index b39b1191e3..c006ea70d4 100644
--- a/crates/core_arch/src/arm/neon/mod.rs
+++ b/crates/core_arch/src/arm/neon/mod.rs
@@ -175,6 +175,19 @@ extern "C" {
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
     #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")]
     fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v4i16")]
+    fn vpadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v2i32")]
+    fn vpadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v8i8")]
+    fn vpadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v16i8")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v16i8")]
+    fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
 }
 
 #[cfg(target_arch = "arm")]
@@ -261,6 +274,61 @@ pub unsafe fn vabsq_s32(a: int32x4_t) -> int32x4_t {
     vabsq_s32_(a)
 }
 
+/// Add pairwise.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
+pub unsafe fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    vpadd_s16_(a, b)
+}
+/// Add pairwise.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
+pub unsafe fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    vpadd_s32_(a, b)
+}
+/// Add pairwise.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
+pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    vpadd_s8_(a, b)
+}
+/// Add pairwise.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
+pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    transmute(vpadd_s16_(transmute(a), transmute(b)))
+}
+/// Add pairwise.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
+pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    transmute(vpadd_s32_(transmute(a), transmute(b)))
+}
+/// Add pairwise.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
+pub unsafe fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    transmute(vpadd_s8_(transmute(a), transmute(b)))
+}
+
 /// Unsigned saturating extract narrow.
 #[inline]
 #[target_feature(enable = "neon")]
@@ -4063,6 +4131,54 @@ mod tests {
         let e = i32x4::new(i32::MIN, i32::MAX, 0, 1);
         assert_eq!(r, e);
     }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpadd_s16() {
+        let a = i16x4::new(1, 2, 3, 4);
+        let b = i16x4::new(0, -1, -2, -3);
+        let r: i16x4 = transmute(vpadd_s16(transmute(a), transmute(b)));
+        let e = i16x4::new(3, 7, -1, -5);
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpadd_s32() {
+        let a = i32x2::new(1, 2);
+        let b = i32x2::new(0, -1);
+        let r: i32x2 = transmute(vpadd_s32(transmute(a), transmute(b)));
+        let e = i32x2::new(3, -1);
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpadd_s8() {
+        let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = i8x8::new(0, -1, -2, -3, -4, -5, -6, -7);
+        let r: i8x8 = transmute(vpadd_s8(transmute(a), transmute(b)));
+        let e = i8x8::new(3, 7, 11, 15, -1, -5, -9, -13);
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpadd_u16() {
+        let a = u16x4::new(1, 2, 3, 4);
+        let b = u16x4::new(30, 31, 32, 33);
+        let r: u16x4 = transmute(vpadd_u16(transmute(a), transmute(b)));
+        let e = u16x4::new(3, 7, 61, 65);
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpadd_u32() {
+        let a = u32x2::new(1, 2);
+        let b = u32x2::new(30, 31);
+        let r: u32x2 = transmute(vpadd_u32(transmute(a), transmute(b)));
+        let e = u32x2::new(3, 61);
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpadd_u8() {
+        let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = u8x8::new(30, 31, 32, 33, 34, 35, 36, 37);
+        let r: u8x8 = transmute(vpadd_u8(transmute(a), transmute(b)));
+        let e = u8x8::new(3, 7, 11, 15, 61, 65, 69, 73);
+        assert_eq!(r, e);
+    }
 }
 
 #[cfg(test)]
diff --git a/crates/stdarch-test/src/lib.rs b/crates/stdarch-test/src/lib.rs
index c66b6a8d9d..03711e911e 100644
--- a/crates/stdarch-test/src/lib.rs
+++ b/crates/stdarch-test/src/lib.rs
@@ -3,7 +3,6 @@
 //! This basically just disassembles the current executable and then parses the
 //! output once globally and then provides the `assert` function which makes
 //! assertions about the disassembly of a function.
-#![feature(vec_leak)]
 #![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)]
 
 extern crate assert_instr_macro;