Skip to content

Commit 507cef8

Browse files
committed
Refactor avx512f: mask operations
1 parent b348352 commit 507cef8

File tree

2 files changed

+198
-10
lines changed

2 files changed

+198
-10
lines changed

crates/core_arch/missing-x86.md

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -149,19 +149,10 @@
149149

150150
<details><summary>["AVX512F"]</summary><p>
151151

152-
* [ ] [`_cvtmask16_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cvtmask16_u32)
153-
* [ ] [`_cvtu32_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cvtu32_mask16)
154-
* [ ] [`_kortest_mask16_u8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kortest_mask16_u8)
155-
* [ ] [`_kortestc_mask16_u8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kortestc_mask16_u8)
156-
* [ ] [`_kortestz_mask16_u8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kortestz_mask16_u8)
157-
* [ ] [`_kshiftli_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kshiftli_mask16)
158-
* [ ] [`_kshiftri_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kshiftri_mask16)
159-
* [ ] [`_load_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_load_mask16)
160152
* [ ] [`_mm512_i32logather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
161153
* [ ] [`_mm512_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
162154
* [ ] [`_mm512_i32loscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
163155
* [ ] [`_mm512_i32loscatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
164-
* [ ] [`_mm512_kortestz`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kortestz)
165156
* [ ] [`_mm512_mask_i32logather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
166157
* [ ] [`_mm512_mask_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
167158
* [ ] [`_mm512_mask_i32loscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
@@ -173,7 +164,6 @@
173164
* [ ] [`_mm_mask_store_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
174165
* [ ] [`_mm_maskz_load_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
175166
* [ ] [`_mm_maskz_load_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
176-
* [ ] [`_store_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_store_mask16)
177167
</p></details>
178168

179169

crates/core_arch/src/x86/avx512f.rs

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27279,6 +27279,26 @@ pub unsafe fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
2727927279
_mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
2728027280
}
2728127281

27282+
/// Convert 16-bit mask a into an integer value, and store the result in dst.
27283+
///
27284+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
27285+
#[inline]
27286+
#[target_feature(enable = "avx512f")]
27287+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27288+
pub unsafe fn _cvtmask16_u32(a: __mmask16) -> u32 {
27289+
a as u32
27290+
}
27291+
27292+
/// Convert 32-bit integer value a to an 16-bit mask and store the result in dst.
27293+
///
27294+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
27295+
#[inline]
27296+
#[target_feature(enable = "avx512f")]
27297+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27298+
pub unsafe fn _cvtu32_mask16(a: u32) -> __mmask16 {
27299+
a as __mmask16
27300+
}
27301+
2728227302
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
2728327303
///
2728427304
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
@@ -27409,6 +27429,83 @@ pub unsafe fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
2740927429
_mm512_knot(_mm512_kxor(a, b))
2741027430
}
2741127431

27432+
/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
27433+
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
27434+
///
27435+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
27436+
#[inline]
27437+
#[target_feature(enable = "avx512f")]
27438+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27439+
pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
27440+
let tmp = _kor_mask16(a, b);
27441+
*all_ones = (tmp == 0xff) as u8;
27442+
(tmp == 0) as u8
27443+
}
27444+
27445+
/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
27446+
/// store 0 in dst.
27447+
///
27448+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
27449+
#[inline]
27450+
#[target_feature(enable = "avx512f")]
27451+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27452+
pub unsafe fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
27453+
(_kor_mask16(a, b) == 0xff) as u8
27454+
}
27455+
27456+
/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
27457+
/// store 0 in dst.
27458+
///
27459+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
27460+
#[inline]
27461+
#[target_feature(enable = "avx512f")]
27462+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27463+
pub unsafe fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
27464+
(_kor_mask16(a, b) == 0) as u8
27465+
}
27466+
27467+
/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
27468+
///
27469+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
27470+
#[inline]
27471+
#[target_feature(enable = "avx512f")]
27472+
#[rustc_legacy_const_generics(1)]
27473+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27474+
pub unsafe fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
27475+
a << COUNT
27476+
}
27477+
27478+
/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
27479+
///
27480+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
27481+
#[inline]
27482+
#[target_feature(enable = "avx512f")]
27483+
#[rustc_legacy_const_generics(1)]
27484+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27485+
pub unsafe fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
27486+
a >> COUNT
27487+
}
27488+
27489+
/// Load 16-bit mask from memory
27490+
///
27491+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
27492+
#[inline]
27493+
#[target_feature(enable = "avx512f")]
27494+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27495+
pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
27496+
*mem_addr
27497+
}
27498+
27499+
/// Store 16-bit mask to memory
27500+
///
27501+
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
27502+
#[inline]
27503+
#[target_feature(enable = "avx512f")]
27504+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27505+
pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
27506+
*mem_addr = a;
27507+
}
27508+
2741227509
/// Copy 16-bit mask a to k.
2741327510
///
2741427511
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
@@ -27468,6 +27565,22 @@ pub unsafe fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
2746827565
}
2746927566
}
2747027567

27568+
/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
27569+
///
27570+
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kortestz)
27571+
#[inline]
27572+
#[target_feature(enable = "avx512f")]
27573+
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27574+
#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kortestw
27575+
pub unsafe fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
27576+
let r = a | b;
27577+
if r == 0 {
27578+
1
27579+
} else {
27580+
0
27581+
}
27582+
}
27583+
2747127584
/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
2747227585
///
2747327586
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
@@ -54084,6 +54197,22 @@ mod tests {
5408454197
assert_eq_m128i(r, e);
5408554198
}
5408654199

54200+
#[simd_test(enable = "avx512f")]
54201+
unsafe fn test_cvtmask16_u32() {
54202+
let a: __mmask16 = 0b11001100_00110011;
54203+
let r = _cvtmask16_u32(a);
54204+
let e: u32 = 0b11001100_00110011;
54205+
assert_eq!(r, e);
54206+
}
54207+
54208+
#[simd_test(enable = "avx512f")]
54209+
unsafe fn test_cvtu32_mask16() {
54210+
let a: u32 = 0b11001100_00110011;
54211+
let r = _cvtu32_mask16(a);
54212+
let e: __mmask16 = 0b11001100_00110011;
54213+
assert_eq!(r, e);
54214+
}
54215+
5408754216
#[simd_test(enable = "avx512f")]
5408854217
unsafe fn test_mm512_kand() {
5408954218
let a: u16 = 0b11001100_00110011;
@@ -54190,6 +54319,65 @@ mod tests {
5419054319
assert_eq!(r, e);
5419154320
}
5419254321

54322+
#[simd_test(enable = "avx512dq")]
54323+
unsafe fn test_kortest_mask16_u8() {
54324+
let a: __mmask16 = 0b0110100101101001;
54325+
let b: __mmask16 = 0b1011011010110110;
54326+
let mut all_ones: u8 = 0;
54327+
let r = _kortest_mask16_u8(a, b, &mut all_ones);
54328+
assert_eq!(r, 0);
54329+
assert_eq!(all_ones, 1);
54330+
}
54331+
54332+
#[simd_test(enable = "avx512dq")]
54333+
unsafe fn test_kortestc_mask16_u8() {
54334+
let a: __mmask16 = 0b0110100101101001;
54335+
let b: __mmask16 = 0b1011011010110110;
54336+
let r = _kortestc_mask16_u8(a, b);
54337+
assert_eq!(r, 1);
54338+
}
54339+
54340+
#[simd_test(enable = "avx512dq")]
54341+
unsafe fn test_kortestz_mask16_u8() {
54342+
let a: __mmask16 = 0b0110100101101001;
54343+
let b: __mmask16 = 0b1011011010110110;
54344+
let r = _kortestz_mask16_u8(a, b);
54345+
assert_eq!(r, 0);
54346+
}
54347+
54348+
#[simd_test(enable = "avx512dq")]
54349+
unsafe fn test_kshiftli_mask16() {
54350+
let a: __mmask16 = 0b1001011011000011;
54351+
let r = _kshiftli_mask16::<3>(a);
54352+
let e: __mmask16 = 0b1011011000011000;
54353+
assert_eq!(r, e);
54354+
}
54355+
54356+
#[simd_test(enable = "avx512dq")]
54357+
unsafe fn test_kshiftri_mask16() {
54358+
let a: __mmask16 = 0b0110100100111100;
54359+
let r = _kshiftri_mask16::<3>(a);
54360+
let e: __mmask16 = 0b0000110100100111;
54361+
assert_eq!(r, e);
54362+
}
54363+
54364+
#[simd_test(enable = "avx512f")]
54365+
unsafe fn test_load_mask16() {
54366+
let a: __mmask16 = 0b1001011011000011;
54367+
let r = _load_mask16(&a);
54368+
let e: __mmask16 = 0b1001011011000011;
54369+
assert_eq!(r, e);
54370+
}
54371+
54372+
#[simd_test(enable = "avx512f")]
54373+
unsafe fn test_store_mask16() {
54374+
let a: __mmask16 = 0b0110100100111100;
54375+
let mut r = 0;
54376+
_store_mask16(&mut r, a);
54377+
let e: __mmask16 = 0b0110100100111100;
54378+
assert_eq!(r, e);
54379+
}
54380+
5419354381
#[simd_test(enable = "avx512f")]
5419454382
unsafe fn test_mm512_kmov() {
5419554383
let a: u16 = 0b11001100_00110011;
@@ -54234,6 +54422,16 @@ mod tests {
5423454422
assert_eq!(r, 1);
5423554423
}
5423654424

54425+
#[simd_test(enable = "avx512f")]
54426+
unsafe fn test_mm512_kortestz() {
54427+
let a: u16 = 0b11001100_00110011;
54428+
let b: u16 = 0b00101110_00001011;
54429+
let r = _mm512_kortestz(a, b);
54430+
assert_eq!(r, 0);
54431+
let r = _mm512_kortestz(0, 0);
54432+
assert_eq!(r, 1);
54433+
}
54434+
5423754435
#[simd_test(enable = "avx512f")]
5423854436
unsafe fn test_mm512_test_epi32_mask() {
5423954437
let a = _mm512_set1_epi32(1 << 0);

0 commit comments

Comments
 (0)