diff --git a/src/lib.rs b/src/lib.rs
index 4ccd783..d85f95e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1043,6 +1043,122 @@ struct HeaderParserConfig {
     ignore_invalid_headers: bool,
 }
 
+// Runtime build of parse_headers_iter_uninit: picks AVX2, SSE4.2 or SWAR on first call.
+#[cfg(all(
+    httparse_simd,
+    not(any(
+        httparse_simd_target_feature_sse42,
+        httparse_simd_target_feature_avx2,
+    )),
+    any(
+        target_arch = "x86",
+        target_arch = "x86_64",
+    ),
+))]
+fn parse_headers_iter_uninit<'a>(headers: &mut &mut [MaybeUninit<Header<'a>>], bytes: &mut Bytes<'a>, config: &HeaderParserConfig) -> Result<usize> {
+    use core::sync::atomic::{AtomicPtr, Ordering};
+    // One higher-ranked signature, so a single pointer serves every header lifetime.
+    type ParseFn = for<'h> fn(&mut &mut [MaybeUninit<Header<'h>>], &mut Bytes<'h>, &HeaderParserConfig) -> Result<usize>;
+    // Type-erased `ParseFn`, initially the setup routine. Atomic rather than `static mut`, so concurrent first calls are not a data race.
+    static PARSE_FUNC: AtomicPtr<()> = AtomicPtr::new(parse_headers_setup as ParseFn as *mut ());
+
+    fn parse_headers_avx2<'a>(headers: &mut &mut [MaybeUninit<Header<'a>>], bytes: &mut Bytes<'a>, config: &HeaderParserConfig) -> Result<usize> {
+        struct Avx2HeaderMatcher;
+        impl HeaderMatcher for Avx2HeaderMatcher {
+            #[inline(always)]
+            fn match_name(bytes: &mut Bytes) {
+                simd::avx2_match_header_name_vectored(bytes)
+            }
+            #[inline(always)]
+            fn match_value(bytes: &mut Bytes) {
+                simd::avx2_match_header_value_vectored(bytes)
+            }
+        }
+        _parse_headers_iter_uninit::<Avx2HeaderMatcher>(headers, bytes, config)
+    }
+
+    fn parse_headers_sse42<'a>(headers: &mut &mut [MaybeUninit<Header<'a>>], bytes: &mut Bytes<'a>, config: &HeaderParserConfig) -> Result<usize> {
+        struct Sse42HeaderMatcher;
+        impl HeaderMatcher for Sse42HeaderMatcher {
+            #[inline(always)]
+            fn match_name(bytes: &mut Bytes) {
+                simd::sse42_match_header_name_vectored(bytes)
+            }
+            #[inline(always)]
+            fn match_value(bytes: &mut Bytes) {
+                simd::sse42_match_header_value_vectored(bytes)
+            }
+        }
+        _parse_headers_iter_uninit::<Sse42HeaderMatcher>(headers, bytes, config)
+    }
+
+    fn parse_headers_swar<'a>(headers: &mut &mut [MaybeUninit<Header<'a>>], bytes: &mut Bytes<'a>, config: &HeaderParserConfig) -> Result<usize> {
+        struct SwarHeaderMatcher;
+        impl HeaderMatcher for SwarHeaderMatcher {
+            #[inline(always)]
+            fn match_name(bytes: &mut Bytes) {
+                simd::swar_match_header_name_vectored(bytes)
+            }
+            #[inline(always)]
+            fn match_value(bytes: &mut Bytes) {
+                simd::swar_match_header_value_vectored(bytes)
+            }
+        }
+        _parse_headers_iter_uninit::<SwarHeaderMatcher>(headers, bytes, config)
+    }
+
+    // First-call target: detects CPU features, caches the matching parser, then runs it.
+    fn parse_headers_setup<'a>(headers: &mut &mut [MaybeUninit<Header<'a>>], bytes: &mut Bytes<'a>, config: &HeaderParserConfig) -> Result<usize> {
+        let parse: ParseFn = if is_x86_feature_detected!("avx2") {
+            parse_headers_avx2
+        } else if is_x86_feature_detected!("sse4.2") {
+            parse_headers_sse42
+        } else {
+            parse_headers_swar
+        };
+        // Relaxed is enough: the pointer refers to code and publishes no other memory.
+        PARSE_FUNC.store(parse as *mut (), Ordering::Relaxed);
+        parse(headers, bytes, config)
+    }
+
+    // SAFETY: PARSE_FUNC only ever holds a `ParseFn` that was cast to `*mut ()`.
+    let parse: ParseFn = unsafe { core::mem::transmute(PARSE_FUNC.load(Ordering::Relaxed)) };
+    parse(headers, bytes, config)
+}
+
+// Specialized build of parse_headers_iter_uninit
+#[cfg(not(all(
+    httparse_simd,
+    not(any(
+        httparse_simd_target_feature_sse42,
+        httparse_simd_target_feature_avx2,
+    )),
+    any(
+        target_arch = "x86",
+        target_arch = "x86_64",
+    ),
+)))]
+fn parse_headers_iter_uninit<'a>(headers: &mut &mut [MaybeUninit<Header<'a>>], bytes: &mut Bytes<'a>, config: &HeaderParserConfig) -> Result<usize> {
+    struct SimdHeaderMatcher;
+    impl HeaderMatcher for SimdHeaderMatcher {
+        #[inline(always)]
+        fn match_name(bytes: &mut Bytes) {
+            simd::match_header_name_vectored(bytes)
+        }
+        #[inline(always)]
+        fn match_value(bytes: &mut Bytes) {
+            simd::match_header_value_vectored(bytes)
+        }
+    }
+    _parse_headers_iter_uninit::<SimdHeaderMatcher>(headers, bytes, config)
+}
+
+// Static-dispatch hook: _parse_headers_iter_uninit calls these in its name and value loops.
+trait HeaderMatcher {
+    fn match_name(bytes: &mut Bytes);
+    fn match_value(bytes: &mut Bytes);
+}
+
 /* Function which parsers headers into uninitialized buffer.
* * Guarantees that it doesn't write garbage, so casting @@ -1052,11 +1168,12 @@ struct HeaderParserConfig { * Also it promises `headers` get shrunk to number of initialized headers, * so casting the other way around after calling this function is safe */ -fn parse_headers_iter_uninit<'a>( +fn _parse_headers_iter_uninit<'a, Matcher: HeaderMatcher>( headers: &mut &mut [MaybeUninit>], bytes: &mut Bytes<'a>, config: &HeaderParserConfig -) -> Result { +) -> Result +{ /* Flow of this function is pretty complex, especially with macros, * so this struct makes sure we shrink `headers` to only parsed ones. @@ -1181,7 +1298,7 @@ fn parse_headers_iter_uninit<'a>( #[allow(clippy::never_loop)] // parse header name until colon let header_name: &str = 'name: loop { - simd::match_header_name_vectored(bytes); + Matcher::match_name(bytes); let mut b = next!(bytes); // SAFETY: previously bumped by 1 with next! -> always safe. @@ -1241,7 +1358,7 @@ fn parse_headers_iter_uninit<'a>( 'value_lines: loop { // parse value till EOL - simd::match_header_value_vectored(bytes); + Matcher::match_value(bytes); let b = next!(bytes); //found_ctl diff --git a/src/simd/runtime.rs b/src/simd/runtime.rs index c523a92..71dfd15 100644 --- a/src/simd/runtime.rs +++ b/src/simd/runtime.rs @@ -1,7 +1,14 @@ -use std::sync::atomic::{AtomicU8, Ordering}; -use crate::iter::Bytes; use super::avx2; use super::sse42; +use crate::iter::Bytes; +use std::sync::atomic::{AtomicU8, Ordering}; + +pub use self::avx2::match_header_name_vectored as avx2_match_header_name_vectored; +pub use self::avx2::match_header_value_vectored as avx2_match_header_value_vectored; +pub use self::sse42::match_header_name_vectored as sse42_match_header_name_vectored; +pub use self::sse42::match_header_value_vectored as sse42_match_header_value_vectored; +pub use self::swar::match_header_name_vectored as swar_match_header_name_vectored; +pub use self::swar::match_header_value_vectored as swar_match_header_value_vectored; const AVX2: u8 = 1; 
const SSE42: u8 = 2; @@ -34,24 +41,41 @@ pub fn match_header_name_vectored(bytes: &mut Bytes) { super::swar::match_header_name_vectored(bytes); } +static mut MATCH_URI_VECTORED: fn(&mut Bytes) = setup_and_call_match_uri_vectored; +static mut MATCH_HEADER_VALUE_VECTORED: fn(&mut Bytes) = setup_and_call_match_header_value_vectored; + +fn setup_and_call_match_uri_vectored(bytes: &mut Bytes) { + unsafe { + let feature = get_runtime_feature(); + MATCH_URI_VECTORED = match feature { + AVX2 => avx2::match_uri_vectored, + SSE42 => sse42::match_uri_vectored, + _ /* NOP */ => super::swar::match_uri_vectored, + }; + MATCH_URI_VECTORED(bytes); + } +} + +fn setup_and_call_match_header_value_vectored(bytes: &mut Bytes) { + unsafe { + let feature = get_runtime_feature(); + MATCH_HEADER_VALUE_VECTORED = match feature { + AVX2 => avx2::match_header_value_vectored, + SSE42 => sse42::match_header_value_vectored, + _ /* NOP */ => super::swar::match_header_value_vectored, + }; + MATCH_HEADER_VALUE_VECTORED(bytes); + } +} + pub fn match_uri_vectored(bytes: &mut Bytes) { - // SAFETY: calls are guarded by a feature check unsafe { - match get_runtime_feature() { - AVX2 => avx2::match_uri_vectored(bytes), - SSE42 => sse42::match_uri_vectored(bytes), - _ /* NOP */ => super::swar::match_uri_vectored(bytes), - } + MATCH_URI_VECTORED(bytes); } } pub fn match_header_value_vectored(bytes: &mut Bytes) { - // SAFETY: calls are guarded by a feature check unsafe { - match get_runtime_feature() { - AVX2 => avx2::match_header_value_vectored(bytes), - SSE42 => sse42::match_header_value_vectored(bytes), - _ /* NOP */ => super::swar::match_header_value_vectored(bytes), - } + MATCH_HEADER_VALUE_VECTORED(bytes); } }