@@ -21,25 +21,62 @@ pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
21
21
#[ allow( non_snake_case, overflowing_literals) ]
22
22
#[ allow( unused) ]
23
23
unsafe fn match_url_char_32_avx ( buf : & [ u8 ] ) -> usize {
24
+ // NOTE: This check might be not necessary since this function is only used in
25
+ // `match_uri_vectored` where buffer overflow is taken care of.
24
26
debug_assert ! ( buf. len( ) >= 32 ) ;
25
27
26
28
#[ cfg( target_arch = "x86" ) ]
27
29
use core:: arch:: x86:: * ;
28
30
#[ cfg( target_arch = "x86_64" ) ]
29
31
use core:: arch:: x86_64:: * ;
30
32
33
+ // pointer to buffer
31
34
let ptr = buf. as_ptr ( ) ;
32
35
33
36
// %x21-%x7e %x80-%xff
37
+ //
38
+ // Character ranges allowed by this function, can also be interpreted as:
39
+ // 33 =< (x != 127) =< 255
40
+ //
41
+ // Create a vector full of DEL (0x7f) characters.
34
42
let DEL : __m256i = _mm256_set1_epi8 ( 0x7f ) ;
43
+ // Create a vector full of exclamation mark (!) (0x21) characters.
44
+ // Used as lower threshold, characters in URLs cannot be smaller than this.
35
45
let LOW : __m256i = _mm256_set1_epi8 ( 0x21 ) ;
36
46
47
+ // Load a chunk of 32 bytes from `ptr` as a vector.
48
+ // We can check 32 bytes in parallel at most with AVX2 since
49
+ // YMM registers can only have 256 bits most.
37
50
let dat = _mm256_lddqu_si256 ( ptr as * const _ ) ;
51
+
38
52
// unsigned comparison dat >= LOW
53
+ //
54
+ // We create a new via `_mm256_max_epu8` which compares vectors `dat` and `LOW`
55
+ // and picks the max. values from each for all indices.
56
+ // So if a byte in `dat` is <= 32, it'll be represented as 33
57
+ // which is the smallest valid character.
58
+ //
59
+ // Then, we compare the new vector with `dat` for equality.
60
+ //
61
+ // `_mm256_cmpeq_epi8` returns a new vector where;
62
+ // * matching bytes are set to 0xFF (all bits set),
63
+ // * nonmatching bytes are set to 0 (no bits set).
39
64
let low = _mm256_cmpeq_epi8 ( _mm256_max_epu8 ( dat, LOW ) , dat) ;
65
+ // Similar to what we did before, but now invalid characters are set to 0xFF.
40
66
let del = _mm256_cmpeq_epi8 ( dat, DEL ) ;
67
+
68
+ // We glue the both comparisons via `_mm256_andnot_si256`.
69
+ //
70
+ // Since the representation of truthy/falsy differ in these comparisons,
71
+ // we cannot use
72
+ // we are in need of bitwise NOT to convert valid characters of `del`.
41
73
let bit = _mm256_andnot_si256 ( del, low) ;
74
+ // This creates a bitmask from the most significant bit of each byte.
75
+ // Simply, we're converting a vector value to scalar value here.
42
76
let res = _mm256_movemask_epi8 ( bit) as u32 ;
77
+
78
+ // Count trailing zeros to find the first encountered invalid character.
79
+ // Bitwise NOT is required once again to flip truthiness.
43
80
// TODO: use .trailing_ones() once MSRV >= 1.46
44
81
( !res) . trailing_zeros ( ) as usize
45
82
}
@@ -72,17 +109,38 @@ unsafe fn match_header_value_char_32_avx(buf: &[u8]) -> usize {
72
109
let ptr = buf. as_ptr ( ) ;
73
110
74
111
// %x09 %x20-%x7e %x80-%xff
112
+ // Create a vector full of horizontal tab (\t) (0x09) characters.
75
113
let TAB : __m256i = _mm256_set1_epi8 ( 0x09 ) ;
114
+ // Create a vector full of DEL (0x7f) characters.
76
115
let DEL : __m256i = _mm256_set1_epi8 ( 0x7f ) ;
116
+ // Create a vector full of space (0x20) characters.
77
117
let LOW : __m256i = _mm256_set1_epi8 ( 0x20 ) ;
78
118
119
+ // Load a chunk of 32 bytes from `ptr` as a vector.
79
120
let dat = _mm256_lddqu_si256 ( ptr as * const _ ) ;
121
+
80
122
// unsigned comparison dat >= LOW
123
+ //
124
+ // Same as what we do in `match_url_char_32_avx`.
125
+ // This time the lower threshold is set to space character though.
81
126
let low = _mm256_cmpeq_epi8 ( _mm256_max_epu8 ( dat, LOW ) , dat) ;
127
+ // Check if `dat` includes `TAB` characters.
82
128
let tab = _mm256_cmpeq_epi8 ( dat, TAB ) ;
129
+ // Check if `dat` includes `DEL` characters.
83
130
let del = _mm256_cmpeq_epi8 ( dat, DEL ) ;
131
+
132
+ // Combine all comparisons together, notice that we're also using OR
133
+ // to connect `low` and `tab` but flip bits of `del`.
134
+ //
135
+ // In the end, this is simply:
136
+ // ~del & (low | tab)
84
137
let bit = _mm256_andnot_si256 ( del, _mm256_or_si256 ( low, tab) ) ;
138
+ // This creates a bitmask from the most significant bit of each byte.
139
+ // Creates a scalar value from vector value.
85
140
let res = _mm256_movemask_epi8 ( bit) as u32 ;
141
+
142
+ // Count trailing zeros to find the first encountered invalid character.
143
+ // Bitwise NOT is required once again to flip truthiness.
86
144
// TODO: use .trailing_ones() once MSRV >= 1.46
87
145
( !res) . trailing_zeros ( ) as usize
88
146
}
0 commit comments