1
1
/// SWAR: SIMD Within A Register
2
2
/// SIMD validator backend that validates register-sized chunks of data at a time.
3
- // TODO: current impl assumes 64-bit registers, optimize for 32-bit
4
3
use crate :: { is_header_name_token, is_header_value_token, is_uri_token, Bytes } ;
5
4
5
+ // Adapt block-size to match native register size, i.e: 32bit => 4, 64bit => 8
6
+ const BLOCK_SIZE : usize = core:: mem:: size_of :: < usize > ( ) ;
7
+ type ByteBlock = [ u8 ; BLOCK_SIZE ] ;
8
+
6
9
#[ inline]
7
10
pub fn match_uri_vectored ( bytes : & mut Bytes ) {
8
11
loop {
9
- if let Some ( bytes8) = bytes. peek_n :: < [ u8 ; 8 ] > ( 8 ) {
12
+ if let Some ( bytes8) = bytes. peek_n :: < ByteBlock > ( BLOCK_SIZE ) {
10
13
let n = match_uri_char_8_swar ( bytes8) ;
11
14
unsafe {
12
15
bytes. advance ( n) ;
13
16
}
14
- if n == 8 {
17
+ if n == BLOCK_SIZE {
15
18
continue ;
16
19
}
17
20
}
@@ -28,12 +31,12 @@ pub fn match_uri_vectored(bytes: &mut Bytes) {
28
31
#[ inline]
29
32
pub fn match_header_value_vectored ( bytes : & mut Bytes ) {
30
33
loop {
31
- if let Some ( bytes8) = bytes. peek_n :: < [ u8 ; 8 ] > ( 8 ) {
34
+ if let Some ( bytes8) = bytes. peek_n :: < ByteBlock > ( BLOCK_SIZE ) {
32
35
let n = match_header_value_char_8_swar ( bytes8) ;
33
36
unsafe {
34
37
bytes. advance ( n) ;
35
38
}
36
- if n == 8 {
39
+ if n == BLOCK_SIZE {
37
40
continue ;
38
41
}
39
42
}
@@ -49,19 +52,19 @@ pub fn match_header_value_vectored(bytes: &mut Bytes) {
49
52
50
53
#[ inline]
51
54
pub fn match_header_name_vectored ( bytes : & mut Bytes ) {
52
- while let Some ( block) = bytes. peek_n :: < [ u8 ; 8 ] > ( 8 ) {
55
+ while let Some ( block) = bytes. peek_n :: < ByteBlock > ( BLOCK_SIZE ) {
53
56
let n = match_block ( is_header_name_token, block) ;
54
57
unsafe {
55
58
bytes. advance ( n) ;
56
59
}
57
- if n != 8 {
60
+ if n != BLOCK_SIZE {
58
61
return ;
59
62
}
60
63
}
61
64
unsafe { bytes. advance ( match_tail ( is_header_name_token, bytes. as_ref ( ) ) ) } ;
62
65
}
63
66
64
- // Matches "tail", i.e: when we have <8 bytes in the buffer, should be uncommon
67
+ // Matches "tail", i.e: when we have <BLOCK_SIZE bytes in the buffer, should be uncommon
65
68
#[ cold]
66
69
#[ inline]
67
70
fn match_tail ( f : impl Fn ( u8 ) -> bool , bytes : & [ u8 ] ) -> usize {
@@ -75,35 +78,35 @@ fn match_tail(f: impl Fn(u8) -> bool, bytes: &[u8]) -> usize {
75
78
76
79
// Naive fallback block matcher
77
80
#[ inline( always) ]
78
- fn match_block ( f : impl Fn ( u8 ) -> bool , block : [ u8 ; 8 ] ) -> usize {
81
+ fn match_block ( f : impl Fn ( u8 ) -> bool , block : ByteBlock ) -> usize {
79
82
for ( i, & b) in block. iter ( ) . enumerate ( ) {
80
83
if !f ( b) {
81
84
return i;
82
85
}
83
86
}
84
- 8
87
+ BLOCK_SIZE
85
88
}
86
89
87
// A const alternative to `from_ne_bytes` to avoid bumping MSRV (1.36 => 1.44)
// creates a usize whose bytes are each equal to b
const fn uniform_block(b: u8) -> usize {
    // The product is formed in 64 bits (`b * [1_u8; 8]` cannot overflow, since
    // 0xFF * 0x0101_0101_0101_0101 == u64::MAX). The `as usize` cast then keeps
    // only the low register-sized part, so narrower targets get the same byte
    // repeated across their whole word.
    (b as u64 * 0x01_01_01_01_01_01_01_01) as usize
}
92
95
93
96
// A byte-wise range-check on an entire word/block,
94
97
// ensuring all bytes in the word satisfy
95
98
// `33 <= x <= 126 && x != '>' && x != '<'`
96
99
// IMPORTANT: it yields false negatives if the block contains '?'
97
100
#[ inline]
98
- fn match_uri_char_8_swar ( block : [ u8 ; 8 ] ) -> usize {
101
+ fn match_uri_char_8_swar ( block : ByteBlock ) -> usize {
99
102
// 33 <= x <= 126
100
103
const M : u8 = 0x21 ;
101
104
const N : u8 = 0x7E ;
102
- const BM : u64 = uniform_block ( M ) ;
103
- const BN : u64 = uniform_block ( 127 - N ) ;
104
- const M128 : u64 = uniform_block ( 128 ) ;
105
+ const BM : usize = uniform_block ( M ) ;
106
+ const BN : usize = uniform_block ( 127 - N ) ;
107
+ const M128 : usize = uniform_block ( 128 ) ;
105
108
106
- let x = u64 :: from_ne_bytes ( block) ; // Really just a transmute
109
+ let x = usize :: from_ne_bytes ( block) ; // Really just a transmute
107
110
let lt = x. wrapping_sub ( BM ) & !x; // <= m
108
111
let gt = x. wrapping_add ( BN ) | x; // >= n
109
112
@@ -130,8 +133,8 @@ fn match_uri_char_8_swar(block: [u8; 8]) -> usize {
130
133
// }
131
134
// (xordist(b'<', 2), xordist(b'>', 2))
132
135
// ```
133
- const B3 : u64 = uniform_block ( 3 ) ; // (dist <= 2) + 1 to wrap
134
- const BGT : u64 = uniform_block ( b'>' ) ;
136
+ const B3 : usize = uniform_block ( 3 ) ; // (dist <= 2) + 1 to wrap
137
+ const BGT : usize = uniform_block ( b'>' ) ;
135
138
136
139
let xgt = x ^ BGT ;
137
140
let ltgtq = xgt. wrapping_sub ( B3 ) & !xgt;
@@ -143,15 +146,15 @@ fn match_uri_char_8_swar(block: [u8; 8]) -> usize {
143
146
// ensuring all bytes in the word satisfy `32 <= x <= 126`
144
147
// IMPORTANT: false negatives if obs-text is present (0x80..=0xFF)
145
148
#[ inline]
146
- fn match_header_value_char_8_swar ( block : [ u8 ; 8 ] ) -> usize {
149
+ fn match_header_value_char_8_swar ( block : ByteBlock ) -> usize {
147
150
// 32 <= x <= 126
148
151
const M : u8 = 0x20 ;
149
152
const N : u8 = 0x7E ;
150
- const BM : u64 = uniform_block ( M ) ;
151
- const BN : u64 = uniform_block ( 127 - N ) ;
152
- const M128 : u64 = uniform_block ( 128 ) ;
153
+ const BM : usize = uniform_block ( M ) ;
154
+ const BN : usize = uniform_block ( 127 - N ) ;
155
+ const M128 : usize = uniform_block ( 128 ) ;
153
156
154
- let x = u64 :: from_ne_bytes ( block) ; // Really just a transmute
157
+ let x = usize :: from_ne_bytes ( block) ; // Really just a transmute
155
158
let lt = x. wrapping_sub ( BM ) & !x; // <= m
156
159
let gt = x. wrapping_add ( BN ) | x; // >= n
157
160
offsetnz ( ( lt | gt) & M128 )
@@ -160,10 +163,10 @@ fn match_header_value_char_8_swar(block: [u8; 8]) -> usize {
160
163
/// Check block to find offset of first non-zero byte
161
164
// NOTE: Curiously `block.trailing_zeros() >> 3` appears to be slower, maybe revisit
162
165
#[ inline]
163
- fn offsetnz ( block : u64 ) -> usize {
166
+ fn offsetnz ( block : usize ) -> usize {
164
167
// fast path optimistic case (common for long valid sequences)
165
168
if block == 0 {
166
- return 8 ;
169
+ return BLOCK_SIZE ;
167
170
}
168
171
169
172
// perf: rust will unroll this loop
@@ -177,19 +180,19 @@ fn offsetnz(block: u64) -> usize {
177
180
178
181
#[ test]
179
182
fn test_is_header_value_block ( ) {
180
- let is_header_value_block = |b| match_header_value_char_8_swar ( b) == 8 ;
183
+ let is_header_value_block = |b| match_header_value_char_8_swar ( b) == BLOCK_SIZE ;
181
184
182
185
// 0..32 => false
183
186
for b in 0 ..32_u8 {
184
- assert_eq ! ( is_header_value_block( [ b; 8 ] ) , false , "b={}" , b) ;
187
+ assert_eq ! ( is_header_value_block( [ b; BLOCK_SIZE ] ) , false , "b={}" , b) ;
185
188
}
186
189
// 32..127 => true
187
190
for b in 32 ..127_u8 {
188
- assert_eq ! ( is_header_value_block( [ b; 8 ] ) , true , "b={}" , b) ;
191
+ assert_eq ! ( is_header_value_block( [ b; BLOCK_SIZE ] ) , true , "b={}" , b) ;
189
192
}
190
193
// 127..=255 => false
191
194
for b in 127 ..=255_u8 {
192
- assert_eq ! ( is_header_value_block( [ b; 8 ] ) , false , "b={}" , b) ;
195
+ assert_eq ! ( is_header_value_block( [ b; BLOCK_SIZE ] ) , false , "b={}" , b) ;
193
196
}
194
197
195
198
// A few sanity checks on non-uniform bytes for safe-measure
@@ -199,30 +202,30 @@ fn test_is_header_value_block() {
199
202
200
203
// Feeds uniform blocks (every byte equal) through `match_uri_char_8_swar` and
// checks which byte values make the whole block count as matched.
#[test]
fn test_is_uri_block() {
    // A block counts as a URI block when the matcher reports all bytes matched.
    let is_uri_block = |b| match_uri_char_8_swar(b) == BLOCK_SIZE;

    // 0..33 => false
    for b in 0..33_u8 {
        assert_eq!(is_uri_block([b; BLOCK_SIZE]), false, "b={}", b);
    }
    // 33..127 => true if b not in { '<', '?', '>' }
    let falsy = |b| b"<?>".contains(&b);
    for b in 33..127_u8 {
        assert_eq!(is_uri_block([b; BLOCK_SIZE]), !falsy(b), "b={}", b);
    }
    // 127..=255 => false
    for b in 127..=255_u8 {
        assert_eq!(is_uri_block([b; BLOCK_SIZE]), false, "b={}", b);
    }
}
218
221
219
222
// Places a single non-zero byte at each position of an otherwise zeroed block
// and checks that `offsetnz` reports that position.
#[test]
fn test_offsetnz() {
    for i in 0..BLOCK_SIZE {
        // Arrays of `u8` are `Copy`, so build a fresh block each iteration
        // instead of cloning a template.
        let mut seq = [0_u8; BLOCK_SIZE];
        seq[i] = 1;
        let x = usize::from_ne_bytes(seq);
        assert_eq!(offsetnz(x), i);
    }
}
0 commit comments