Skip to content

Commit 24c723a

Browse files
acfoltzerseanmonstar
authored andcommitted
Add flags to allow multiple spaces in request and status lines
These new flags set whether multiple spaces are allowed as delimiters in request lines and response status lines. The latest version of the HTTP/1.1 spec ([request lines][spec-req], [response status lines][spec-resp]) allows implementations to parse multiple whitespace characters in place of the `SP` delimiter in the response status line, including: > SP, HTAB, VT (%x0B), FF (%x0C), or bare CR This option relaxes the parsers to allow for multiple spaces, but does *not* allow the delimiters to contain the other mentioned whitespace characters. We'd rather wait for someone to have a concrete use case before deciding to support that, as allowing chars like `\r` raises serious security questions as described by the spec. Happily this seems to be a performance _improvement_ rather than a regression even when the new flags are disabled (results vs 6f6ff10): ``` req/req time: [292.13 ns 295.81 ns 301.23 ns] thrpt: [2.2261 GiB/s 2.2668 GiB/s 2.2954 GiB/s] change: time: [-13.390% -12.468% -11.159%] (p = 0.00 < 0.05) thrpt: [+12.561% +14.244% +15.460%] Performance has improved. req_short/req_short time: [62.834 ns 62.992 ns 63.188 ns] thrpt: [1.0023 GiB/s 1.0054 GiB/s 1.0079 GiB/s] change: time: [-11.112% -9.9009% -8.9416%] (p = 0.00 < 0.05) thrpt: [+9.8196% +10.989% +12.501%] Performance has improved. resp/resp time: [303.51 ns 304.23 ns 305.34 ns] thrpt: [2.1320 GiB/s 2.1398 GiB/s 2.1449 GiB/s] change: time: [-12.059% -11.512% -11.016%] (p = 0.00 < 0.05) thrpt: [+12.379% +13.010% +13.713%] Performance has improved. resp_short/resp_short time: [67.521 ns 67.686 ns 67.929 ns] thrpt: [1.2476 GiB/s 1.2521 GiB/s 1.2552 GiB/s] change: time: [-9.1562% -8.7366% -8.3331%] (p = 0.00 < 0.05) thrpt: [+9.0906% +9.5730% +10.079%] Performance has improved. ``` I haven't thrown it into godbolt or anything yet, but I suspect something about this change triggers a different inlining behavior compared to the baseline. [spec-req]: https://httpwg.org/http-core/draft-ietf-httpbis-messaging-latest.html#rfc.section.3.p.3 [spec-resp]: https://httpwg.org/http-core/draft-ietf-httpbis-messaging-latest.html#rfc.section.4.p.3
1 parent 58ebf11 commit 24c723a

File tree

1 file changed

+246
-9
lines changed

1 file changed

+246
-9
lines changed

src/lib.rs

Lines changed: 246 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,8 @@ impl<T> Status<T> {
245245
pub struct ParserConfig {
246246
allow_spaces_after_header_name_in_responses: bool,
247247
allow_obsolete_multiline_headers_in_responses: bool,
248+
allow_multiple_spaces_in_request_line_delimiters: bool,
249+
allow_multiple_spaces_in_response_status_delimiters: bool,
248250
}
249251

250252
impl ParserConfig {
@@ -257,6 +259,53 @@ impl ParserConfig {
257259
self
258260
}
259261

262+
/// Sets whether multiple spaces are allowed as delimiters in request lines.
263+
///
264+
/// # Background
265+
///
266+
/// The [latest version of the HTTP/1.1 spec][spec] allows implementations to parse multiple
267+
/// whitespace characters in place of the `SP` delimiters in the request line, including:
268+
///
269+
/// > SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
270+
///
271+
/// This option relaxes the parser to allow for multiple spaces, but does *not* allow the
272+
/// request line to contain the other mentioned whitespace characters.
273+
///
274+
/// [spec]: https://httpwg.org/http-core/draft-ietf-httpbis-messaging-latest.html#rfc.section.3.p.3
275+
pub fn allow_multiple_spaces_in_request_line_delimiters(&mut self, value: bool) -> &mut Self {
276+
self.allow_multiple_spaces_in_request_line_delimiters = value;
277+
self
278+
}
279+
280+
/// Whether multiple spaces are allowed as delimiters in request lines.
281+
pub fn multiple_spaces_in_request_line_delimiters_are_allowed(&self) -> bool {
282+
self.allow_multiple_spaces_in_request_line_delimiters
283+
}
284+
285+
/// Sets whether multiple spaces are allowed as delimiters in response status lines.
286+
///
287+
/// # Background
288+
///
289+
/// The [latest version of the HTTP/1.1 spec][spec] allows implementations to parse multiple
290+
/// whitespace characters in place of the `SP` delimiters in the response status line,
291+
/// including:
292+
///
293+
/// > SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
294+
///
295+
/// This option relaxes the parser to allow for multiple spaces, but does *not* allow the status
296+
/// line to contain the other mentioned whitespace characters.
297+
///
298+
/// [spec]: https://httpwg.org/http-core/draft-ietf-httpbis-messaging-latest.html#rfc.section.4.p.3
299+
pub fn allow_multiple_spaces_in_response_status_delimiters(&mut self, value: bool) -> &mut Self {
300+
self.allow_multiple_spaces_in_response_status_delimiters = value;
301+
self
302+
}
303+
304+
/// Whether multiple spaces are allowed as delimiters in response status lines.
305+
pub fn multiple_spaces_in_response_status_delimiters_are_allowed(&self) -> bool {
306+
self.allow_multiple_spaces_in_response_status_delimiters
307+
}
308+
260309
/// Sets whether obsolete multiline headers should be allowed.
261310
///
262311
/// This is an obsolete part of HTTP/1. Use at your own risk. If you are
@@ -293,6 +342,25 @@ impl ParserConfig {
293342
self.allow_obsolete_multiline_headers_in_responses
294343
}
295344

345+
/// Parses a request with the given config.
346+
pub fn parse_request<'headers, 'buf>(
347+
&self,
348+
request: &mut Request<'headers, 'buf>,
349+
buf: &'buf [u8],
350+
) -> Result<usize> {
351+
request.parse_with_config(buf, self)
352+
}
353+
354+
/// Parses a request with the given config and buffer for headers
355+
pub fn parse_request_with_uninit_headers<'headers, 'buf>(
356+
&self,
357+
request: &mut Request<'headers, 'buf>,
358+
buf: &'buf [u8],
359+
headers: &'headers mut [MaybeUninit<Header<'buf>>],
360+
) -> Result<usize> {
361+
request.parse_with_config_and_uninit_headers(buf, self, headers)
362+
}
363+
296364
/// Parses a response with the given config.
297365
pub fn parse_response<'headers, 'buf>(
298366
&self,
@@ -362,20 +430,23 @@ impl<'h, 'b> Request<'h, 'b> {
362430
}
363431
}
364432

365-
/// Try to parse a buffer of bytes into the Request,
366-
/// except use an uninitialized slice of `Header`s.
367-
///
368-
/// For more information, see `parse`
369-
pub fn parse_with_uninit_headers(
433+
fn parse_with_config_and_uninit_headers(
370434
&mut self,
371435
buf: &'b [u8],
436+
config: &ParserConfig,
372437
mut headers: &'h mut [MaybeUninit<Header<'b>>],
373438
) -> Result<usize> {
374439
let orig_len = buf.len();
375440
let mut bytes = Bytes::new(buf);
376441
complete!(skip_empty_lines(&mut bytes));
377442
self.method = Some(complete!(parse_token(&mut bytes)));
443+
if config.allow_multiple_spaces_in_request_line_delimiters {
444+
complete!(skip_spaces(&mut bytes));
445+
}
378446
self.path = Some(complete!(parse_uri(&mut bytes)));
447+
if config.allow_multiple_spaces_in_request_line_delimiters {
448+
complete!(skip_spaces(&mut bytes));
449+
}
379450
self.version = Some(complete!(parse_version(&mut bytes)));
380451
newline!(bytes);
381452

@@ -391,17 +462,26 @@ impl<'h, 'b> Request<'h, 'b> {
391462
Ok(Status::Complete(len + headers_len))
392463
}
393464

394-
/// Try to parse a buffer of bytes into the Request.
465+
/// Try to parse a buffer of bytes into the Request,
466+
/// except use an uninitialized slice of `Header`s.
395467
///
396-
/// Returns byte offset in `buf` to start of HTTP body.
397-
pub fn parse(&mut self, buf: &'b [u8]) -> Result<usize> {
468+
/// For more information, see `parse`
469+
pub fn parse_with_uninit_headers(
470+
&mut self,
471+
buf: &'b [u8],
472+
headers: &'h mut [MaybeUninit<Header<'b>>],
473+
) -> Result<usize> {
474+
self.parse_with_config_and_uninit_headers(buf, &Default::default(), headers)
475+
}
476+
477+
fn parse_with_config(&mut self, buf: &'b [u8], config: &ParserConfig) -> Result<usize> {
398478
let headers = mem::replace(&mut self.headers, &mut []);
399479

400480
/* SAFETY: see `parse_headers_iter_uninit` guarantees */
401481
unsafe {
402482
let headers: *mut [Header] = headers;
403483
let headers = headers as *mut [MaybeUninit<Header>];
404-
match self.parse_with_uninit_headers(buf, &mut *headers) {
484+
match self.parse_with_config_and_uninit_headers(buf, config, &mut *headers) {
405485
Ok(Status::Complete(idx)) => Ok(Status::Complete(idx)),
406486
other => {
407487
// put the original headers back
@@ -411,6 +491,13 @@ impl<'h, 'b> Request<'h, 'b> {
411491
}
412492
}
413493
}
494+
495+
/// Try to parse a buffer of bytes into the Request.
496+
///
497+
/// Returns byte offset in `buf` to start of HTTP body.
498+
pub fn parse(&mut self, buf: &'b [u8]) -> Result<usize> {
499+
self.parse_with_config(buf, &Default::default())
500+
}
414501
}
415502

416503
#[inline]
@@ -436,6 +523,24 @@ fn skip_empty_lines(bytes: &mut Bytes) -> Result<()> {
436523
}
437524
}
438525

526+
#[inline]
527+
fn skip_spaces(bytes: &mut Bytes) -> Result<()> {
528+
loop {
529+
let b = bytes.peek();
530+
match b {
531+
Some(b' ') => {
532+
// there's ` `, so it's safe to bump 1 pos
533+
unsafe { bytes.bump() };
534+
}
535+
Some(..) => {
536+
bytes.slice();
537+
return Ok(Status::Complete(()));
538+
}
539+
None => return Ok(Status::Partial),
540+
}
541+
}
542+
}
543+
439544
/// A parsed Response.
440545
///
441546
/// See `Request` docs for explanation of optional values.
@@ -499,6 +604,9 @@ impl<'h, 'b> Response<'h, 'b> {
499604
complete!(skip_empty_lines(&mut bytes));
500605
self.version = Some(complete!(parse_version(&mut bytes)));
501606
space!(bytes or Error::Version);
607+
if config.allow_multiple_spaces_in_response_status_delimiters {
608+
complete!(skip_spaces(&mut bytes));
609+
}
502610
self.code = Some(complete!(parse_code(&mut bytes)));
503611

504612
// RFC7230 says there must be 'SP' and then reason-phrase, but admits
@@ -512,6 +620,9 @@ impl<'h, 'b> Response<'h, 'b> {
512620
// Anything else we'll say is a malformed status.
513621
match next!(bytes) {
514622
b' ' => {
623+
if config.allow_multiple_spaces_in_response_status_delimiters {
624+
complete!(skip_spaces(&mut bytes));
625+
}
515626
bytes.slice();
516627
self.reason = Some(complete!(parse_reason(&mut bytes)));
517628
},
@@ -1662,4 +1773,130 @@ mod tests {
16621773
assert_eq!(parse_chunk_size(b"Affffffffffffffff\r\n"), Err(::InvalidChunkSize));
16631774
assert_eq!(parse_chunk_size(b"fffffffffffffffff\r\n"), Err(::InvalidChunkSize));
16641775
}
1776+
1777+
static RESPONSE_WITH_MULTIPLE_SPACE_DELIMITERS: &'static [u8] =
1778+
b"HTTP/1.1 200 OK\r\n\r\n";
1779+
1780+
#[test]
1781+
fn test_forbid_response_with_multiple_space_delimiters() {
1782+
let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS];
1783+
let mut response = Response::new(&mut headers[..]);
1784+
let result = response.parse(RESPONSE_WITH_MULTIPLE_SPACE_DELIMITERS);
1785+
1786+
assert_eq!(result, Err(::Error::Status));
1787+
}
1788+
1789+
#[test]
1790+
fn test_allow_response_with_multiple_space_delimiters() {
1791+
let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS];
1792+
let mut response = Response::new(&mut headers[..]);
1793+
let result = ::ParserConfig::default()
1794+
.allow_multiple_spaces_in_response_status_delimiters(true)
1795+
.parse_response(&mut response, RESPONSE_WITH_MULTIPLE_SPACE_DELIMITERS);
1796+
1797+
assert_eq!(result, Ok(Status::Complete(RESPONSE_WITH_MULTIPLE_SPACE_DELIMITERS.len())));
1798+
assert_eq!(response.version.unwrap(), 1);
1799+
assert_eq!(response.code.unwrap(), 200);
1800+
assert_eq!(response.reason.unwrap(), "OK");
1801+
assert_eq!(response.headers.len(), 0);
1802+
}
1803+
1804+
/// This is technically allowed by the spec, but we only support multiple spaces as an option,
1805+
/// not stray `\r`s.
1806+
static RESPONSE_WITH_WEIRD_WHITESPACE_DELIMITERS: &'static [u8] =
1807+
b"HTTP/1.1 200\rOK\r\n\r\n";
1808+
1809+
#[test]
1810+
fn test_forbid_response_with_weird_whitespace_delimiters() {
1811+
let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS];
1812+
let mut response = Response::new(&mut headers[..]);
1813+
let result = response.parse(RESPONSE_WITH_WEIRD_WHITESPACE_DELIMITERS);
1814+
1815+
assert_eq!(result, Err(::Error::Status));
1816+
}
1817+
1818+
#[test]
1819+
fn test_still_forbid_response_with_weird_whitespace_delimiters() {
1820+
let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS];
1821+
let mut response = Response::new(&mut headers[..]);
1822+
let result = ::ParserConfig::default()
1823+
.allow_multiple_spaces_in_response_status_delimiters(true)
1824+
.parse_response(&mut response, RESPONSE_WITH_WEIRD_WHITESPACE_DELIMITERS);
1825+
assert_eq!(result, Err(::Error::Status));
1826+
}
1827+
1828+
static REQUEST_WITH_MULTIPLE_SPACE_DELIMITERS: &'static [u8] =
1829+
b"GET / HTTP/1.1\r\n\r\n";
1830+
1831+
#[test]
1832+
fn test_forbid_request_with_multiple_space_delimiters() {
1833+
let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS];
1834+
let mut request = Request::new(&mut headers[..]);
1835+
let result = request.parse(REQUEST_WITH_MULTIPLE_SPACE_DELIMITERS);
1836+
1837+
assert_eq!(result, Err(::Error::Token));
1838+
}
1839+
1840+
#[test]
1841+
fn test_allow_request_with_multiple_space_delimiters() {
1842+
let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS];
1843+
let mut request = Request::new(&mut headers[..]);
1844+
let result = ::ParserConfig::default()
1845+
.allow_multiple_spaces_in_request_line_delimiters(true)
1846+
.parse_request(&mut request, REQUEST_WITH_MULTIPLE_SPACE_DELIMITERS);
1847+
1848+
assert_eq!(result, Ok(Status::Complete(REQUEST_WITH_MULTIPLE_SPACE_DELIMITERS.len())));
1849+
assert_eq!(request.method.unwrap(), "GET");
1850+
assert_eq!(request.path.unwrap(), "/");
1851+
assert_eq!(request.version.unwrap(), 1);
1852+
assert_eq!(request.headers.len(), 0);
1853+
}
1854+
1855+
/// This is technically allowed by the spec, but we only support multiple spaces as an option,
1856+
/// not stray `\r`s.
1857+
static REQUEST_WITH_WEIRD_WHITESPACE_DELIMITERS: &'static [u8] =
1858+
b"GET\r/\rHTTP/1.1\r\n\r\n";
1859+
1860+
#[test]
1861+
fn test_forbid_request_with_weird_whitespace_delimiters() {
1862+
let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS];
1863+
let mut request = Request::new(&mut headers[..]);
1864+
let result = request.parse(REQUEST_WITH_WEIRD_WHITESPACE_DELIMITERS);
1865+
1866+
assert_eq!(result, Err(::Error::Token));
1867+
}
1868+
1869+
#[test]
1870+
fn test_still_forbid_request_with_weird_whitespace_delimiters() {
1871+
let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS];
1872+
let mut request = Request::new(&mut headers[..]);
1873+
let result = ::ParserConfig::default()
1874+
.allow_multiple_spaces_in_request_line_delimiters(true)
1875+
.parse_request(&mut request, REQUEST_WITH_WEIRD_WHITESPACE_DELIMITERS);
1876+
assert_eq!(result, Err(::Error::Token));
1877+
}
1878+
1879+
static REQUEST_WITH_MULTIPLE_SPACES_AND_BAD_PATH: &'static [u8] = b"GET /foo>ohno HTTP/1.1\r\n\r\n";
1880+
1881+
#[test]
1882+
fn test_request_with_multiple_spaces_and_bad_path() {
1883+
let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS];
1884+
let mut request = Request::new(&mut headers[..]);
1885+
let result = ::ParserConfig::default()
1886+
.allow_multiple_spaces_in_request_line_delimiters(true)
1887+
.parse_request(&mut request, REQUEST_WITH_MULTIPLE_SPACES_AND_BAD_PATH);
1888+
assert_eq!(result, Err(::Error::Token));
1889+
}
1890+
1891+
static RESPONSE_WITH_SPACES_IN_CODE: &'static [u8] = b"HTTP/1.1 99 200 OK\r\n\r\n";
1892+
1893+
#[test]
1894+
fn test_response_with_spaces_in_code() {
1895+
let mut headers = [EMPTY_HEADER; NUM_OF_HEADERS];
1896+
let mut response = Response::new(&mut headers[..]);
1897+
let result = ::ParserConfig::default()
1898+
.allow_multiple_spaces_in_response_status_delimiters(true)
1899+
.parse_response(&mut response, RESPONSE_WITH_SPACES_IN_CODE);
1900+
assert_eq!(result, Err(::Error::Status));
1901+
}
16651902
}

0 commit comments

Comments
 (0)