Skip to content

Commit ad47c7d

Browse files
authored
[3.10] gh-99581: Fix a buffer overflow in the tokenizer when copying lines that fill the available buffer (GH-99605). (#99630)
1 parent 88b101f commit ad47c7d

File tree

3 files changed

+25
-1
lines changed

3 files changed

+25
-1
lines changed

Lib/test/test_tokenize.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from unittest import TestCase, mock
1111
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
1212
INVALID_UNDERSCORE_LITERALS)
13+
from test.support import os_helper
14+
from test.support.script_helper import run_test_script, make_script
1315
import os
1416
import token
1517

@@ -1654,5 +1656,19 @@ def test_indentation_semantics_retained(self):
16541656
self.check_roundtrip(code)
16551657

16561658

1659+
class CTokenizerBufferTests(unittest.TestCase):
1660+
def test_newline_at_the_end_of_buffer(self):
1661+
# See issue 99581: Make sure that if we need to add a new line at the
1662+
# end of the buffer, we have enough space in the buffer, especially when
1663+
# the current line is as long as the buffer space available.
1664+
test_script = f"""\
1665+
#coding: latin-1
1666+
#{"a"*10000}
1667+
#{"a"*10002}"""
1668+
with os_helper.temp_dir() as temp_dir:
1669+
file_name = make_script(temp_dir, 'foo', test_script)
1670+
run_test_script(file_name)
1671+
1672+
16571673
if __name__ == "__main__":
16581674
unittest.main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fixed a bug that was causing a buffer overflow if the tokenizer copies from a
2+
file a line that is missing the newline character and is as long as the
3+
available tokenizer buffer. Patch by Pablo Galindo.

Parser/tokenizer.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,11 @@ tok_readline_recode(struct tok_state *tok) {
419419
error_ret(tok);
420420
goto error;
421421
}
422-
if (!tok_reserve_buf(tok, buflen + 1)) {
422+
// Make room for the null terminator *and* potentially
423+
// an extra newline character that we may need to artificially
424+
// add.
425+
size_t buffer_size = buflen + 2;
426+
if (!tok_reserve_buf(tok, buffer_size)) {
423427
goto error;
424428
}
425429
memcpy(tok->inp, buf, buflen);
@@ -973,6 +977,7 @@ tok_underflow_file(struct tok_state *tok) {
973977
return 0;
974978
}
975979
if (tok->inp[-1] != '\n') {
980+
assert(tok->inp + 1 < tok->end);
976981
/* Last line does not end in \n, fake one */
977982
*tok->inp++ = '\n';
978983
*tok->inp = '\0';

0 commit comments

Comments
 (0)