diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
index 6c4a4ea81afe29..b22eb4db7945d1 100644
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -489,8 +489,9 @@ String literals are described by the following lexical definitions:
 
 .. productionlist:: python-grammar
    stringliteral: [`stringprefix`](`shortstring` | `longstring`)
-   stringprefix: "r" | "u" | "R" | "U" | "f" | "F"
+   stringprefix: "r" | "u" | "R" | "U" | "f" | "F" | "t" | "T"
                : | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF"
+               : | "tr" | "Tr" | "tR" | "TR" | "rt" | "rT" | "Rt" | "RT"
    shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"'
    longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""'
    shortstringitem: `shortstringchar` | `stringescapeseq`
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index e6b19fe1812d44..d4b51841891b28 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,6 +1,8 @@
 import contextlib
+import itertools
 import os
 import re
+import string
 import tempfile
 import token
 import tokenize
@@ -3238,5 +3240,59 @@ def test_exact_flag(self):
         self.check_output(source, expect, flag)
 
 
+class StringPrefixTest(unittest.TestCase):
+    def test_prefixes(self):
+        # Get the list of defined string prefixes.  I don't see an
+        # obvious documented way of doing this, but probably the best
+        # thing is to split apart tokenize.StringPrefix.
+
+        # Make sure StringPrefix begins and ends in parens.
+        self.assertEqual(tokenize.StringPrefix[0], '(')
+        self.assertEqual(tokenize.StringPrefix[-1], ')')
+
+        # Then split apart everything else by '|'.
+        defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))
+
+        # Now compute the actual string prefixes, by eval-ing each
+        # candidate prefix followed by an empty string.
+
+        # Try all prefix lengths until we find a length that has zero
+        # valid prefixes.  This will miss the case where for example
+        # there are no valid 3 character prefixes, but there are valid
+        # 4 character prefixes.  That seems extremely unlikely.
+
+        # Note that the empty prefix is being included, because length
+        # starts at 0.  That's expected, since StringPrefix includes
+        # the empty prefix.
+
+        valid_prefixes = set()
+        for length in itertools.count():
+            num_at_this_length = 0
+            for prefix in (
+                "".join(l) for l in itertools.combinations(string.ascii_lowercase, length)
+            ):
+                for t in itertools.permutations(prefix):
+                    for u in itertools.product(*[(c, c.upper()) for c in t]):
+                        p = ''.join(u)
+                        if p == "not":
+                            # 'not' can never be a string prefix,
+                            # because it's a valid expression: not ""
+                            continue
+                        try:
+                            eval(f'{p}""')
+
+                            # No syntax error, so p is a valid string
+                            # prefix.
+
+                            valid_prefixes.add(p)
+                            num_at_this_length += 1
+                        except SyntaxError:
+                            pass
+            if num_at_this_length == 0:
+                break
+
+        self.assertEqual(defined_prefixes, valid_prefixes)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 559a7aecbde2d1..7e71755068e1df 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -86,7 +86,7 @@ def _all_string_prefixes():
     # The valid string prefixes. Only contain the lower case versions,
     # and don't contain any permutations (include 'fr', but not
     # 'rf'). The various permutations will be generated.
-    _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
+    _valid_string_prefixes = ['b', 'r', 'u', 'f', 't', 'br', 'fr', 'tr']
     # if we add binary f-strings, add: ['fb', 'fbr']
     result = {''}
     for prefix in _valid_string_prefixes:
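
For context, the expansion that _all_string_prefixes() performs on these base
prefixes can be sketched standalone. This is a minimal sketch mirroring the
permutation/casing loop visible at the end of the tokenize.py hunk; the helper
name expand_prefixes is illustrative, not part of tokenize:

    import itertools

    def expand_prefixes(base_prefixes):
        # Mirrors the loop in tokenize._all_string_prefixes(): every
        # ordering (permutations) of each base prefix, in every mix of
        # upper and lower case (product), plus the empty prefix.
        result = {''}
        for prefix in base_prefixes:
            for t in itertools.permutations(prefix):
                for u in itertools.product(*[(c, c.upper()) for c in t]):
                    result.add(''.join(u))
        return result

    old = expand_prefixes(['b', 'r', 'u', 'f', 'br', 'fr'])
    new = expand_prefixes(['b', 'r', 'u', 'f', 't', 'br', 'fr', 'tr'])

    # Adding 't' and 'tr' contributes exactly the ten prefixes that the
    # documentation hunk spells out: t/T plus the eight casing/ordering
    # variants of 'tr'.
    print(sorted(new - old))
    # ['RT', 'Rt', 'T', 'TR', 'Tr', 'rT', 'rt', 't', 'tR', 'tr']

This is also why the test above can recover the defined set by brute force:
any prefix the tokenizer accepts must be one of these generated spellings, so
eval() of the prefix applied to an empty string succeeds for exactly this set.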