Skip to content

Commit 608e917

Browse files
committed
GH-73435: Implement recursive wildcards in pathlib.PurePath.match()
Add a new *recursive* argument to `pathlib.PurePath.match()`, defaulting to `False`. If set to true, `match()` handles the `**` wildcard as in `Path.glob()`, i.e. it matches any number of path segments. We now compile a single `re.Pattern` object for the entire pattern. This is complicated by `fnmatch` not treating directory separators as special when evaluating wildcards (`*`, `?`, etc.), so we place each path part on its own *line* of a string and take care not to set `re.DOTALL`; without that flag, `.` cannot match the newline that separates parts, so a wildcard never spans more than one path segment.
1 parent b5c4d60 commit 608e917

File tree

4 files changed

+62
-22
lines changed

4 files changed

+62
-22
lines changed

Doc/library/pathlib.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -544,11 +544,14 @@ Pure paths provide the following methods and properties:
544544
PureWindowsPath('c:/Program Files')
545545

546546

547-
.. method:: PurePath.match(pattern)
547+
.. method:: PurePath.match(pattern, recursive=False)
548548

549549
Match this path against the provided glob-style pattern. Return ``True``
550550
if matching is successful, ``False`` otherwise.
551551

552+
If *recursive* is true, the pattern "``**``" will match any number of file
553+
or directory segments.
554+
552555
If *pattern* is relative, the path can be either relative or absolute,
553556
and matching is done from the right::
554557

@@ -574,6 +577,9 @@ Pure paths provide the following methods and properties:
574577
>>> PureWindowsPath('b.py').match('*.PY')
575578
True
576579

580+
.. versionadded:: 3.12
581+
The *recursive* argument.
582+
577583

578584
.. method:: PurePath.relative_to(other, walk_up=False)
579585

Lib/fnmatch.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ def translate(pat):
7777
There is no way to quote meta-characters.
7878
"""
7979

80+
res = _translate(pat)
81+
return fr'(?s:{res})\Z'
82+
83+
84+
def _translate(pat):
8085
STAR = object()
8186
res = []
8287
add = res.append
@@ -182,4 +187,4 @@ def translate(pat):
182187
add(f"(?>.*?{fixed})")
183188
assert i == n
184189
res = "".join(res)
185-
return fr'(?s:{res})\Z'
190+
return res

Lib/pathlib.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,25 @@ def _is_wildcard_pattern(pat):
6464
# Globbing helpers
6565
#
6666

67+
@functools.lru_cache()
68+
def _make_matcher(path_cls, pattern, recursive):
69+
pattern = path_cls(pattern)
70+
if not pattern._parts:
71+
raise ValueError("empty pattern")
72+
result = [r'\A' if pattern._drv or pattern._root else '^']
73+
for part in pattern._parts_normcase:
74+
if recursive:
75+
if part == '**':
76+
result.append('(.+\n)*')
77+
continue
78+
elif '**' in part:
79+
raise ValueError("Invalid pattern: '**' can only be an entire path component")
80+
part = fnmatch._translate(part)
81+
result.append(f'{part}\n')
82+
result.append(r'\Z')
83+
return re.compile(''.join(result), flags=re.MULTILINE)
84+
85+
6786
@functools.lru_cache()
6887
def _make_selector(pattern_parts, flavour):
6988
pat = pattern_parts[0]
@@ -639,29 +658,13 @@ def is_reserved(self):
639658
name = self._parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
640659
return name.upper() in _WIN_RESERVED_NAMES
641660

642-
def match(self, path_pattern):
661+
def match(self, path_pattern, recursive=False):
643662
"""
644663
Return True if this path matches the given pattern.
645664
"""
646-
path_pattern = self._flavour.normcase(path_pattern)
647-
drv, root, pat_parts = self._parse_parts((path_pattern,))
648-
if not pat_parts:
649-
raise ValueError("empty pattern")
650-
elif drv and drv != self._flavour.normcase(self._drv):
651-
return False
652-
elif root and root != self._root:
653-
return False
654-
parts = self._parts_normcase
655-
if drv or root:
656-
if len(pat_parts) != len(parts):
657-
return False
658-
pat_parts = pat_parts[1:]
659-
elif len(pat_parts) > len(parts):
660-
return False
661-
for part, pat in zip(reversed(parts), reversed(pat_parts)):
662-
if not fnmatch.fnmatchcase(part, pat):
663-
return False
664-
return True
665+
matcher = _make_matcher(type(self), path_pattern, recursive)
666+
lines = ''.join(f'{part}\n' for part in self._parts_normcase)
667+
return matcher.search(lines) is not None
665668

666669
# Can't subclass os.PathLike from PurePath and keep the constructor
667670
# optimizations in PurePath._parse_args().

Lib/test/test_pathlib.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,32 @@ def test_match_common(self):
319319
# Multi-part glob-style pattern.
320320
self.assertFalse(P('/a/b/c.py').match('/**/*.py'))
321321
self.assertTrue(P('/a/b/c.py').match('/a/**/*.py'))
322+
# Recursive patterns.
323+
self.assertTrue(P('a').match('**', recursive=True))
324+
self.assertTrue(P('c.py').match('**', recursive=True))
325+
self.assertTrue(P('a/b/c.py').match('**', recursive=True))
326+
self.assertTrue(P('/a/b/c.py').match('**', recursive=True))
327+
self.assertTrue(P('/a/b/c.py').match('/**', recursive=True))
328+
self.assertTrue(P('/a/b/c.py').match('**/', recursive=True))
329+
self.assertTrue(P('/a/b/c.py').match('/a/**', recursive=True))
330+
self.assertTrue(P('/a/b/c.py').match('**/*.py', recursive=True))
331+
self.assertTrue(P('/a/b/c.py').match('/**/*.py', recursive=True))
332+
self.assertTrue(P('/a/b/c.py').match('/a/**/*.py', recursive=True))
333+
self.assertTrue(P('/a/b/c.py').match('/a/b/**/*.py', recursive=True))
334+
self.assertTrue(P('/a/b/c.py').match('**/a/b/c.py/**', recursive=True))
335+
self.assertTrue(P('/a/b/c.py').match('/**/**/**/**/*.py', recursive=True))
336+
self.assertFalse(P('c.py').match('**/a.py', recursive=True))
337+
self.assertFalse(P('c.py').match('c/**', recursive=True))
338+
self.assertFalse(P('a/b/c.py').match('**/a', recursive=True))
339+
self.assertFalse(P('a/b/c.py').match('**/a/b', recursive=True))
340+
self.assertFalse(P('a/b/c.py').match('**/a/b/c', recursive=True))
341+
self.assertFalse(P('a/b/c.py').match('**/a/b/c.', recursive=True))
342+
self.assertFalse(P('a/b/c.py').match('**/a/b/c./**', recursive=True))
343+
self.assertFalse(P('a/b/c.py').match('**/a/b/c./**', recursive=True))
344+
self.assertFalse(P('a/b/c.py').match('/a/b/c.py/**', recursive=True))
345+
self.assertFalse(P('a/b/c.py').match('/**/a/b/c.py', recursive=True))
346+
self.assertRaises(ValueError, P('a').match, '**a/b/c', recursive=True)
347+
self.assertRaises(ValueError, P('a').match, 'a/b/c**', recursive=True)
322348

323349
def test_ordering_common(self):
324350
# Ordering is tuple-alike.

0 commit comments

Comments
 (0)