@@ -60,17 +60,33 @@ def _compile_pattern(pat, sep, case_sensitive, recursive=True):
60
60
return re .compile (regex , flags = flags ).match
61
61
62
62
63
- def _select_special (paths , part ):
64
- """Yield special literal children of the given paths."""
65
- for path in paths :
66
- yield path ._make_child_relpath (part )
63
+ _special_segments = ('' , '.' , '..' )
67
64
68
65
69
- def _select_children (parent_paths , dir_only , follow_symlinks , match ):
70
- """Yield direct children of given paths, filtering by name and type."""
71
- if follow_symlinks is None :
72
- follow_symlinks = True
73
- for parent_path in parent_paths :
66
+ def _terminating_selector (path ):
67
+ yield path
68
+
69
+
70
def _special_selector(part, parts, sep, case_sensitive, follow_symlinks, recurse_symlinks):
    """Return a selector that joins a special literal segment onto a path.

    *part* is the segment to join and *parts* holds the pattern segments
    that follow it. The returned function takes a path, appends *part* to it
    with ``_make_child_relpath()`` and yields everything that the selector
    built from *parts* produces for the resulting child path.

    The remaining arguments are passed through unchanged to ``_selector()``.
    """
    # Build the downstream selector once here, not once per selected path.
    select_next = _selector(parts, sep, case_sensitive, follow_symlinks, recurse_symlinks)

    def select_special(path):
        yield from select_next(path._make_child_relpath(part))
    return select_special
78
+
79
+
80
+ def _wildcard_selector (part , parts , sep , case_sensitive , follow_symlinks , recurse_symlinks ):
81
+ """Returns a function that yields direct children of a given path,
82
+ filtering by name and type."""
83
+
84
+ # If the pattern component isn't '*', compile an re.Pattern
85
+ # object based on the component.
86
+ match = _compile_pattern (part , sep , case_sensitive ) if part != '*' else None
87
+ select_next = _selector (parts , sep , case_sensitive , follow_symlinks , recurse_symlinks )
88
+
89
+ def select_wildcard (parent_path ):
74
90
try :
75
91
# We must close the scandir() object before proceeding to
76
92
# avoid exhausting file descriptors when globbing deep trees.
@@ -80,7 +96,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
80
96
pass
81
97
else :
82
98
for entry in entries :
83
- if dir_only :
99
+ if parts :
84
100
try :
85
101
if not entry .is_dir (follow_symlinks = follow_symlinks ):
86
102
continue
@@ -89,16 +105,34 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
89
105
# Avoid cost of making a path object for non-matching paths by
90
106
# matching against the os.DirEntry.name string.
91
107
if match is None or match (entry .name ):
92
- yield parent_path ._make_child_direntry (entry )
93
-
94
-
95
- def _select_recursive (parent_paths , dir_only , follow_symlinks , match ):
96
- """Yield given paths and all their children, recursively, filtering by
97
- string and type.
98
- """
99
- if follow_symlinks is None :
100
- follow_symlinks = False
101
- for parent_path in parent_paths :
108
+ yield from select_next (parent_path ._make_child_direntry (entry ))
109
+ return select_wildcard
110
+
111
+
112
+ def _recursive_selector (part , parts , sep , case_sensitive , follow_symlinks , recurse_symlinks ):
113
+ """Returns a function that yields a given path and all its children,
114
+ recursively, filtering by pattern and type."""
115
+
116
+ # Consume following '**' components, which have no effect.
117
+ part_idx = 0
118
+ while part_idx < len (parts ) and parts [part_idx ] == '**' :
119
+ part_idx += 1
120
+
121
+ # Consume following non-special components, provided we're treating
122
+ # symlinks consistently. Each component is joined onto 'part', which is
123
+ # used to generate an re.Pattern object.
124
+ if follow_symlinks == recurse_symlinks :
125
+ while part_idx < len (parts ) and parts [part_idx ] not in _special_segments :
126
+ part += sep + parts [part_idx ]
127
+ part_idx += 1
128
+ parts = parts [part_idx :]
129
+
130
+ # If the previous loop consumed pattern components, compile an re.Pattern
131
+ # object based on those components.
132
+ match = _compile_pattern (part , sep , case_sensitive ) if part != '**' else None
133
+ select_next = _selector (parts , sep , case_sensitive , follow_symlinks , recurse_symlinks )
134
+
135
+ def select_recursive (parent_path ):
102
136
if match is not None :
103
137
# If we're filtering paths through a regex, record the length of
104
138
# the parent path. We'll pass it to match(path, pos=...) later.
@@ -108,7 +142,7 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks, match):
108
142
path = paths .pop ()
109
143
if match is None or match (str (path ), parent_len ):
110
144
# Yield *directory* path that matches pattern (if any).
111
- yield path
145
+ yield from select_next ( path )
112
146
try :
113
147
# We must close the scandir() object before proceeding to
114
148
# avoid exhausting file descriptors when globbing deep trees.
@@ -120,33 +154,37 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks, match):
120
154
for entry in entries :
121
155
# Handle directory entry.
122
156
try :
123
- if entry .is_dir (follow_symlinks = follow_symlinks ):
157
+ if entry .is_dir (follow_symlinks = recurse_symlinks ):
124
158
# Recurse into this directory.
125
159
paths .append (path ._make_child_direntry (entry ))
126
160
continue
127
161
except OSError :
128
162
pass
129
163
130
164
# Handle file entry.
131
- if not dir_only :
165
+ if not parts :
132
166
# Avoid cost of making a path object for non-matching
133
167
# files by matching against the os.DirEntry object.
134
168
if match is None or match (path ._direntry_str (entry ), parent_len ):
135
169
# Yield *file* path that matches pattern (if any).
136
- yield path ._make_child_direntry (entry )
170
+ yield from select_next (path ._make_child_direntry (entry ))
171
+ return select_recursive
137
172
138
173
139
- def _select_unique (paths ):
140
- """Yields the given paths, filtering out duplicates."""
141
- yielded = set ()
142
- try :
143
- for path in paths :
144
- path_str = str (path )
145
- if path_str not in yielded :
146
- yield path
147
- yielded .add (path_str )
148
- finally :
149
- yielded .clear ()
174
def _selector(parts, sep, case_sensitive, follow_symlinks, recurse_symlinks):
    """Return a function that selects from a given path, walking and
    filtering according to the glob-style pattern parts in *parts*.

    The first element of *parts* decides which selector factory is used:

    * no parts left: ``_terminating_selector`` is returned as-is;
    * ``'**'``: ``_recursive_selector``;
    * a member of ``_special_segments``: ``_special_selector``;
    * anything else: ``_wildcard_selector``.

    The chosen factory receives the first part, the remaining parts and the
    other arguments unchanged.
    """
    if not parts:
        return _terminating_selector
    part = parts[0]
    if part == '**':
        selector = _recursive_selector
    elif part in _special_segments:
        selector = _special_selector
    else:
        selector = _wildcard_selector
    return selector(part, parts[1:], sep, case_sensitive, follow_symlinks, recurse_symlinks)
150
188
151
189
152
190
class UnsupportedOperation (NotImplementedError ):
@@ -459,12 +497,12 @@ def is_absolute(self):
459
497
return self .pathmod .isabs (self ._raw_path )
460
498
461
499
@property
462
- def _pattern_stack (self ):
463
- """Stack of path components, to be used with patterns in glob()."""
500
+ def _pattern_parts (self ):
501
+ """List of path components, to be used with patterns in glob()."""
464
502
anchor , parts = self ._stack
465
503
if anchor :
466
504
raise NotImplementedError ("Non-relative patterns are unsupported" )
467
- return parts
505
+ return tuple ( reversed ( parts ))
468
506
469
507
@property
470
508
def _pattern_str (self ):
@@ -798,53 +836,19 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=True):
798
836
if case_sensitive is None :
799
837
# TODO: evaluate case-sensitivity of each directory in _wildcard_selector().
800
838
case_sensitive = _is_case_sensitive (self .pathmod )
839
+ if follow_symlinks is None :
840
+ # TODO: remove this legacy behaviour.
841
+ follow_symlinks = True
842
+ recurse_symlinks = False
843
+ else :
844
+ recurse_symlinks = follow_symlinks
801
845
802
- stack = pattern ._pattern_stack
803
- specials = ('' , '.' , '..' )
804
- deduplicate_paths = False
805
846
sep = self .pathmod .sep
806
- paths = iter ([self ] if self .is_dir () else [])
807
- while stack :
808
- part = stack .pop ()
809
- if part in specials :
810
- # Join special component (e.g. '..') onto paths.
811
- paths = _select_special (paths , part )
812
-
813
- elif part == '**' :
814
- # Consume following '**' components, which have no effect.
815
- while stack and stack [- 1 ] == '**' :
816
- stack .pop ()
817
-
818
- # Consume following non-special components, provided we're
819
- # treating symlinks consistently. Each component is joined
820
- # onto 'part', which is used to generate an re.Pattern object.
821
- if follow_symlinks is not None :
822
- while stack and stack [- 1 ] not in specials :
823
- part += sep + stack .pop ()
824
-
825
- # If the previous loop consumed pattern components, compile an
826
- # re.Pattern object based on those components.
827
- match = _compile_pattern (part , sep , case_sensitive ) if part != '**' else None
828
-
829
- # Recursively walk directories, filtering by type and regex.
830
- paths = _select_recursive (paths , bool (stack ), follow_symlinks , match )
831
-
832
- # De-duplicate if we've already seen a '**' component.
833
- if deduplicate_paths :
834
- paths = _select_unique (paths )
835
- deduplicate_paths = True
836
-
837
- elif '**' in part :
838
- raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
839
-
840
- else :
841
- # If the pattern component isn't '*', compile an re.Pattern
842
- # object based on the component.
843
- match = _compile_pattern (part , sep , case_sensitive ) if part != '*' else None
844
-
845
- # Iterate over directories' children filtering by type and regex.
846
- paths = _select_children (paths , bool (stack ), follow_symlinks , match )
847
- return paths
847
+ parts = pattern ._pattern_parts
848
+ select = _selector (parts , sep , case_sensitive , follow_symlinks , recurse_symlinks )
849
+ if not self .is_dir ():
850
+ return iter ([])
851
+ return select (self )
848
852
849
853
def rglob (self , pattern , * , case_sensitive = None , follow_symlinks = True ):
850
854
"""Recursively yield all existing files (of any kind, including
0 commit comments