##// END OF EJS Templates
match: move _normalize() into the match class...
Matt Harbison -
r24789:0b1577c8 default
parent child Browse files
Show More
@@ -1,454 +1,454
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import re
8 import re
9 import util, pathutil
9 import util, pathutil
10 from i18n import _
10 from i18n import _
11
11
# Module-level alias for util.propertycache; it is applied as a decorator
# to match._dirs further down in this file.
propertycache = util.propertycache
13
13
def _rematcher(regex):
    '''Compile regex through util.re and return a matching callable.

    The compiled object's test_match attribute is returned when it exists,
    otherwise its match attribute is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
23
23
def _expandsets(kindpats, ctx):
    '''Split kindpats into expanded fileset members and everything else.

    Every ('set', expr) entry is expanded with ctx.getfileset(expr) and the
    resulting file names are collected in a set. All other (kind, pat)
    entries are kept, in their original order, in a list.

    Returns the pair (set of files, list of remaining kindpats). Raises
    util.Abort when a 'set' entry is seen and ctx is false.
    '''
    files = set()
    remaining = []

    for kind, pat in kindpats:
        if kind != 'set':
            remaining.append((kind, pat))
            continue
        if not ctx:
            raise util.Abort("fileset expression with no context")
        files.update(ctx.getfileset(pat))
    return files, remaining
38
38
def _kindpatsalwaysmatch(kindpats):
    """Check whether the kindpats match everything, as e.g.
    'relpath:.' does.

    kindpats is an iterable of (kind, pat) pairs. The result is True only
    if every pair has an empty pat and a kind of 'relpath' or 'glob'; an
    empty kindpats therefore also yields True.
    """
    for kind, pat in kindpats:
        if pat != '' or kind not in ('relpath', 'glob'):
            return False
    return True
47
47
class match(object):
    '''Callable object deciding which repository-relative file names are
    selected by a set of patterns, optionally narrowed by include and
    exclude patterns.'''

    def __init__(self, root, cwd, patterns, include=None, exclude=None,
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - forwarded to pathutil.canonpath() when 'glob' and 'relpath'
                  patterns are normalized
        ctx - object whose getfileset() is used to expand 'set:' patterns

        include and exclude default to None, which is handled exactly like
        an empty list.

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)

        # Each entry is a predicate on a file name; a file matches only if
        # every collected predicate accepts it.
        matchfns = []
        if include:
            kindpats = self._normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
            matchfns.append(im)
        if exclude:
            kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
            matchfns.append(lambda f: not em(f))
        if exact:
            # a list is stored as-is (not copied); other iterables are
            # materialized
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = self._normalize(patterns, default, root, cwd, auditor)
            # patterns that match everything contribute no predicate
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _roots(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$')
                matchfns.append(pm)

        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''Return the result of the match function for file name fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        # and/or form: a false value from rel() also falls back to abs()
        return (self._pathrestricted and self.rel(f)) or self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _dirs(self):
        '''Set of the directories reported by util.dirs() for the entries of
        .files(), plus '.'.'''
        return set(util.dirs(self._fmap)) | set(['.'])

    def visitdir(self, dir):
        '''Helps while traversing a directory tree. Returns the string 'all' if
        the given directory and all subdirectories should be visited. Otherwise
        returns True or False indicating whether the given directory should be
        visited. If 'all' is returned, calling this method on a subdirectory
        gives an undefined result.'''
        if not self._fmap or self.exact(dir):
            return 'all'
        return dir in self._dirs

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fmap

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def isexact(self):
        '''Return True if the match function is this matcher's own exact()
        method.'''
        return self.matchfn == self.exact

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Convert 'kind:pat' from the patterns list to tuples with kind and
        normalized and rooted patterns and with listfiles expanded.'''
        kindpats = []
        for kind, pat in [_patsplit(p, default) for p in patterns]:
            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path'):
                pat = util.normpath(pat)
            elif kind in ('listfile', 'listfile0'):
                # pat names a file holding one pattern per line
                # ('listfile') or NUL-separated patterns ('listfile0');
                # empty entries are dropped
                try:
                    files = util.readfile(pat)
                    if kind == 'listfile0':
                        files = files.split('\0')
                    else:
                        files = files.splitlines()
                    files = [f for f in files if f]
                except EnvironmentError:
                    raise util.Abort(_("unable to read file list (%s)") % pat)
                kindpats += self._normalize(files, default, root, cwd, auditor)
                continue
            # else: re or relre - which cannot be normalized
            kindpats.append((kind, pat))
        return kindpats
216
def exact(root, cwd, files):
    '''Return a matcher for which files are exact file names (a match
    object built with exact=True).'''
    matcher = match(root, cwd, files, exact=True)
    return matcher
194
219
def always(root, cwd):
    '''Return a match object built from an empty pattern list and no
    include/exclude patterns.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
197
222
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        # state copied unchanged from the wrapped matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._always = matcher._always
        self._pathrestricted = matcher._pathrestricted
        self._anypats = matcher._anypats

        self._path = path
        self._matcher = matcher

        # keep only the wrapped matcher's files located below path and
        # strip the leading "path/" from them
        prefix = path + "/"
        skip = len(path) + 1
        narrowed = []
        for f in matcher._files:
            if f.startswith(prefix):
                narrowed.append(f[skip:])
        self._files = narrowed
        self._fmap = set(narrowed)

        # file names are re-prefixed before asking the wrapped matcher
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def abs(self, f):
        '''Delegate to the wrapped matcher's abs() with f prefixed by the
        narrowed path.'''
        return self._matcher.abs(self._path + "/" + f)

    def bad(self, f, msg):
        '''Forward the callback to the wrapped matcher with f prefixed by
        the narrowed path.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def rel(self, f):
        '''Delegate to the wrapped matcher's rel() with f prefixed by the
        narrowed path.'''
        return self._matcher.rel(self._path + "/" + f)
250
275
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise default is returned.'''
    kind, pat = _patsplit(pattern, default)
    return kind
254
279
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns (kind, pat) when the text before the first ':' is one of the
    known kinds, and (default, pattern) in every other case."""
    known = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
             'listfile', 'listfile0', 'set')
    kind, colon, pat = pattern.partition(':')
    if colon and kind in known:
        return kind, pat
    return default, pattern
264
289
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    escape = util.re.escape
    n = len(pat)
    pieces = []  # regexp fragments, concatenated at the end
    depth = 0    # number of '{' groups currently open
    i = 0
    while i < n:
        c = pat[i]
        i += 1
        # pat[i:i + 1] is the next character, or '' at the end of pat
        if c == '*':
            if pat[i:i + 1] != '*':
                # single star: anything but a path separator
                pieces.append('[^/]*')
                continue
            i += 1
            if pat[i:i + 1] == '/':
                # '**/': any number of leading directories, possibly none
                i += 1
                pieces.append('(?:.*/)?')
            else:
                pieces.append('.*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # look for the closing ']'; a ']' or '!' directly after the
            # '[' is part of the class
            j = i
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # unterminated class: the '[' is a literal
                pieces.append('\\[')
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0] == '!':
                    # glob negation becomes regexp negation
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    # a leading '^' is literal in a glob class
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        elif c == '{':
            depth += 1
            pieces.append('(?:')
        elif c == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif c == ',' and depth:
            pieces.append('|')
        elif c == '\\' and i < n:
            # backslash escapes the following character
            pieces.append(escape(pat[i]))
            i += 1
        else:
            # ordinary character, '}' or ',' outside of a group, or a
            # trailing backslash
            pieces.append(escape(c))
    return ''.join(pieces)
342
367
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pat always gives an empty string. Kinds other than 're',
    'path', 'relglob', 'relpath' and 'relre' are converted like a glob.'''
    if not pat:
        regex = ''
    elif kind == 're':
        regex = pat
    elif kind == 'path':
        regex = '^' + util.re.escape(pat) + '(?:/|$)'
    elif kind == 'relglob':
        regex = '(?:|.*/)' + _globre(pat) + globsuffix
    elif kind == 'relpath':
        regex = util.re.escape(pat) + '(?:/|$)'
    elif kind == 'relre':
        # an anchored relre is used as-is, otherwise it may match anywhere
        if pat.startswith('^'):
            regex = pat
        else:
            regex = '.*' + pat
    else:
        regex = _globre(pat) + globsuffix
    return regex
361
386
def _buildmatch(ctx, kindpats, globsuffix):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    'set' patterns are expanded through ctx first. When only 'set' patterns
    were given, the regexp string is empty and the matcher is a plain
    membership test on the expanded files.'''
    fset, remaining = _expandsets(kindpats, ctx)
    if not remaining:
        return "", fset.__contains__

    regex, regexmatch = _buildregexmatch(remaining, globsuffix)
    if not fset:
        return regex, regexmatch
    # both fileset members and regexp matches are accepted
    return regex, lambda f: f in fset or regexmatch(f)
373
398
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of (kind, pat) pairs,
    return regexp string and a matcher function.

    Raises util.Abort when a pattern does not compile, and re-raises
    OverflowError when a single pattern is still too large."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]
        # the full regexp string is still returned; only matching is split
        return regex, lambda s: first(s) or second(s)
    except re.error:
        # compile the patterns one by one to report the offending one
        for k, p in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))
400
425
def _normalize(patterns, default, root, cwd, auditor):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Raises util.Abort when a 'listfile'/'listfile0' file cannot be read.'''
    result = []
    splits = [_patsplit(p, default) for p in patterns]
    for kind, pat in splits:
        if kind in ('listfile', 'listfile0'):
            # pat names a file containing further patterns; they are
            # normalized recursively and spliced into the result
            try:
                data = util.readfile(pat)
                if kind == 'listfile0':
                    entries = data.split('\0')
                else:
                    entries = data.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % pat)
            result.extend(_normalize(entries, default, root, cwd, auditor))
            continue
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        result.append((kind, pat))
    return result
425
def _roots(kindpats):
    '''return roots and exact explicitly listed files from patterns

    >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
    ['g', 'g', '.']
    >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
    ['r', 'p/p', '.']
    >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
    ['.', '.', '.']
    '''
    def globprefix(pat):
        # leading path components that contain no glob character
        literal = []
        for part in pat.split('/'):
            if any(ch in part for ch in '[{*?'):
                break
            literal.append(part)
        return '/'.join(literal)

    roots = []
    for kind, pat in kindpats:
        if kind == 'glob': # find the non-glob prefix
            root = globprefix(pat)
        elif kind in ('relpath', 'path'):
            root = pat
        else: # relglob, re, relre
            root = ''
        # an empty root stands for the top directory
        roots.append(root or '.')
    return roots
450
450
def _anypats(kindpats):
    '''Return True if any (kind, pat) pair in kindpats has one of the kinds
    'glob', 're', 'relglob', 'relre' or 'set', and False otherwise.

    Only the kind is inspected; the pattern text is ignored.'''
    for kind, pat in kindpats:
        if kind in ('glob', 're', 'relglob', 'relre', 'set'):
            return True
    # explicit result instead of falling off the end and returning None
    return False
General Comments 0
You need to be logged in to leave comments. Login now