##// END OF EJS Templates
match: extract base class for matchers...
Martin von Zweigbergk -
r32454:a04bc552 default
parent child Browse files
Show More
@@ -1,800 +1,819
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
21 21 propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile regex with util.re and return a matcher function for it.

    The compiled object's test_match attribute is returned when it exists
    (slightly faster, provided by facebook's re2 bindings); otherwise its
    match attribute is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        fastmatch = compiled.test_match
    except AttributeError:
        return compiled.match
    return fastmatch
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
55 55 def _expandsubinclude(kindpats, root):
56 56 '''Returns the list of subinclude matcher args and the kindpats without the
57 57 subincludes in it.'''
58 58 relmatchers = []
59 59 other = []
60 60
61 61 for kind, pat, source in kindpats:
62 62 if kind == 'subinclude':
63 63 sourceroot = pathutil.dirname(util.normpath(source))
64 64 pat = util.pconvert(pat)
65 65 path = pathutil.join(sourceroot, pat)
66 66
67 67 newroot = pathutil.dirname(path)
68 68 matcherargs = (newroot, '', [], ['include:%s' % path])
69 69
70 70 prefix = pathutil.canonpath(root, root, newroot)
71 71 if prefix:
72 72 prefix += '/'
73 73 relmatchers.append((prefix, matcherargs))
74 74 else:
75 75 other.append((kind, pat, source))
76 76
77 77 return relmatchers, other
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed through to pattern normalization, which hands it to
              pathutil.canonpath() for 'glob' and 'relpath' patterns
    ctx - context used to evaluate 'set:' patterns; when icasefs is true it
          is also used to reach the dirstate (ctx.repo().dirstate)
    listsubrepos - also evaluate 'set:' patterns in the subrepos of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Raises error.ProgrammingError when both icasefs and exact are true.
    """
    # The matcher receives the normalization function as an argument; the
    # plain one is used unless a case-insensitive variant is built below.
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        # NOTE(review): ctx defaults to None, so icasefs=True appears to
        # require the caller to pass a ctx - confirm against callers.
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # Same contract as _donormalize, with each non-regex pattern
            # additionally passed through dirstate.normalize().
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'): # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    return matcher(root, cwd, normalize, patterns, include=include,
                   exclude=exclude, default=default, exact=exact,
                   auditor=auditor, ctx=ctx, listsubrepos=listsubrepos,
                   warn=warn, badfn=badfn)
149 149
def exact(root, cwd, files, badfn=None):
    '''Build a matcher via match() with exact=True, so that files are
    treated as file names rather than patterns.'''
    return match(root, cwd, files, badfn=badfn, exact=True)
152 152
def always(root, cwd):
    '''Build a matcher via match() with an empty pattern list and no
    include/exclude patterns.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
155 155
def badmatch(match, badfn):
    """Return a shallow copy (copy.copy) of the matcher `match` whose bad
    attribute is set to badfn. The matcher passed in is left unmodified.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
163 163
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Each pattern is split with _patsplit() (using default when it has no
    known kind prefix) and handled according to its kind:

    - 'glob', 'relpath': made canonical with pathutil.canonpath()
    - 'relglob', 'path', 'rootfilesin': passed through util.normpath()
    - 'listfile', 'listfile0': the named file is read, split on line ends or
      on NUL bytes respectively, and its non-empty entries are normalized
      recursively; the list file name becomes their source
    - 'include': the pattern file under root is parsed with
      readpatternfile() and its patterns are normalized recursively
    - anything else is kept unchanged

    Returns a list of (kind, pat, source) tuples. Raises error.Abort when a
    list file cannot be read or when processing an include file aborts; an
    include file that raises IOError is reported through warn (if given)
    and skipped.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                # drop empty entries (e.g. after a trailing separator)
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # The entries are patterns themselves; record the list file as
            # the source of everything they expand to.
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                # Keep a more specific source reported by the recursion and
                # fall back to this include file otherwise.
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # prefix the abort message with the include file name
                raise error.Abort('%s: %s' % (pat, inst[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue
        # else: re or relre - which cannot be normalized
        # (any other kind not handled above also reaches this point as is)
        kindpats.append((kind, pat, ''))
    return kindpats
204 204
class basematcher(object):
    '''Base class for matchers.

    It stores root, cwd and the list of explicit files, and provides
    defaults for the matcher interface: matchfn matches nothing, no
    directory is visited, and anypats(), always() and isexact() all return
    False.
    '''

    def __init__(self, root, cwd, badfn=None):
        self._root = root
        self._cwd = cwd
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn
        self._files = [] # exact files and roots of patterns
        self.matchfn = lambda f: False

    def __call__(self, fn):
        '''Return the result of matchfn for the file name fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of the explicit file list.'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path. If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.

        This base implementation always returns the cwd-relative path.'''
        return self.rel(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        # set form of the explicit file list, used for membership tests
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.

        This base implementation returns False for every directory.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        '''Return whether this is an exact matcher; False in the base class.'''
        return False

    def prefix(self):
        '''Return True when the matcher is none of always(), isexact() and
        anypats().'''
        return not (self.always() or self.isexact() or self.anypats())
294
class matcher(basematcher):
    '''Matcher built from patterns plus include and exclude patterns.

    A file matches when it passes every configured check: the include
    patterns (if any), the negated exclude patterns (if any), and either
    membership in the exact file list or the primary patterns. With no
    checks at all the matcher uses util.always and reports always() as
    True.

    Root/cwd storage, the bad() callback, __call__, __iter__, abs(), rel(),
    files(), exact() and prefix() are inherited from basematcher.
    '''

    def __init__(self, root, cwd, normalize, patterns, include=None,
                 exclude=None, default='glob', exact=False, auditor=None,
                 ctx=None, listsubrepos=False, warn=None, badfn=None):
        '''Build the match function.

        normalize is called as normalize(patterns, default, root, cwd,
        auditor, warn) and must return a list of (kind, pat, source)
        tuples. include and exclude are normalized with a default kind of
        'glob'; patterns use the given default. When exact is true,
        patterns is taken as the list of file names to match and is not
        normalized. ctx and listsubrepos are forwarded to _buildmatch().
        '''
        super(matcher, self).__init__(root, cwd, badfn)
        if include is None:
            include = []
        if exclude is None:
            exclude = []

        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)
        self.patternspat = None
        self.includepat = None
        self.excludepat = None

        # roots are directories which are recursively included/excluded.
        self._includeroots = set()
        self._excluderoots = set()
        # dirs are directories which are non-recursively included.
        self._includedirs = set()

        matchfns = []
        if include:
            kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            roots, dirs = _rootsanddirs(kindpats)
            self._includeroots.update(roots)
            self._includedirs.update(dirs)
            matchfns.append(im)
        if exclude:
            kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            if not _anypats(kindpats):
                # Only consider recursive excludes as such - if a non-recursive
                # exclude is used, we must still recurse into the excluded
                # directory, at least to find subdirectories. In such a case,
                # the regex still won't match the non-recursively-excluded
                # files.
                self._excluderoots.update(_roots(kindpats))
            matchfns.append(lambda f: not em(f))
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = normalize(patterns, default, root, cwd, auditor, warn)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                # _anypats() may fall off its end and return None; coerce so
                # that anypats() always reports a real boolean.
                self._anypats = bool(self._anypats or _anypats(kindpats))
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos, root)
                matchfns.append(pm)

        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                # a file must pass every check
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m

    def uipath(self, f):
        '''Convert repo path to a display path. If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        # Note: an empty result from rel() also falls through to abs().
        return (self._pathrestricted and self.rel(f)) or self.abs(f)

    @propertycache
    def _dirs(self):
        # directories derived from the explicit files via util.dirs(),
        # plus '.'
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        if self.prefix() and dir in self._fileset:
            return 'all'
        if dir in self._excluderoots:
            return False
        # With include patterns present, the directory must be an include
        # root, an include dir, or lie below an include root.
        if ((self._includeroots or self._includedirs) and
            '.' not in self._includeroots and
            dir not in self._includeroots and
            dir not in self._includedirs and
            not any(parent in self._includeroots
                    for parent in util.finddirs(dir))):
            return False
        return (not self._fileset or
                '.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def isexact(self):
        '''Return True when the match function is this matcher's exact().'''
        return self.matchfn == self.exact

    def __repr__(self):
        return ('<matcher files=%r, patterns=%r, includes=%r, excludes=%r>' %
                (self._files, self.patternspat, self.includepat,
                 self.excludepat))
class subdirmatcher(matcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        # The superclass constructor is deliberately not called; the state
        # is copied or derived from the wrapped matcher instead.
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always

        # Keep only the files below path, with "path/" stripped off.
        dirprefix = path + "/"
        self._files = [name[len(dirprefix):] for name in matcher._files
                       if name.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

        self._anypats = matcher._anypats
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad() with the path prepended.'''
        fullname = self._path + "/" + f
        self._matcher.bad(fullname, msg)

    def abs(self, f):
        '''Return the wrapped matcher's abs() of f below the subdirectory.'''
        fullname = self._path + "/" + f
        return self._matcher.abs(fullname)

    def rel(self, f):
        '''Return the wrapped matcher's rel() of f below the subdirectory.'''
        fullname = self._path + "/" + f
        return self._matcher.rel(fullname)

    def uipath(self, f):
        '''Return the wrapped matcher's uipath() of f below the
        subdirectory.'''
        fullname = self._path + "/" + f
        return self._matcher.uipath(fullname)

    def visitdir(self, dir):
        '''Ask the wrapped matcher about dir below the subdirectory; '.'
        stands for the subdirectory itself.'''
        target = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitdir(target)
460 479
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise default is returned.'''
    kind, pat = _patsplit(pattern, default)
    return kind
464 483
465 484 def _patsplit(pattern, default):
466 485 """Split a string into the optional pattern kind prefix and the actual
467 486 pattern."""
468 487 if ':' in pattern:
469 488 kind, pat = pattern.split(':', 1)
470 489 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
471 490 'listfile', 'listfile0', 'set', 'include', 'subinclude',
472 491 'rootfilesin'):
473 492 return kind, pat
474 493 return default, pattern
475 494
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    Translation rules, as implemented below:
    - '*' becomes '[^/]*', '**' becomes '.*' and '**/' becomes '(?:.*/)?'
    - '?' becomes '.'
    - '[...]' becomes a character class; a leading '!' negates it and a
      leading '^' is escaped; a '[' without a closing ']' is emitted as a
      literal
    - '{a,b}' becomes the group '(?:a|b)'; '}' and ',' outside a group are
      escaped literally
    - a backslash escapes the character that follows it
    - every other character is passed through util.re.escape

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    i, n = 0, len(pat)
    res = ''
    group = 0  # nesting depth of currently open '{' groups
    escape = util.re.escape
    def peek():
        # next unread character, or False at the end of the pattern
        return i < n and pat[i:i + 1]
    while i < n:
        # one-element slices are used instead of indexing
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' also matches zero directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    res += '.*'
            else:
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # scan for the closing ']'; a '!' or ']' directly after the
            # '[' belongs to the class and cannot close it
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat the '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            res += '|'
        elif c == '\\':
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                # trailing backslash: keep it as a literal
                res += escape(c)
        else:
            # '}' or ',' outside of any group
            res += escape(c)
    return res
553 572
554 573 def _regex(kind, pat, globsuffix):
555 574 '''Convert a (normalized) pattern of any kind into a regular expression.
556 575 globsuffix is appended to the regexp of globs.'''
557 576 if not pat:
558 577 return ''
559 578 if kind == 're':
560 579 return pat
561 580 if kind == 'path':
562 581 if pat == '.':
563 582 return ''
564 583 return '^' + util.re.escape(pat) + '(?:/|$)'
565 584 if kind == 'rootfilesin':
566 585 if pat == '.':
567 586 escaped = ''
568 587 else:
569 588 # Pattern is a directory name.
570 589 escaped = util.re.escape(pat) + '/'
571 590 # Anything after the pattern must be a non-directory.
572 591 return '^' + escaped + '[^/]+$'
573 592 if kind == 'relglob':
574 593 return '(?:|.*/)' + _globre(pat) + globsuffix
575 594 if kind == 'relpath':
576 595 return util.re.escape(pat) + '(?:/|$)'
577 596 if kind == 'relre':
578 597 if pat.startswith('^'):
579 598 return pat
580 599 return '.*' + pat
581 600 return _globre(pat) + globsuffix
582 601
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    Up to three checks are combined: one for 'subinclude' patterns, one for
    the files produced by 'set' patterns, and one regexp check for all
    remaining patterns. The returned function accepts a file as soon as any
    of these checks accepts it. The returned regexp string covers only the
    remaining patterns and is empty when there are none.'''
    checks = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for the subinclude files, created on first use
        cache = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = cache.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    cache[prefix] = submatch
                # the sub-matcher sees the path relative to its own root
                if submatch(f[len(prefix):]):
                    return True
            return False
        checks.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        checks.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        checks.append(regexmatch)

    if len(checks) != 1:
        return regex, lambda f: any(check(f) for check in checks)
    return regex, checks[0]
617 636
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The individual regexps from _regex() are joined into one alternation.
    If that string is longer than 20000 characters, or compiling it raises
    OverflowError, the pattern list is halved recursively and the two
    resulting matchers are combined with 'or'; the regexp string returned
    is still the complete alternation. A single pattern that overflows
    re-raises OverflowError.

    Raises error.Abort if compilation fails with re.error, naming the first
    pattern that does not compile on its own (and its source, if any).
    """
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p, s) in kindpats])
        if len(regex) > 20000:
            # take the same splitting path as an engine overflow
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            raise
        # only the matcher functions of the two halves are used
        regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
        regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # Compile the patterns one at a time to find the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        # every pattern compiles alone, so no single culprit can be named
        raise error.Abort(_("invalid pattern"))
648 667
649 668 def _patternrootsanddirs(kindpats):
650 669 '''Returns roots and directories corresponding to each pattern.
651 670
652 671 This calculates the roots and directories exactly matching the patterns and
653 672 returns a tuple of (roots, dirs) for each. It does not return other
654 673 directories which may also need to be considered, like the parent
655 674 directories.
656 675 '''
657 676 r = []
658 677 d = []
659 678 for kind, pat, source in kindpats:
660 679 if kind == 'glob': # find the non-glob prefix
661 680 root = []
662 681 for p in pat.split('/'):
663 682 if '[' in p or '{' in p or '*' in p or '?' in p:
664 683 break
665 684 root.append(p)
666 685 r.append('/'.join(root) or '.')
667 686 elif kind in ('relpath', 'path'):
668 687 r.append(pat or '.')
669 688 elif kind in ('rootfilesin',):
670 689 d.append(pat or '.')
671 690 else: # relglob, re, relre
672 691 r.append('.')
673 692 return r, d
674 693
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of what _patternrootsanddirs() returns.'''
    return _patternrootsanddirs(kindpats)[0]
679 698
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    exactdirs += util.dirs(exactdirs)
    exactdirs += util.dirs(roots)
    # util.dirs() does not include the root directory, so add it manually
    exactdirs += ['.']

    return roots, exactdirs
711 730
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so those
    # patterns are left out before the roots are computed.
    canbefiles = [entry for entry in kindpats if entry[0] != 'rootfilesin']
    return _roots(canbefiles)
724 743
725 744 def _anypats(kindpats):
726 745 for kind, pat, source in kindpats:
727 746 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
728 747 return True
729 748
730 749 _commentre = None
731 750
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    An unknown name after 'syntax:' is reported through warn (when warn is
    given) and otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where line is the text left after comments,
    trailing white space and any recognised syntax prefix have been
    removed. This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # The with block closes the file even if parsing raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                # compiled once and cached at module level
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line prefix overrides the current default syntax. Both
            # the resolved form (e.g. 'relglob:') and the short form (e.g.
            # 'glob:') are recognised.
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s + ':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now