##// END OF EJS Templates
match: make matchfn a method on the class...
Martin von Zweigbergk -
r32463:43e09184 default
parent child Browse files
Show More
@@ -1,833 +1,835 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
21 21 propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile ``regex`` with ``util.re`` and return a callable that matches
    a string against it.

    The compiled object's ``test_match`` attribute is preferred when it
    exists; otherwise the regular ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    # test_match is slightly faster; it is provided by facebook's re2 bindings
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is not None:
        return fastmatch
    return compiled.match
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
55 55 def _expandsubinclude(kindpats, root):
56 56 '''Returns the list of subinclude matcher args and the kindpats without the
57 57 subincludes in it.'''
58 58 relmatchers = []
59 59 other = []
60 60
61 61 for kind, pat, source in kindpats:
62 62 if kind == 'subinclude':
63 63 sourceroot = pathutil.dirname(util.normpath(source))
64 64 pat = util.pconvert(pat)
65 65 path = pathutil.join(sourceroot, pat)
66 66
67 67 newroot = pathutil.dirname(path)
68 68 matcherargs = (newroot, '', [], ['include:%s' % path])
69 69
70 70 prefix = pathutil.canonpath(root, root, newroot)
71 71 if prefix:
72 72 prefix += '/'
73 73 relmatchers.append((prefix, matcherargs))
74 74 else:
75 75 other.append((kind, pat, source))
76 76
77 77 return relmatchers, other
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed through to the pattern normalization step
    ctx - context used to evaluate 'set:' patterns and, with icasefs, to
          reach the dirstate
    listsubrepos - passed through to fileset expansion
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Raises error.ProgrammingError when both icasefs and exact are requested.
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def icasenormalize(patterns, default, root, cwd, auditor, warn):
            '''_donormalize() followed by dirstate case folding.'''
            result = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    result.append((kind, pat, source))
                    continue

                folded = dsnormalize(pat)
                # Preserve the original to handle a case only rename.
                if pat != folded and pat in dirstate:
                    result.append((kind, pat, source))
                result.append((kind, folded, source))
            return result

        normalize = icasenormalize

    return matcher(root, cwd, normalize, patterns, include=include,
                   exclude=exclude, default=default, exact=exact,
                   auditor=auditor, ctx=ctx, listsubrepos=listsubrepos,
                   warn=warn, badfn=badfn)
149 149
def exact(root, cwd, files, badfn=None):
    '''Build a matcher that treats ``files`` as literal file names.'''
    return match(root, cwd, patterns=files, exact=True, badfn=badfn)
152 152
def always(root, cwd):
    '''Build a matcher from an empty pattern list (no include/exclude).'''
    nopatterns = []
    return match(root, cwd, nopatterns)
155 155
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() callback is ``badfn``.

    The matcher passed in is left untouched; the replacement is set as an
    instance attribute on the copy only.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
163 163
164 164 def _donormalize(patterns, default, root, cwd, auditor, warn):
165 165 '''Convert 'kind:pat' from the patterns list to tuples with kind and
166 166 normalized and rooted patterns and with listfiles expanded.'''
167 167 kindpats = []
168 168 for kind, pat in [_patsplit(p, default) for p in patterns]:
169 169 if kind in ('glob', 'relpath'):
170 170 pat = pathutil.canonpath(root, cwd, pat, auditor)
171 171 elif kind in ('relglob', 'path', 'rootfilesin'):
172 172 pat = util.normpath(pat)
173 173 elif kind in ('listfile', 'listfile0'):
174 174 try:
175 175 files = util.readfile(pat)
176 176 if kind == 'listfile0':
177 177 files = files.split('\0')
178 178 else:
179 179 files = files.splitlines()
180 180 files = [f for f in files if f]
181 181 except EnvironmentError:
182 182 raise error.Abort(_("unable to read file list (%s)") % pat)
183 183 for k, p, source in _donormalize(files, default, root, cwd,
184 184 auditor, warn):
185 185 kindpats.append((k, p, pat))
186 186 continue
187 187 elif kind == 'include':
188 188 try:
189 189 fullpath = os.path.join(root, util.localpath(pat))
190 190 includepats = readpatternfile(fullpath, warn)
191 191 for k, p, source in _donormalize(includepats, default,
192 192 root, cwd, auditor, warn):
193 193 kindpats.append((k, p, source or pat))
194 194 except error.Abort as inst:
195 195 raise error.Abort('%s: %s' % (pat, inst[0]))
196 196 except IOError as inst:
197 197 if warn:
198 198 warn(_("skipping unreadable pattern file '%s': %s\n") %
199 199 (pat, inst.strerror))
200 200 continue
201 201 # else: re or relre - which cannot be normalized
202 202 kindpats.append((kind, pat, ''))
203 203 return kindpats
204 204
class basematcher(object):
    '''Base class for matchers.

    On its own it matches nothing: matchfn() and visitdir() return False
    and files() is empty. Subclasses override the pieces they need.
    '''

    def __init__(self, root, cwd, badfn=None):
        '''root and cwd are stored for rel(); badfn, when given, replaces
        the bad() callback on this instance.'''
        self._root = root
        self._cwd = cwd
        if badfn is not None:
            self.bad = badfn
        # matchfn is deliberately NOT assigned here: it is a method on the
        # class, and an instance attribute would shadow method overrides.

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        return self.rel(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        '''Returns whether f matches; the base implementation never does.'''
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        '''Matcher only matches the files listed by .files().'''
        return False

    def prefix(self):
        '''True when the matcher is neither always(), isexact() nor
        anypats().'''
        return not self.always() and not self.isexact() and not self.anypats()
297 299
class matcher(basematcher):
    '''Matcher built from patterns plus optional include/exclude lists.

    A file matches when it satisfies every active component: the include
    patterns, the negated exclude patterns, and either the exact file list
    or the primary patterns.
    '''

    def __init__(self, root, cwd, normalize, patterns, include=None,
                 exclude=None, default='glob', exact=False, auditor=None,
                 ctx=None, listsubrepos=False, warn=None, badfn=None):
        '''normalize is the callable that turns raw pattern strings into
        kindpats; see match() for the meaning of the other arguments.'''
        super(matcher, self).__init__(root, cwd, badfn)
        include = [] if include is None else include
        exclude = [] if exclude is None else exclude

        self._anypats = bool(include or exclude)
        self._anyincludepats = False
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)
        self.patternspat = None
        self.includepat = None
        self.excludepat = None

        # roots are directories which are recursively included/excluded.
        self._includeroots = set()
        self._excluderoots = set()
        # dirs are directories which are non-recursively included.
        self._includedirs = set()

        matchfns = []
        if include:
            kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
            self.includepat, includematch = _buildmatch(
                ctx, kindpats, '(?:/|$)', listsubrepos, root)
            self._anyincludepats = _anypats(kindpats)
            roots, dirs = _rootsanddirs(kindpats)
            self._includeroots.update(roots)
            self._includedirs.update(dirs)
            matchfns.append(includematch)
        if exclude:
            kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
            self.excludepat, excludematch = _buildmatch(
                ctx, kindpats, '(?:/|$)', listsubrepos, root)
            if not _anypats(kindpats):
                # Only consider recursive excludes as such - if a non-recursive
                # exclude is used, we must still recurse into the excluded
                # directory, at least to find subdirectories. In such a case,
                # the regex still won't match the non-recursively-excluded
                # files.
                self._excluderoots.update(_roots(kindpats))
            matchfns.append(lambda f: not excludematch(f))
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = normalize(patterns, default, root, cwd, auditor, warn)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, patternmatch = _buildmatch(
                    ctx, kindpats, '$', listsubrepos, root)
                matchfns.append(patternmatch)

        if not matchfns:
            # nothing restricts the match
            combined = util.always
            self._always = True
        elif len(matchfns) == 1:
            combined = matchfns[0]
        else:
            def combined(f):
                '''f must satisfy every component matcher.'''
                return all(fn(f) for fn in matchfns)

        self.matchfn = combined

    def uipath(self, f):
        return (self._pathrestricted and self.rel(f)) or self.abs(f)

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        if self.prefix() and dir in self._fileset:
            return 'all'
        if dir in self._excluderoots:
            return False

        includeroots = self._includeroots
        includedirs = self._includedirs
        if includeroots or includedirs:
            if (not self._anyincludepats and
                not self._excluderoots and
                dir in includeroots):
                # The condition above is essentially self.prefix() for includes
                return 'all'
            if ('.' not in includeroots and
                dir not in includeroots and
                dir not in includedirs and
                not any(parent in includeroots
                        for parent in util.finddirs(dir))):
                return False

        fileset = self._fileset
        return (not fileset or
                '.' in fileset or
                dir in fileset or
                dir in self._dirs or
                any(parentdir in fileset
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        return self._anypats

    def always(self):
        return self._always

    def isexact(self):
        # exact matchers use the bound exact() method as their matchfn
        return self.matchfn == self.exact

    def __repr__(self):
        return ('<matcher files=%r, patterns=%r, includes=%r, excludes=%r>' %
                (self._files, self.patternspat, self.includepat,
                 self.excludepat))
418 420
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        '''path is the subdirectory; matcher is the matcher being adapted.'''
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # keep only the files below path, with the "path/" prefix removed
        dirprefix = path + "/"
        self._files = [f[len(dirprefix):] for f in matcher._files
                       if f.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def _fullpath(self, f):
        '''Return f with the subdirectory path prepended.'''
        return self._path + "/" + f

    def bad(self, f, msg):
        self._matcher.bad(self._fullpath(f), msg)

    def abs(self, f):
        return self._matcher.abs(self._fullpath(f))

    def rel(self, f):
        return self._matcher.rel(self._fullpath(f))

    def uipath(self, f):
        return self._matcher.uipath(self._fullpath(f))

    def visitdir(self, dir):
        # '.' stands for the subdirectory itself
        if dir == '.':
            target = self._path
        else:
            target = self._path + "/" + dir
        return self._matcher.visitdir(target)

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()
493 495
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
497 499
498 500 def _patsplit(pattern, default):
499 501 """Split a string into the optional pattern kind prefix and the actual
500 502 pattern."""
501 503 if ':' in pattern:
502 504 kind, pat = pattern.split(':', 1)
503 505 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
504 506 'listfile', 'listfile0', 'set', 'include', 'subinclude',
505 507 'rootfilesin'):
506 508 return kind, pat
507 509 return default, pattern
508 510
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    pos, end = 0, len(pat)
    out = ''
    depth = 0  # number of currently open {...} groups
    escape = util.re.escape

    def peek():
        '''Return the next unread character, or a false value at the end.'''
        return pos < end and pat[pos:pos + 1]

    while pos < end:
        ch = pat[pos:pos + 1]
        pos += 1
        if ch not in '*?[{},\\':
            # ordinary character
            out += escape(ch)
        elif ch == '*':
            if peek() == '*':
                pos += 1
                if peek() == '/':
                    # '**/' may also match nothing at all
                    pos += 1
                    out += '(?:.*/)?'
                else:
                    out += '.*'
            else:
                out += '[^/]*'
        elif ch == '?':
            out += '.'
        elif ch == '[':
            # look for the closing bracket; a leading '!' or ']' is part of
            # the class and cannot close it
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat '[' literally
                out += '\\['
            else:
                members = pat[pos:close].replace('\\','\\\\')
                pos = close + 1
                if members[0:1] == '!':
                    members = '^' + members[1:]
                elif members[0:1] == '^':
                    members = '\\' + members
                out = '%s[%s]' % (out, members)
        elif ch == '{':
            depth += 1
            out += '(?:'
        elif ch == '}' and depth:
            out += ')'
            depth -= 1
        elif ch == ',' and depth:
            out += '|'
        elif ch == '\\':
            following = peek()
            if following:
                pos += 1
                out += escape(following)
            else:
                # trailing backslash
                out += escape(ch)
        else:
            # '}' or ',' outside of any group
            out += escape(ch)
    return out
586 588
587 589 def _regex(kind, pat, globsuffix):
588 590 '''Convert a (normalized) pattern of any kind into a regular expression.
589 591 globsuffix is appended to the regexp of globs.'''
590 592 if not pat:
591 593 return ''
592 594 if kind == 're':
593 595 return pat
594 596 if kind == 'path':
595 597 if pat == '.':
596 598 return ''
597 599 return '^' + util.re.escape(pat) + '(?:/|$)'
598 600 if kind == 'rootfilesin':
599 601 if pat == '.':
600 602 escaped = ''
601 603 else:
602 604 # Pattern is a directory name.
603 605 escaped = util.re.escape(pat) + '/'
604 606 # Anything after the pattern must be a non-directory.
605 607 return '^' + escaped + '[^/]+$'
606 608 if kind == 'relglob':
607 609 return '(?:|.*/)' + _globre(pat) + globsuffix
608 610 if kind == 'relpath':
609 611 return util.re.escape(pat) + '(?:/|$)'
610 612 if kind == 'relre':
611 613 if pat.startswith('^'):
612 614 return pat
613 615 return '.*' + pat
614 616 return _globre(pat) + globsuffix
615 617
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    Subincludes, filesets and regexp-convertible patterns each contribute
    one match function; a file matches when any of them accepts it. The
    returned regexp string only covers the regexp-convertible patterns.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes are created on first use, per prefix
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(fn(f) for fn in matchfuncs)
650 652
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort naming the offending pattern (and its source, when
    known) if the combined regexp does not compile."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: first(s) or second(s)
    except re.error:
        # compile the patterns one by one to find the one to blame
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                     (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
681 683
682 684 def _patternrootsanddirs(kindpats):
683 685 '''Returns roots and directories corresponding to each pattern.
684 686
685 687 This calculates the roots and directories exactly matching the patterns and
686 688 returns a tuple of (roots, dirs) for each. It does not return other
687 689 directories which may also need to be considered, like the parent
688 690 directories.
689 691 '''
690 692 r = []
691 693 d = []
692 694 for kind, pat, source in kindpats:
693 695 if kind == 'glob': # find the non-glob prefix
694 696 root = []
695 697 for p in pat.split('/'):
696 698 if '[' in p or '{' in p or '*' in p or '?' in p:
697 699 break
698 700 root.append(p)
699 701 r.append('/'.join(root) or '.')
700 702 elif kind in ('relpath', 'path'):
701 703 r.append(pat or '.')
702 704 elif kind in ('rootfilesin',):
703 705 d.append(pat or '.')
704 706 else: # relglob, re, relre
705 707 r.append('.')
706 708 return r, d
707 709
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of what _patternrootsanddirs() computes.'''
    return _patternrootsanddirs(kindpats)[0]
712 714
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    exactdirs.extend(util.dirs(exactdirs))
    exactdirs.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
744 746
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = []
    for kindpat in kindpats:
        if kindpat[0] != 'rootfilesin':
            filable.append(kindpat)
    return _roots(filable)
757 759
758 760 def _anypats(kindpats):
759 761 for kind, pat, source in kindpats:
760 762 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
761 763 return True
762 764
# compiled lazily by readpatternfile() the first time a line contains '#'
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.

    warn, when given, is called with a message for each unknown
    "syntax:" value. The file is always closed before returning, including
    when reading or parsing raises.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # the with block guarantees the handle is released on every exit path
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now