##// END OF EJS Templates
match: make subdirmatcher extend basematcher...
Martin von Zweigbergk -
r32456:f9445b52 default
parent child Browse files
Show More
@@ -1,822 +1,826
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
21 21 propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile regex via util.re and return a callable that matches against
    it.

    The compiled object's ``test_match`` attribute is preferred when it
    exists (slightly faster, provided by facebook's re2 bindings); otherwise
    the regular ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is None:
        return compiled.match
    return fastmatch
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
55 55 def _expandsubinclude(kindpats, root):
56 56 '''Returns the list of subinclude matcher args and the kindpats without the
57 57 subincludes in it.'''
58 58 relmatchers = []
59 59 other = []
60 60
61 61 for kind, pat, source in kindpats:
62 62 if kind == 'subinclude':
63 63 sourceroot = pathutil.dirname(util.normpath(source))
64 64 pat = util.pconvert(pat)
65 65 path = pathutil.join(sourceroot, pat)
66 66
67 67 newroot = pathutil.dirname(path)
68 68 matcherargs = (newroot, '', [], ['include:%s' % path])
69 69
70 70 prefix = pathutil.canonpath(root, root, newroot)
71 71 if prefix:
72 72 prefix += '/'
73 73 relmatchers.append((prefix, matcherargs))
74 74 else:
75 75 other.append((kind, pat, source))
76 76
77 77 return relmatchers, other
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor, listsubrepos - handed through unchanged to matcher()
    ctx - handed through to matcher(); with icasefs it also supplies the
          dirstate (via ctx.repo().dirstate) used to normalize patterns
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Raises error.ProgrammingError when both icasefs and exact are requested.
    """
    if not icasefs:
        normalize = _donormalize
    else:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            '''Like _donormalize(), but additionally folds every non-regex
            pattern through the dirstate's normalize().'''
            kindpats = []
            for kind, pats, source in _donormalize(patterns, default, root,
                                                   cwd, auditor, warn):
                # regex can't be normalized
                if kind not in ('re', 'relre'):
                    original = pats
                    pats = dsnormalize(original)

                    # Preserve the original to handle a case only rename.
                    if original != pats and original in dirstate:
                        kindpats.append((kind, original, source))

                kindpats.append((kind, pats, source))
            return kindpats

    return matcher(root, cwd, normalize, patterns, include=include,
                   exclude=exclude, default=default, exact=exact,
                   auditor=auditor, ctx=ctx, listsubrepos=listsubrepos,
                   warn=warn, badfn=badfn)
149 149
def exact(root, cwd, files, badfn=None):
    '''Shortcut for match() with exact=True, treating files as the
    patterns.'''
    return match(root, cwd, files, badfn=badfn, exact=True)
152 152
def always(root, cwd):
    '''Shortcut for match() with an empty pattern list.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
155 155
def badmatch(match, badfn):
    """Return a shallow copy (copy.copy) of the given matcher whose bad
    attribute is set to badfn. The matcher passed in is not modified.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
163 163
164 164 def _donormalize(patterns, default, root, cwd, auditor, warn):
165 165 '''Convert 'kind:pat' from the patterns list to tuples with kind and
166 166 normalized and rooted patterns and with listfiles expanded.'''
167 167 kindpats = []
168 168 for kind, pat in [_patsplit(p, default) for p in patterns]:
169 169 if kind in ('glob', 'relpath'):
170 170 pat = pathutil.canonpath(root, cwd, pat, auditor)
171 171 elif kind in ('relglob', 'path', 'rootfilesin'):
172 172 pat = util.normpath(pat)
173 173 elif kind in ('listfile', 'listfile0'):
174 174 try:
175 175 files = util.readfile(pat)
176 176 if kind == 'listfile0':
177 177 files = files.split('\0')
178 178 else:
179 179 files = files.splitlines()
180 180 files = [f for f in files if f]
181 181 except EnvironmentError:
182 182 raise error.Abort(_("unable to read file list (%s)") % pat)
183 183 for k, p, source in _donormalize(files, default, root, cwd,
184 184 auditor, warn):
185 185 kindpats.append((k, p, pat))
186 186 continue
187 187 elif kind == 'include':
188 188 try:
189 189 fullpath = os.path.join(root, util.localpath(pat))
190 190 includepats = readpatternfile(fullpath, warn)
191 191 for k, p, source in _donormalize(includepats, default,
192 192 root, cwd, auditor, warn):
193 193 kindpats.append((k, p, source or pat))
194 194 except error.Abort as inst:
195 195 raise error.Abort('%s: %s' % (pat, inst[0]))
196 196 except IOError as inst:
197 197 if warn:
198 198 warn(_("skipping unreadable pattern file '%s': %s\n") %
199 199 (pat, inst.strerror))
200 200 continue
201 201 # else: re or relre - which cannot be normalized
202 202 kindpats.append((kind, pat, ''))
203 203 return kindpats
204 204
class basematcher(object):
    '''Base class for matchers.

    As defined here it matches nothing: matchfn always returns False,
    files() is empty and visitdir() always returns False. Subclasses
    override whichever parts they need.
    '''

    def __init__(self, root, cwd, badfn=None):
        self._root = root
        self._cwd = cwd
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn
        self.matchfn = lambda f: False

    def __call__(self, fn):
        '''Return the result of matchfn for fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of files().'''
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        return self.rel(f)

    @propertycache
    def _files(self):
        # The base matcher lists no explicit files.
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        # Set form of files(), used for membership tests.
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        '''Always False for the base matcher.'''
        return False

    def prefix(self):
        '''True when none of always(), isexact() and anypats() is true.'''
        return not (self.always() or self.isexact() or self.anypats())
297 297
class matcher(basematcher):
    '''Matcher built from patterns plus optional include/exclude lists.

    A file matches when every active criterion accepts it: the include
    patterns (if any), the negated exclude patterns (if any), and either the
    exact file list or the primary patterns (if any). With no criteria at
    all, the matcher matches everything.
    '''

    def __init__(self, root, cwd, normalize, patterns, include=None,
                 exclude=None, default='glob', exact=False, auditor=None,
                 ctx=None, listsubrepos=False, warn=None, badfn=None):
        '''normalize is the callable used to turn each raw pattern list into
        (kind, pat, source) tuples; it is called as
        normalize(pats, default, root, cwd, auditor, warn). The remaining
        arguments are the ones documented on match().'''
        super(matcher, self).__init__(root, cwd, badfn)
        include = [] if include is None else include
        exclude = [] if exclude is None else exclude

        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)
        self.patternspat = None
        self.includepat = None
        self.excludepat = None

        # roots are directories which are recursively included/excluded.
        self._includeroots = set()
        self._excluderoots = set()
        # dirs are directories which are non-recursively included.
        self._includedirs = set()

        # One predicate per active criterion; a file must satisfy them all.
        matchfns = []

        if include:
            kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            roots, dirs = _rootsanddirs(kindpats)
            self._includeroots.update(roots)
            self._includedirs.update(dirs)
            matchfns.append(im)

        if exclude:
            kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            if not _anypats(kindpats):
                # Only consider recursive excludes as such - if a non-recursive
                # exclude is used, we must still recurse into the excluded
                # directory, at least to find subdirectories. In such a case,
                # the regex still won't match the non-recursively-excluded
                # files.
                self._excluderoots.update(_roots(kindpats))
            matchfns.append(lambda f: not em(f))

        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            # isexact() relies on this being the sole match function
            matchfns.append(self.exact)
        elif patterns:
            kindpats = normalize(patterns, default, root, cwd, auditor, warn)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos, root)
                matchfns.append(pm)

        if not matchfns:
            self._always = True
            combined = util.always
        elif len(matchfns) == 1:
            combined = matchfns[0]
        else:
            def combined(f):
                return all(matchfn(f) for matchfn in matchfns)

        self.matchfn = combined

    def uipath(self, f):
        '''Display path for f: the cwd-relative path when the matcher was
        built with patterns or include/exclude (and that path is non-empty),
        otherwise the result of abs().'''
        if self._pathrestricted:
            relative = self.rel(f)
            if relative:
                return relative
        return self.abs(f)

    @propertycache
    def _dirs(self):
        # util.dirs() of the explicit files, plus '.'
        alldirs = set(util.dirs(self._fileset))
        alldirs.add('.')
        return alldirs

    def visitdir(self, dir):
        '''See basematcher.visitdir(); the decision is based on the explicit
        files and on the include/exclude roots and directories.'''
        if self.prefix() and dir in self._fileset:
            return 'all'
        if dir in self._excluderoots:
            return False

        if self._includeroots or self._includedirs:
            included = ('.' in self._includeroots or
                        dir in self._includeroots or
                        dir in self._includedirs or
                        any(parent in self._includeroots
                            for parent in util.finddirs(dir)))
            if not included:
                return False

        files = self._fileset
        if not files or '.' in files or dir in files or dir in self._dirs:
            return True
        return any(parentdir in files for parentdir in util.finddirs(dir))

    def anypats(self):
        return self._anypats

    def always(self):
        return self._always

    def isexact(self):
        '''True when the only match function is the exact() method.'''
        return self.matchfn == self.exact

    def __repr__(self):
        details = (self._files, self.patternspat, self.includepat,
                   self.excludepat)
        return ('<matcher files=%r, patterns=%r, includes=%r, excludes=%r>' %
                details)
411 411
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        '''path is the subdirectory, relative to the wrapped matcher's root;
        matcher is the matcher being adapted.'''
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the files below path, with the "path/" prefix removed.
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

        # The matching function for the subdirectory is not rebuilt from the
        # original inputs. Instead, matchfn and visitdir() call the wrapped
        # matcher with the subdirectory path prepended.
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad() with path prepended.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        '''Forward to the wrapped matcher's abs() with path prepended.'''
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        '''Forward to the wrapped matcher's rel() with path prepended.'''
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        '''Forward to the wrapped matcher's uipath() with path prepended.'''
        return self._matcher.uipath(self._path + "/" + f)

    def visitdir(self, dir):
        '''Ask the wrapped matcher about dir; '.' maps to the subdirectory
        itself.'''
        if dir == '.':
            dir = self._path
        else:
            dir = self._path + "/" + dir
        return self._matcher.visitdir(dir)

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()
486
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise default is returned.'''
    kind, pat = _patsplit(pattern, default)
    return kind
486 490
487 491 def _patsplit(pattern, default):
488 492 """Split a string into the optional pattern kind prefix and the actual
489 493 pattern."""
490 494 if ':' in pattern:
491 495 kind, pat = pattern.split(':', 1)
492 496 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
493 497 'listfile', 'listfile0', 'set', 'include', 'subinclude',
494 498 'rootfilesin'):
495 499 return kind, pat
496 500 return default, pattern
497 501
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    escape = util.re.escape
    n = len(pat)
    pieces = []
    group = 0
    i = 0
    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c == '*':
            # '**/' and '**' cross directory boundaries, a lone '*' does not
            if pat[i:i + 1] == '*':
                i += 1
                if pat[i:i + 1] == '/':
                    i += 1
                    pieces.append('(?:.*/)?')
                else:
                    pieces.append('.*')
            else:
                pieces.append('[^/]*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # look for the closing ']'; a leading '!' or ']' is part of the
            # class and cannot close it
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat the '[' literally
                pieces.append('\\[')
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        elif c == '{':
            group += 1
            pieces.append('(?:')
        elif c == '}' and group:
            pieces.append(')')
            group -= 1
        elif c == ',' and group:
            pieces.append('|')
        elif c == '\\':
            # a backslash escapes the next character, if there is one
            following = pat[i:i + 1]
            if following:
                i += 1
                pieces.append(escape(following))
            else:
                pieces.append(escape(c))
        else:
            # ordinary character, or '}' / ',' outside of any group
            pieces.append(escape(c))
    return ''.join(pieces)
575 579
576 580 def _regex(kind, pat, globsuffix):
577 581 '''Convert a (normalized) pattern of any kind into a regular expression.
578 582 globsuffix is appended to the regexp of globs.'''
579 583 if not pat:
580 584 return ''
581 585 if kind == 're':
582 586 return pat
583 587 if kind == 'path':
584 588 if pat == '.':
585 589 return ''
586 590 return '^' + util.re.escape(pat) + '(?:/|$)'
587 591 if kind == 'rootfilesin':
588 592 if pat == '.':
589 593 escaped = ''
590 594 else:
591 595 # Pattern is a directory name.
592 596 escaped = util.re.escape(pat) + '/'
593 597 # Anything after the pattern must be a non-directory.
594 598 return '^' + escaped + '[^/]+$'
595 599 if kind == 'relglob':
596 600 return '(?:|.*/)' + _globre(pat) + globsuffix
597 601 if kind == 'relpath':
598 602 return util.re.escape(pat) + '(?:/|$)'
599 603 if kind == 'relre':
600 604 if pat.startswith('^'):
601 605 return pat
602 606 return '.*' + pat
603 607 return _globre(pat) + globsuffix
604 608
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    Up to three match functions are built - one for 'subinclude' patterns,
    one for expanded filesets and one regexp matcher for everything else -
    and a name matches if any of them accepts it. The regexp string is empty
    when no pattern ended up in the regexp.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for the subincluded files are created on first use
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) != 1:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)
    return regex, matchfuncs[0]
639 643
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The patterns are joined into a single alternation. If that regexp is
    longer than 20000 characters or compiling it raises OverflowError, the
    pattern list is halved recursively and the two resulting matchers are
    or-ed together. Raises error.Abort, naming the first offending pattern
    when one can be identified, if the regexp is invalid.
    """
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        regexa, matchfirst = _buildregexmatch(kindpats[:half], globsuffix)
        regexb, matchsecond = _buildregexmatch(kindpats[half:], globsuffix)
        return regex, lambda s: matchfirst(s) or matchsecond(s)
    except re.error:
        # compile the patterns one by one to find out which one is broken
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
670 674
671 675 def _patternrootsanddirs(kindpats):
672 676 '''Returns roots and directories corresponding to each pattern.
673 677
674 678 This calculates the roots and directories exactly matching the patterns and
675 679 returns a tuple of (roots, dirs) for each. It does not return other
676 680 directories which may also need to be considered, like the parent
677 681 directories.
678 682 '''
679 683 r = []
680 684 d = []
681 685 for kind, pat, source in kindpats:
682 686 if kind == 'glob': # find the non-glob prefix
683 687 root = []
684 688 for p in pat.split('/'):
685 689 if '[' in p or '{' in p or '*' in p or '?' in p:
686 690 break
687 691 root.append(p)
688 692 r.append('/'.join(root) or '.')
689 693 elif kind in ('relpath', 'path'):
690 694 r.append(pat or '.')
691 695 elif kind in ('rootfilesin',):
692 696 d.append(pat or '.')
693 697 else: # relglob, re, relre
694 698 r.append('.')
695 699 return r, d
696 700
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of _patternrootsanddirs().'''
    return _patternrootsanddirs(kindpats)[0]
701 705
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    for known in (exactdirs, roots):
        exactdirs.extend(util.dirs(known))
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
733 737
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = []
    for kindpat in kindpats:
        if kindpat[0] not in ('rootfilesin',):
            filable.append(kindpat)
    return _roots(filable)
746 750
747 751 def _anypats(kindpats):
748 752 for kind, pat, source in kindpats:
749 753 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
750 754 return True
751 755
752 756 _commentre = None
753 757
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns, each prefixed with the syntax that applies to it.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    an unknown name after "syntax:" is reported through warn (when warn is
    set) and otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.

    The file is closed before returning, including when reading or warn
    raises.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                # the comment regexp is compiled once and cached at module
                # level
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A line may carry its own syntax prefix, which overrides the
            # current default for that line only.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now