##// END OF EJS Templates
match: fix bug in match.visitdir()...
Drew Gottlieb -
r25250:f9a29dc9 default
parent child Browse files
Show More
@@ -1,620 +1,620 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import util, pathutil
10 10 from i18n import _
11 11
12 12 propertycache = util.propertycache
13 13
def _rematcher(regex):
    '''Compile regex via util.re and return a callable that matches strings
    against it.

    The compiled object's test_match attribute is preferred when it exists;
    otherwise the regular match method is returned.'''
    compiled = util.re.compile(regex)
    # slightly faster, provided by facebook's re2 bindings
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is None:
        return compiled.match
    return fastmatch
23 23
24 24 def _expandsets(kindpats, ctx, listsubrepos):
25 25 '''Returns the kindpats list with the 'set' patterns expanded.'''
26 26 fset = set()
27 27 other = []
28 28
29 29 for kind, pat, source in kindpats:
30 30 if kind == 'set':
31 31 if not ctx:
32 32 raise util.Abort("fileset expression with no context")
33 33 s = ctx.getfileset(pat)
34 34 fset.update(s)
35 35
36 36 if listsubrepos:
37 37 for subpath in ctx.substate:
38 38 s = ctx.sub(subpath).getfileset(pat)
39 39 fset.update(subpath + '/' + f for f in s)
40 40
41 41 continue
42 42 other.append((kind, pat, source))
43 43 return fset, other
44 44
45 45 def _kindpatsalwaysmatch(kindpats):
46 46 """"Checks whether the kindspats match everything, as e.g.
47 47 'relpath:.' does.
48 48 """
49 49 for kind, pat, source in kindpats:
50 50 if pat != '' or kind not in ['relpath', 'glob']:
51 51 return False
52 52 return True
53 53
54 54 class match(object):
55 55 def __init__(self, root, cwd, patterns, include=[], exclude=[],
56 56 default='glob', exact=False, auditor=None, ctx=None,
57 57 listsubrepos=False, warn=None):
58 58 """build an object to match a set of file patterns
59 59
60 60 arguments:
61 61 root - the canonical root of the tree you're matching against
62 62 cwd - the current working directory, if relevant
63 63 patterns - patterns to find
64 64 include - patterns to include (unless they are excluded)
65 65 exclude - patterns to exclude (even if they are included)
66 66 default - if a pattern in patterns has no explicit type, assume this one
67 67 exact - patterns are actually filenames (include/exclude still apply)
68 68 warn - optional function used for printing warnings
69 69
70 70 a pattern is one of:
71 71 'glob:<glob>' - a glob relative to cwd
72 72 're:<regexp>' - a regular expression
73 73 'path:<path>' - a path relative to repository root
74 74 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
75 75 'relpath:<path>' - a path relative to cwd
76 76 'relre:<regexp>' - a regexp that needn't match the start of a name
77 77 'set:<fileset>' - a fileset expression
78 78 'include:<path>' - a file of patterns to read and include
79 79 '<something>' - a pattern of the specified default type
80 80 """
81 81
82 82 self._root = root
83 83 self._cwd = cwd
84 84 self._files = [] # exact files and roots of patterns
85 85 self._anypats = bool(include or exclude)
86 86 self._always = False
87 87 self._pathrestricted = bool(include or exclude or patterns)
88 88 self._warn = warn
89 89 self._includeroots = set()
90 90 self._includedirs = set(['.'])
91 91 self._excluderoots = set()
92 92
93 93 matchfns = []
94 94 if include:
95 95 kindpats = self._normalize(include, 'glob', root, cwd, auditor)
96 96 self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
97 97 listsubrepos, root)
98 98 self._includeroots.update(_roots(kindpats))
99 99 self._includeroots.discard('.')
100 100 self._includedirs.update(util.dirs(self._includeroots))
101 101 matchfns.append(im)
102 102 if exclude:
103 103 kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
104 104 self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
105 105 listsubrepos, root)
106 106 self._excluderoots.update(_roots(kindpats))
107 107 self._excluderoots.discard('.')
108 108 matchfns.append(lambda f: not em(f))
109 109 if exact:
110 110 if isinstance(patterns, list):
111 111 self._files = patterns
112 112 else:
113 113 self._files = list(patterns)
114 114 matchfns.append(self.exact)
115 115 elif patterns:
116 116 kindpats = self._normalize(patterns, default, root, cwd, auditor)
117 117 if not _kindpatsalwaysmatch(kindpats):
118 118 self._files = _roots(kindpats)
119 119 self._anypats = self._anypats or _anypats(kindpats)
120 120 self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
121 121 listsubrepos, root)
122 122 matchfns.append(pm)
123 123
124 124 if not matchfns:
125 125 m = util.always
126 126 self._always = True
127 127 elif len(matchfns) == 1:
128 128 m = matchfns[0]
129 129 else:
130 130 def m(f):
131 131 for matchfn in matchfns:
132 132 if not matchfn(f):
133 133 return False
134 134 return True
135 135
136 136 self.matchfn = m
137 137 self._fileroots = set(self._files)
138 138
139 139 def __call__(self, fn):
140 140 return self.matchfn(fn)
141 141 def __iter__(self):
142 142 for f in self._files:
143 143 yield f
144 144
145 145 # Callbacks related to how the matcher is used by dirstate.walk.
146 146 # Subscribers to these events must monkeypatch the matcher object.
147 147 def bad(self, f, msg):
148 148 '''Callback from dirstate.walk for each explicit file that can't be
149 149 found/accessed, with an error message.'''
150 150 pass
151 151
152 152 # If an explicitdir is set, it will be called when an explicitly listed
153 153 # directory is visited.
154 154 explicitdir = None
155 155
156 156 # If an traversedir is set, it will be called when a directory discovered
157 157 # by recursive traversal is visited.
158 158 traversedir = None
159 159
160 160 def abs(self, f):
161 161 '''Convert a repo path back to path that is relative to the root of the
162 162 matcher.'''
163 163 return f
164 164
165 165 def rel(self, f):
166 166 '''Convert repo path back to path that is relative to cwd of matcher.'''
167 167 return util.pathto(self._root, self._cwd, f)
168 168
169 169 def uipath(self, f):
170 170 '''Convert repo path to a display path. If patterns or -I/-X were used
171 171 to create this matcher, the display path will be relative to cwd.
172 172 Otherwise it is relative to the root of the repo.'''
173 173 return (self._pathrestricted and self.rel(f)) or self.abs(f)
174 174
175 175 def files(self):
176 176 '''Explicitly listed files or patterns or roots:
177 177 if no patterns or .always(): empty list,
178 178 if exact: list exact files,
179 179 if not .anypats(): list all files and dirs,
180 180 else: optimal roots'''
181 181 return self._files
182 182
183 183 @propertycache
184 184 def _dirs(self):
185 185 return set(util.dirs(self._fileroots)) | set(['.'])
186 186
187 187 def visitdir(self, dir):
188 188 '''Decides whether a directory should be visited based on whether it
189 189 has potential matches in it or one of its subdirectories. This is
190 190 based on the match's primary, included, and excluded patterns.
191 191
192 192 This function's behavior is undefined if it has returned False for
193 193 one of the dir's parent directories.
194 194 '''
195 195 if dir in self._excluderoots:
196 196 return False
197 197 parentdirs = None
198 198 if (self._includeroots and dir not in self._includeroots and
199 199 dir not in self._includedirs):
200 parentdirs = util.finddirs(dir)
200 parentdirs = list(util.finddirs(dir))
201 201 if not any(parent in self._includeroots for parent in parentdirs):
202 202 return False
203 203 return (not self._fileroots or '.' in self._fileroots or
204 204 dir in self._fileroots or dir in self._dirs or
205 205 any(parentdir in self._fileroots
206 206 for parentdir in parentdirs or util.finddirs(dir)))
207 207
208 208 def exact(self, f):
209 209 '''Returns True if f is in .files().'''
210 210 return f in self._fileroots
211 211
212 212 def anypats(self):
213 213 '''Matcher uses patterns or include/exclude.'''
214 214 return self._anypats
215 215
216 216 def always(self):
217 217 '''Matcher will match everything and .files() will be empty
218 218 - optimization might be possible and necessary.'''
219 219 return self._always
220 220
221 221 def ispartial(self):
222 222 '''True if the matcher won't always match.
223 223
224 224 Although it's just the inverse of _always in this implementation,
225 225 an extenion such as narrowhg might make it return something
226 226 slightly different.'''
227 227 return not self._always
228 228
229 229 def isexact(self):
230 230 return self.matchfn == self.exact
231 231
232 232 def prefix(self):
233 233 return not self.always() and not self.isexact() and not self.anypats()
234 234
235 235 def _normalize(self, patterns, default, root, cwd, auditor):
236 236 '''Convert 'kind:pat' from the patterns list to tuples with kind and
237 237 normalized and rooted patterns and with listfiles expanded.'''
238 238 kindpats = []
239 239 for kind, pat in [_patsplit(p, default) for p in patterns]:
240 240 if kind in ('glob', 'relpath'):
241 241 pat = pathutil.canonpath(root, cwd, pat, auditor)
242 242 elif kind in ('relglob', 'path'):
243 243 pat = util.normpath(pat)
244 244 elif kind in ('listfile', 'listfile0'):
245 245 try:
246 246 files = util.readfile(pat)
247 247 if kind == 'listfile0':
248 248 files = files.split('\0')
249 249 else:
250 250 files = files.splitlines()
251 251 files = [f for f in files if f]
252 252 except EnvironmentError:
253 253 raise util.Abort(_("unable to read file list (%s)") % pat)
254 254 for k, p, source in self._normalize(files, default, root, cwd,
255 255 auditor):
256 256 kindpats.append((k, p, pat))
257 257 continue
258 258 elif kind == 'include':
259 259 try:
260 260 includepats = readpatternfile(pat, self._warn)
261 261 for k, p, source in self._normalize(includepats, default,
262 262 root, cwd, auditor):
263 263 kindpats.append((k, p, source or pat))
264 264 except util.Abort, inst:
265 265 raise util.Abort('%s: %s' % (pat, inst[0]))
266 266 except IOError, inst:
267 267 if self._warn:
268 268 self._warn(_("skipping unreadable pattern file "
269 269 "'%s': %s\n") % (pat, inst.strerror))
270 270 continue
271 271 # else: re or relre - which cannot be normalized
272 272 kindpats.append((kind, pat, ''))
273 273 return kindpats
274 274
def exact(root, cwd, files):
    '''Return a matcher built with exact=True, i.e. one that treats files
    as literal filenames rather than patterns.'''
    exactmatcher = match(root, cwd, files, exact=True)
    return exactmatcher
277 277
def always(root, cwd):
    '''Return a matcher built from an empty pattern list and no
    include/exclude filters.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
280 280
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        '''Wrap matcher so that filenames are interpreted relative to
        path.'''
        self._path = path
        self._matcher = matcher

        # state copied verbatim from the wrapped matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._pathrestricted = matcher._pathrestricted
        self._anypats = matcher._anypats
        self._always = matcher._always

        # keep only the files below path, with the "path/" prefix removed
        prefix = path + "/"
        self._files = [f[len(prefix):] for f in matcher._files
                       if f.startswith(prefix)]
        self._fileroots = set(self._files)

        # If the parent repo had a path to this subrepo and no patterns are
        # specified, this submatcher always matches.
        if not self._always and not matcher._anypats:
            self._always = path in matcher._files

        def matchfn(fn):
            return matcher.matchfn("/".join((self._path, fn)))
        self.matchfn = matchfn

    def abs(self, f):
        '''Delegate to the wrapped matcher with path prepended to f.'''
        return self._matcher.abs("/".join((self._path, f)))

    def bad(self, f, msg):
        '''Forward the callback to the wrapped matcher with path prepended
        to f.'''
        self._matcher.bad("/".join((self._path, f)), msg)

    def rel(self, f):
        '''Delegate to the wrapped matcher with path prepended to f.'''
        return self._matcher.rel("/".join((self._path, f)))
339 339
class icasefsmatcher(match):
    """A matcher for wdir on case insensitive filesystems, which normalizes the
    given patterns to the case in the filesystem.
    """

    def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
                 ctx, listsubrepos=False):
        # must be in place before the base constructor calls _normalize()
        self._dsnormalize = ctx.repo().dirstate.normalize

        super(icasefsmatcher, self).__init__(root, cwd, patterns, include,
                                             exclude, default,
                                             auditor=auditor, ctx=ctx,
                                             listsubrepos=listsubrepos)

        # m.exact(file) must be based off of the actual user input, otherwise
        # inexact case matches are treated as exact, and not noted without -v.
        if self._files:
            self._fileroots = set(_roots(self._kp))

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Normalize like the base class, remember that result in self._kp,
        and return a copy whose non-regex patterns went through the
        dirstate normalizer.'''
        base = super(icasefsmatcher, self)
        self._kp = base._normalize(patterns, default, root, cwd, auditor)

        def fold(kind, pat):
            # regex can't be normalized
            if kind in ('re', 'relre'):
                return pat
            return self._dsnormalize(pat)

        return [(kind, fold(kind, pat), source)
                for kind, pat, source in self._kp]
367 367
def patkind(pattern, default=None):
    '''Return the kind of pattern: its 'kind:' prefix when that prefix is a
    known kind, otherwise default.'''
    kind, rest = _patsplit(pattern, default)
    return kind
371 371
372 372 def _patsplit(pattern, default):
373 373 """Split a string into the optional pattern kind prefix and the actual
374 374 pattern."""
375 375 if ':' in pattern:
376 376 kind, pat = pattern.split(':', 1)
377 377 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
378 378 'listfile', 'listfile0', 'set', 'include'):
379 379 return kind, pat
380 380 return default, pattern
381 381
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    # i is the read cursor into pat, n its length; res accumulates the regexp
    i, n = 0, len(pat)
    res = ''
    # number of currently open '{' alternation groups
    group = 0
    escape = util.re.escape
    def peek():
        # next unread character, or False once the input is exhausted
        return i < n and pat[i]
    while i < n:
        c = pat[i]
        i += 1
        if c not in '*?[{},\\':
            # ordinary character: emit it literally
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                # '**' crosses directory separators
                i += 1
                if peek() == '/':
                    # '**/' also matches zero leading directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    res += '.*'
            else:
                # a single '*' stays within one path component
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # scan ahead for the closing ']'; a '!' or ']' directly after
            # the '[' is taken as part of the class, not as its end
            j = i
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # no closing bracket: treat the '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0] == '!':
                    # glob negation '!' becomes regexp negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    # a leading '^' is literal in a glob class, so escape it
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside an open group
            res += '|'
        elif c == '\\':
            # backslash escapes the following character; a trailing
            # backslash is emitted as a literal backslash
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                res += escape(c)
        else:
            # '}' or ',' outside of any group: literal
            res += escape(c)
    return res
459 459
460 460 def _regex(kind, pat, globsuffix):
461 461 '''Convert a (normalized) pattern of any kind into a regular expression.
462 462 globsuffix is appended to the regexp of globs.'''
463 463 if not pat:
464 464 return ''
465 465 if kind == 're':
466 466 return pat
467 467 if kind == 'path':
468 468 return '^' + util.re.escape(pat) + '(?:/|$)'
469 469 if kind == 'relglob':
470 470 return '(?:|.*/)' + _globre(pat) + globsuffix
471 471 if kind == 'relpath':
472 472 return util.re.escape(pat) + '(?:/|$)'
473 473 if kind == 'relre':
474 474 if pat.startswith('^'):
475 475 return pat
476 476 return '.*' + pat
477 477 return _globre(pat) + globsuffix
478 478
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.

    'set' patterns are expanded to a file set first; the remaining patterns
    are compiled into a single regexp, with globsuffix appended to the
    regexp of globs.  The returned regexp string is '' when no non-set
    patterns remain.  root is accepted but not used here.'''
    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)

    regex = ''
    predicates = []
    if fset:
        predicates.append(fset.__contains__)
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        predicates.append(regexmatch)

    if len(predicates) != 1:
        # zero or several predicates: a name matches if any of them accepts
        return regex, lambda f: any(mf(f) for mf in predicates)
    return regex, predicates[0]
497 497
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    If the combined regexp is too large (or the engine raises
    OverflowError), the pattern list is halved recursively and the halves
    are or-ed together.  If compilation fails with re.error, each pattern
    is compiled on its own so the offending one can be named in the
    abort message."""
    try:
        pieces = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(pieces)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        a = _buildregexmatch(kindpats[:half], globsuffix)[1]
        b = _buildregexmatch(kindpats[half:], globsuffix)[1]
        # the full regexp string is still reported, only matching is split
        return regex, lambda s: a(s) or b(s)
    except re.error:
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise util.Abort(_("%s: invalid pattern (%s): %s") %
                                 (s, k, p))
        raise util.Abort(_("invalid pattern"))
528 528
529 529 def _roots(kindpats):
530 530 '''return roots and exact explicitly listed files from patterns
531 531
532 532 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
533 533 ['g', 'g', '.']
534 534 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
535 535 ['r', 'p/p', '.']
536 536 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
537 537 ['.', '.', '.']
538 538 '''
539 539 r = []
540 540 for kind, pat, source in kindpats:
541 541 if kind == 'glob': # find the non-glob prefix
542 542 root = []
543 543 for p in pat.split('/'):
544 544 if '[' in p or '{' in p or '*' in p or '?' in p:
545 545 break
546 546 root.append(p)
547 547 r.append('/'.join(root) or '.')
548 548 elif kind in ('relpath', 'path'):
549 549 r.append(pat or '.')
550 550 else: # relglob, re, relre
551 551 r.append('.')
552 552 return r
553 553
554 554 def _anypats(kindpats):
555 555 for kind, pat, source in kindpats:
556 556 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
557 557 return True
558 558
# Lazily compiled by readpatternfile() the first time a line contains '#'.
_commentre = None

def readpatternfile(filepath, warn):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    filepath is the file to read; warn, if not false, is called with a
    message whenever a "syntax:" line names an unknown syntax.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type'''
    global _commentre

    # maps a syntax name to the prefix put in front of the emitted pattern
    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include'}
    syntax = 'relre:'
    patterns = []

    fp = open(filepath)
    try:
        # try/finally guarantees the file is closed even if parsing or the
        # warn callback raises
        for line in fp:
            if "#" in line:
                if not _commentre:
                    _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                line = _commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                name = line[7:].strip()
                try:
                    syntax = syntaxes[name]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, name))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for name, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(name + ':'):
                    linesyntax = rels
                    line = line[len(name) + 1:]
                    break
            patterns.append(linesyntax + line)
    finally:
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now