##// END OF EJS Templates
match: introduce boolean prefix() method...
Martin von Zweigbergk -
r25233:9789b4a7 default
parent child Browse files
Show More
@@ -1,610 +1,613 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import util, pathutil
10 10 from i18n import _
11 11
12 12 propertycache = util.propertycache
13 13
def _rematcher(regex):
    '''Compile regex with util.re and return a callable that matches a string.

    The compiled object's test_match attribute is preferred when it exists
    (slightly faster, provided by facebook's re2 bindings); otherwise the
    regular match method is returned.'''
    compiled = util.re.compile(regex)
    try:
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
23 23
def _expandsets(kindpats, ctx, listsubrepos):
    '''Split kindpats into expanded fileset members and everything else.

    kindpats is an iterable of (kind, pat, source) tuples. Every 'set'
    pattern is evaluated through ctx.getfileset(); when listsubrepos is
    true it is additionally evaluated in each subrepo listed in
    ctx.substate, with the subrepo path prefixed to the resulting names.

    Returns a (fset, other) tuple: fset is the set of file names produced
    by the 'set' patterns and other is the list of the remaining
    (kind, pat, source) tuples in their original order.

    Raises util.Abort if a 'set' pattern is given without a ctx.'''
    fset = set()
    other = []

    for kind, pat, source in kindpats:
        if kind != 'set':
            other.append((kind, pat, source))
            continue

        if not ctx:
            # marked for translation like every other abort in this module
            raise util.Abort(_("fileset expression with no context"))
        fset.update(ctx.getfileset(pat))

        if listsubrepos:
            for subpath in ctx.substate:
                subfiles = ctx.sub(subpath).getfileset(pat)
                fset.update(subpath + '/' + f for f in subfiles)
    return fset, other
44 44
def _kindpatsalwaysmatch(kindpats):
    """Check whether the kindpats match everything, as e.g. 'relpath:.' does.

    True only when every (kind, pat, source) entry has an empty pattern and
    a kind of 'relpath' or 'glob'; an empty kindpats also yields True.
    """
    return all(pat == '' and kind in ('relpath', 'glob')
               for kind, pat, source in kindpats)
53 53
class match(object):
    def __init__(self, root, cwd, patterns, include=None, exclude=None,
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        warn - optional function used for printing warnings

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        'include:<path>' - a file of patterns to read and include
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)
        self._warn = warn
        self._includeroots = set()
        self._includedirs = set(['.'])
        self._excluderoots = set()

        # every function in this list must accept a file for it to match
        matchfns = []
        if include:
            kindpats = self._normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos)
            self._includeroots.update(_roots(kindpats))
            self._includeroots.discard('.')
            self._includedirs.update(util.dirs(self._includeroots))
            matchfns.append(im)
        if exclude:
            kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos)
            self._excluderoots.update(_roots(kindpats))
            self._excluderoots.discard('.')
            matchfns.append(lambda f: not em(f))
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            # the 'exact' argument shadows nothing here: this is the method
            matchfns.append(self.exact)
        elif patterns:
            kindpats = self._normalize(patterns, default, root, cwd, auditor)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _roots(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos)
                matchfns.append(pm)

        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m
        self._fileroots = set(self._files)

    def __call__(self, fn):
        '''Return the result of the combined match function for fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        return (self._pathrestricted and self.rel(f)) or self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _dirs(self):
        '''What util.dirs() reports for the file roots, plus '.'.'''
        return set(util.dirs(self._fileroots)) | set(['.'])

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        if dir in self._excluderoots:
            return False
        parentdirs = None
        if (self._includeroots and dir not in self._includeroots and
            dir not in self._includedirs):
            # Materialize the parents: the result is walked here and again
            # below. A lazy iterator (NOTE(review): util.finddirs is a
            # generator in Mercurial's util module) would be partly consumed
            # by this any() and, being always truthy, would then be reused
            # with its first elements already gone.
            parentdirs = list(util.finddirs(dir))
            if not any(parent in self._includeroots for parent in parentdirs):
                return False
        return (not self._fileroots or '.' in self._fileroots or
                dir in self._fileroots or dir in self._dirs or
                any(parentdir in self._fileroots
                    for parentdir in parentdirs or util.finddirs(dir)))

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileroots

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def ispartial(self):
        '''True if the matcher won't always match.

        Although it's just the inverse of _always in this implementation,
        an extension such as narrowhg might make it return something
        slightly different.'''
        return not self._always

    def isexact(self):
        '''True if the match function is this matcher's own .exact().'''
        return self.matchfn == self.exact

    def prefix(self):
        '''True if the matcher is neither .always(), .isexact() nor
        .anypats().'''
        return not self.always() and not self.isexact() and not self.anypats()

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Convert 'kind:pat' from the patterns list to tuples with kind and
        normalized and rooted patterns and with listfiles expanded.

        Returns a list of (kind, pat, source) tuples. source is the list
        file or include file a pattern was read from, or '' for patterns
        given directly. Raises util.Abort when a list file cannot be read
        or an included pattern file aborts; an unreadable include file is
        only reported through the warn callback, if one was given.'''
        kindpats = []
        for kind, pat in [_patsplit(p, default) for p in patterns]:
            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path'):
                pat = util.normpath(pat)
            elif kind in ('listfile', 'listfile0'):
                try:
                    files = util.readfile(pat)
                    if kind == 'listfile0':
                        files = files.split('\0')
                    else:
                        files = files.splitlines()
                    files = [f for f in files if f]
                except EnvironmentError:
                    raise util.Abort(_("unable to read file list (%s)") % pat)
                # patterns read from a list file are attributed to that file
                for k, p, source in self._normalize(files, default, root, cwd,
                                                    auditor):
                    kindpats.append((k, p, pat))
                continue
            elif kind == 'include':
                try:
                    includepats = readpatternfile(pat, self._warn)
                    # keep the innermost source for nested includes
                    for k, p, source in self._normalize(includepats, default,
                                                        root, cwd, auditor):
                        kindpats.append((k, p, source or pat))
                except util.Abort as inst:
                    raise util.Abort('%s: %s' % (pat, inst[0]))
                except IOError as inst:
                    if self._warn:
                        self._warn(_("skipping unreadable pattern file "
                                     "'%s': %s\n") % (pat, inst.strerror))
                continue
            # else: re or relre - which cannot be normalized
            kindpats.append((kind, pat, ''))
        return kindpats
271 274
def exact(root, cwd, files):
    '''Build a matcher that treats files as exact file names.'''
    return match(root, cwd, patterns=files, exact=True)
274 277
def always(root, cwd):
    '''Build a matcher with no patterns at all.'''
    return match(root, cwd, patterns=[])
277 280
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        '''Wrap matcher so that names are interpreted relative to path.

        The base class constructor is deliberately not called; only the
        attributes assigned below exist on the instance.'''
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always
        self._pathrestricted = matcher._pathrestricted

        # keep only the wrapped matcher's files below path, minus the prefix
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]

        # If the parent repo had a path to this subrepo and no patterns are
        # specified, this submatcher always matches.
        if not self._always and not matcher._anypats:
            self._always = any(f == path for f in matcher._files)

        self._anypats = matcher._anypats
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fileroots = set(self._files)

    def abs(self, f):
        '''Delegate to the wrapped matcher with path prepended to f.'''
        return self._matcher.abs(self._path + "/" + f)

    def bad(self, f, msg):
        '''Forward the callback to the wrapped matcher with path prepended.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def rel(self, f):
        '''Delegate to the wrapped matcher with path prepended to f.'''
        return self._matcher.rel(self._path + "/" + f)

    def visitdir(self, dir):
        '''Ask the wrapped matcher whether dir (relative to path) should be
        visited.

        The inherited implementation reads the include/exclude root sets,
        which this class's __init__ never creates, so the question is
        forwarded with the directory mapped back into the wrapped
        matcher's namespace ('.' maps to path itself).'''
        if dir == '.':
            dir = self._path
        else:
            dir = self._path + "/" + dir
        return self._matcher.visitdir(dir)
336 339
class icasefsmatcher(match):
    """A matcher for wdir on case insensitive filesystems, which normalizes the
    given patterns to the case in the filesystem.
    """

    def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
                 ctx, listsubrepos=False):
        '''Build the matcher; ctx supplies the dirstate used to normalize
        pattern case.'''
        baseinit = super(icasefsmatcher, self).__init__
        # must exist before the base constructor runs, since that calls
        # _normalize()
        self._dsnormalize = ctx.repo().dirstate.normalize

        baseinit(root, cwd, patterns, include, exclude, default,
                 auditor=auditor, ctx=ctx, listsubrepos=listsubrepos)

        # m.exact(file) must be based off of the actual user input, otherwise
        # inexact case matches are treated as exact, and not noted without -v.
        if self._files:
            self._fileroots = set(_roots(self._kp))

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Normalize as the base class does, then pass every non-regex
        pattern through the dirstate normalizer.

        The un-case-normalized kindpats of the most recent call are kept in
        self._kp.'''
        rawkindpats = super(icasefsmatcher, self)._normalize(patterns, default,
                                                             root, cwd, auditor)
        self._kp = rawkindpats

        def fixcase(kind, pat):
            # regex can't be normalized
            if kind in ('re', 'relre'):
                return pat
            return self._dsnormalize(pat)

        return [(kind, fixcase(kind, pat), source)
                for kind, pat, source in rawkindpats]
364 367
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern when the kind is a known one,
    otherwise default.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
368 371
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns (kind, pat) when the text before the first ':' is a known kind;
    otherwise returns (default, pattern) with the pattern left untouched."""
    knownkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                  'listfile', 'listfile0', 'set', 'include')
    kind, colon, pat = pattern.partition(':')
    if colon and kind in knownkinds:
        return kind, pat
    return default, pattern
378 381
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    escape = util.re.escape
    length = len(pat)
    pieces = []  # regexp fragments, joined at the end
    depth = 0    # number of '{' groups currently open
    pos = 0
    while pos < length:
        ch = pat[pos]
        pos += 1
        if ch == '*':
            if pat[pos:pos + 1] != '*':
                # single star: anything within one path component
                pieces.append('[^/]*')
                continue
            pos += 1
            if pat[pos:pos + 1] == '/':
                # '**/' also matches zero directories
                pos += 1
                pieces.append('(?:.*/)?')
            else:
                pieces.append('.*')
        elif ch == '?':
            pieces.append('.')
        elif ch == '[':
            # find the closing bracket; a leading '!' or ']' is part of the set
            end = pos
            if end < length and pat[end] in '!]':
                end += 1
            while end < length and pat[end] != ']':
                end += 1
            if end >= length:
                # unterminated set: treat the bracket literally
                pieces.append('\\[')
            else:
                stuff = pat[pos:end].replace('\\', '\\\\')
                pos = end + 1
                if stuff[0] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        elif ch == '{':
            depth += 1
            pieces.append('(?:')
        elif ch == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif ch == ',' and depth:
            pieces.append('|')
        elif ch == '\\' and pos < length:
            # backslash escapes the following character
            pieces.append(escape(pat[pos]))
            pos += 1
        else:
            # ordinary character, a trailing backslash, or '}' / ','
            # outside of any group
            pieces.append(escape(ch))
    return ''.join(pieces)
456 459
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.

    globsuffix is appended to the regexp of globs. An empty pat always
    yields the empty string.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'relre':
        # already anchored patterns are used as they are
        if pat.startswith('^'):
            return pat
        return '.*' + pat
    if kind in ('path', 'relpath'):
        escaped = util.re.escape(pat) + '(?:/|$)'
        if kind == 'path':
            return '^' + escaped
        return escaped
    globbed = _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + globbed
    return globbed
475 478
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos):
    '''Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs. 'set' patterns are
    expanded into a set of file names first; when nothing else remains the
    regexp string is empty and the matcher is plain set membership.'''
    fset, remaining = _expandsets(kindpats, ctx, listsubrepos)
    if remaining:
        regex, regexmatch = _buildregexmatch(remaining, globsuffix)
        if not fset:
            return regex, regexmatch
        return regex, lambda f: f in fset or regexmatch(f)
    return "", fset.__contains__
487 490
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises util.Abort naming the offending pattern (and its source, when
    known) if a pattern is not a valid regular expression."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        middle = count // 2
        a = _buildregexmatch(kindpats[:middle], globsuffix)[1]
        b = _buildregexmatch(kindpats[middle:], globsuffix)[1]
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one by one to find out which one is broken
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise util.Abort(_("%s: invalid pattern (%s): %s") %
                                 (s, k, p))
        raise util.Abort(_("invalid pattern"))
518 521
def _roots(kindpats):
    '''return roots and exact explicitly listed files from patterns

    One entry is produced per pattern: the leading glob-free directory
    components for 'glob', the pattern itself for 'relpath' and 'path', and
    '.' for every other kind or whenever the result would be empty.

    >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
    ['g', 'g', '.']
    >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    ['r', 'p/p', '.']
    >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    ['.', '.', '.']
    '''
    globchars = '[{*?'
    roots = []
    for kind, pat, source in kindpats:
        if kind in ('relpath', 'path'):
            roots.append(pat or '.')
        elif kind == 'glob':
            # keep path components up to the first one using glob syntax
            prefix = []
            for component in pat.split('/'):
                if any(ch in component for ch in globchars):
                    break
                prefix.append(component)
            roots.append('/'.join(prefix) or '.')
        else:
            # relglob, re, relre
            roots.append('.')
    return roots
543 546
def _anypats(kindpats):
    '''Return True if any entry of kindpats has a kind that is not a plain
    path ('glob', 're', 'relglob', 'relre' or 'set'), False otherwise.'''
    for kind, pat, source in kindpats:
        if kind in ('glob', 're', 'relglob', 'relre', 'set'):
            return True
    # explicit False rather than falling off the end, so callers that store
    # the result keep a real boolean
    return False
548 551
# lazily compiled by readpatternfile() the first time a '#' is seen
_commentre = None

def readpatternfile(filepath, warn):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    warn, if given, is called with a message for each unknown
    "syntax:" value; such lines are otherwise ignored. Errors from
    opening or reading filepath propagate to the caller, and the file is
    closed even when reading fails part way through.'''
    global _commentre

    # NOTE(review): 'include' maps to a prefix without the trailing colon,
    # so any line merely starting with "include" takes that branch below -
    # confirm this is intended.
    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include'}
    syntax = 'relre:'
    patterns = []

    fp = open(filepath)
    try:
        for line in fp:
            if "#" in line:
                if not _commentre:
                    _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                line = _commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s + ':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            patterns.append(linesyntax + line)
    finally:
        # release the handle even if reading or parsing raised
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now