##// END OF EJS Templates
match: add optional warn argument...
Durham Goode -
r25214:08703b10 default
parent child Browse files
Show More
@@ -1,554 +1,558 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import util, pathutil
10 10 from i18n import _
11 11
12 12 propertycache = util.propertycache
13 13
def _rematcher(regex):
    '''Compile regex with the best available engine and return a callable
    that matches a string against it.

    The compiled object's test_match method is returned when it has one
    (slightly faster, provided by facebook's re2 bindings); otherwise its
    regular match method is returned.'''
    compiled = util.re.compile(regex)
    matcher = getattr(compiled, 'test_match', None)
    if matcher is None:
        matcher = compiled.match
    return matcher
23 23
def _expandsets(kindpats, ctx, listsubrepos):
    '''Split kindpats into expanded filesets and everything else.

    Returns a (fset, other) pair: fset is the set of file names produced
    by evaluating every 'set' pattern against ctx (and, when listsubrepos
    is true, against each subrepo of ctx with the subrepo path prefixed);
    other is the list of remaining (kind, pat, source) tuples in their
    original order.

    Aborts if a 'set' pattern is present but ctx is false.'''
    matched = set()
    remaining = []

    for kind, pat, source in kindpats:
        if kind != 'set':
            remaining.append((kind, pat, source))
            continue

        if not ctx:
            raise util.Abort("fileset expression with no context")
        matched.update(ctx.getfileset(pat))

        if not listsubrepos:
            continue
        for subpath in ctx.substate:
            subfiles = ctx.sub(subpath).getfileset(pat)
            matched.update(subpath + '/' + f for f in subfiles)

    return matched, remaining
44 44
def _kindpatsalwaysmatch(kindpats):
    """Tell whether the kindpats match everything, as e.g. 'relpath:.'
    does.

    That is the case only when every entry has an empty pattern and is of
    kind 'relpath' or 'glob'; an empty kindpats list also qualifies.
    """
    return all(pat == '' and kind in ('relpath', 'glob')
               for kind, pat, source in kindpats)
53 53
class match(object):
    """Callable deciding whether a repository-relative file name is
    selected by a set of patterns, includes and excludes."""

    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this
                  one
        exact - patterns are actually filenames (include/exclude still apply)
        warn - optional function used for printing warnings

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """
        filtered = bool(include or exclude)

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = filtered
        self._always = False
        self._pathrestricted = filtered or bool(patterns)
        self._warn = warn

        # Every entry must accept a file name for the matcher to accept it.
        matchfns = []

        if include:
            kindpats = self._normalize(include, 'glob', root, cwd, auditor)
            self.includepat, included = _buildmatch(ctx, kindpats, '(?:/|$)',
                                                    listsubrepos)
            matchfns.append(included)

        if exclude:
            kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, excluded = _buildmatch(ctx, kindpats, '(?:/|$)',
                                                    listsubrepos)
            matchfns.append(lambda f: not excluded(f))

        if exact:
            # Keep the caller's list object when one was given.
            self._files = (patterns if isinstance(patterns, list)
                           else list(patterns))
            matchfns.append(self.exact)
        elif patterns:
            kindpats = self._normalize(patterns, default, root, cwd, auditor)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _roots(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, patmatch = _buildmatch(ctx, kindpats, '$',
                                                         listsubrepos)
                matchfns.append(patmatch)

        if not matchfns:
            self._always = True
            combined = util.always
        elif len(matchfns) == 1:
            combined = matchfns[0]
        else:
            def combined(f):
                return all(fn(f) for fn in matchfns)

        self.matchfn = combined
        self._fileroots = set(self._files)

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for path in self._files:
            yield path

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message. Does nothing by default.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of
        the matcher. This implementation returns f unchanged.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of
        matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path. If patterns or -I/-X were
        used to create this matcher, the display path will be relative to
        cwd. Otherwise it is relative to the root of the repo.'''
        if self._pathrestricted:
            relative = self.rel(f)
            # An empty relative path falls back to the abs() form.
            if relative:
                return relative
        return self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _dirs(self):
        parents = set(util.dirs(self._fileroots))
        parents.add('.')
        return parents

    def visitdir(self, dir):
        '''Tell whether dir needs to be visited, judging from the file
        roots of this matcher.'''
        roots = self._fileroots
        if not roots or '.' in roots or dir in roots:
            return True
        if dir in self._dirs:
            return True
        return any(parent in roots for parent in util.finddirs(dir))

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileroots

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def ispartial(self):
        '''True if the matcher won't always match.

        Although it's just the inverse of _always in this implementation,
        an extension such as narrowhg might make it return something
        slightly different.'''
        return not self._always

    def isexact(self):
        '''True if the match function is exactly this matcher's .exact().'''
        return self.matchfn == self.exact

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Convert 'kind:pat' from the patterns list to tuples with kind and
        normalized and rooted patterns and with listfiles expanded.

        Each result is a (kind, pat, source) tuple; source is the list file
        a pattern was read from, or '' for patterns given directly.'''
        result = []
        split = [_patsplit(p, default) for p in patterns]
        for kind, pat in split:
            if kind in ('listfile', 'listfile0'):
                try:
                    content = util.readfile(pat)
                    if kind == 'listfile0':
                        entries = content.split('\0')
                    else:
                        entries = content.splitlines()
                    entries = [f for f in entries if f]
                except EnvironmentError:
                    raise util.Abort(_("unable to read file list (%s)") % pat)
                # Patterns read from a list file record that file as source.
                result.extend((k, p, pat) for k, p, source in
                              self._normalize(entries, default, root, cwd,
                                              auditor))
                continue

            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path'):
                pat = util.normpath(pat)
            # else: re or relre - which cannot be normalized
            result.append((kind, pat, ''))
        return result
232 234
def exact(root, cwd, files):
    '''Return a matcher that treats files as exact file names.'''
    return match(root, cwd, patterns=files, exact=True)
235 237
def always(root, cwd):
    '''Return a matcher built from no patterns at all.'''
    return match(root, cwd, patterns=[])
238 240
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        '''Wrap matcher so that it operates on names relative to path.

        path - the subdirectory (relative to the matcher's root) to narrow to
        matcher - the matcher being adapted
        '''
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always
        self._pathrestricted = matcher._pathrestricted
        # match.__init__ is not called here, so the warning hook it would
        # have set has to be mirrored from the wrapped matcher; fall back to
        # None for matcher objects that do not carry one.
        self._warn = getattr(matcher, '_warn', None)

        # Keep only the files below path, with the "path/" prefix removed.
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]

        # If the parent repo had a path to this subrepo and no patterns are
        # specified, this submatcher always matches.
        if not self._always and not matcher._anypats:
            self._always = any(f == path for f in matcher._files)

        self._anypats = matcher._anypats
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fileroots = set(self._files)

    def abs(self, f):
        '''Delegate to the wrapped matcher with path prepended to f.'''
        return self._matcher.abs(self._path + "/" + f)

    def bad(self, f, msg):
        '''Report f, prefixed with path, to the wrapped matcher's bad().'''
        self._matcher.bad(self._path + "/" + f, msg)

    def rel(self, f):
        '''Delegate to the wrapped matcher with path prepended to f.'''
        return self._matcher.rel(self._path + "/" + f)
297 299
class icasefsmatcher(match):
    """A matcher for wdir on case insensitive filesystems, which normalizes
    the given patterns to the case in the filesystem.
    """

    def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
                 ctx, listsubrepos=False):
        # _normalize() below is invoked by the base constructor, so the
        # dirstate normalizer has to be in place before calling it.
        self._dsnormalize = ctx.repo().dirstate.normalize

        super(icasefsmatcher, self).__init__(
            root, cwd, patterns, include, exclude, default,
            auditor=auditor, ctx=ctx, listsubrepos=listsubrepos)

        # m.exact(file) must be based off of the actual user input, otherwise
        # inexact case matches are treated as exact, and not noted without -v.
        if self._files:
            self._fileroots = set(_roots(self._kp))

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Normalize patterns as the base class does, remember that result
        in self._kp, and return a copy whose non-regex patterns have been
        passed through the dirstate normalizer.'''
        parent = super(icasefsmatcher, self)
        self._kp = parent._normalize(patterns, default, root, cwd, auditor)

        folded = []
        for kind, pats, source in self._kp:
            regexkind = kind in ('re', 'relre') # regex can't be normalized
            if not regexkind:
                pats = self._dsnormalize(pats)
            folded.append((kind, pats, source))
        return folded
325 327
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind; otherwise
    return default.'''
    kind, pat = _patsplit(pattern, default)
    return kind
329 331
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns (kind, pat) when the text before the first ':' is a known
    kind, and (default, pattern) otherwise."""
    known = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
             'listfile', 'listfile0', 'set')
    kind, colon, pat = pattern.partition(':')
    if colon and kind in known:
        return kind, pat
    return default, pattern
339 341
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    escape = util.re.escape
    end = len(pat)
    pos = 0
    depth = 0 # number of '{' groups currently open
    out = []

    while pos < end:
        ch = pat[pos]
        pos += 1
        # One-character lookahead; '' once the end has been reached.
        nxt = pat[pos:pos + 1]

        if ch == '*':
            if nxt != '*':
                out.append('[^/]*')
                continue
            pos += 1
            if pat[pos:pos + 1] == '/':
                # '**/' also matches zero directories
                pos += 1
                out.append('(?:.*/)?')
            else:
                out.append('.*')
        elif ch == '?':
            out.append('.')
        elif ch == '[':
            # Look for the closing bracket; a leading '!' or ']' belongs to
            # the character class itself.
            close = pos
            if close < end and pat[close] in '!]':
                close += 1
            while close < end and pat[close] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat '[' literally
                out.append('\\[')
                continue
            chars = pat[pos:close].replace('\\', '\\\\')
            pos = close + 1
            if chars[0] == '!':
                chars = '^' + chars[1:]
            elif chars[0] == '^':
                chars = '\\' + chars
            out.append('[%s]' % chars)
        elif ch == '{':
            depth += 1
            out.append('(?:')
        elif ch == '}' and depth:
            out.append(')')
            depth -= 1
        elif ch == ',' and depth:
            out.append('|')
        elif ch == '\\' and nxt:
            # backslash escapes the following character
            pos += 1
            out.append(escape(nxt))
        else:
            # ordinary characters, plus '}' / ',' outside a group and a
            # trailing backslash
            out.append(escape(ch))

    return ''.join(out)
417 419
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pattern always converts to the empty string.'''
    if not pat:
        return ''

    if kind == 're':
        return pat
    if kind == 'relre':
        # an unanchored regexp may match anywhere in the name
        return pat if pat.startswith('^') else '.*' + pat

    if kind in ('path', 'relpath'):
        anchor = '^' if kind == 'path' else ''
        return anchor + util.re.escape(pat) + '(?:/|$)'

    globbed = _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + globbed
    return globbed
436 438
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    'set' patterns are expanded to a set of file names first; when nothing
    else remains, the regexp string is empty and the matcher is a plain
    membership test on that set.'''
    fileset, others = _expandsets(kindpats, ctx, listsubrepos)
    if not others:
        return "", fileset.__contains__

    regex, regexmatch = _buildregexmatch(others, globsuffix)
    if not fileset:
        return regex, regexmatch
    return regex, lambda f: f in fileset or regexmatch(f)
448 450
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Aborts with a message naming the offending pattern (and its source,
    when it has one) if a pattern does not compile."""
    try:
        alternatives = [_regex(kind, pat, globsuffix)
                        for (kind, pat, source) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: first(s) or second(s)
    except re.error:
        # Compile the patterns one at a time to find the one to blame.
        for kind, pat, source in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(kind, pat, globsuffix))
            except re.error:
                if source:
                    raise util.Abort(_("%s: invalid pattern (%s): %s") %
                                     (source, kind, pat))
                raise util.Abort(_("invalid pattern (%s): %s") % (kind, pat))
        raise util.Abort(_("invalid pattern"))
479 481
def _roots(kindpats):
    '''return roots and exact explicitly listed files from patterns

    >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
    ['g', 'g', '.']
    >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    ['r', 'p/p', '.']
    >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    ['.', '.', '.']
    '''
    roots = []
    for kind, pat, source in kindpats:
        if kind in ('relpath', 'path'):
            roots.append(pat or '.')
        elif kind == 'glob':
            # find the non-glob prefix: leading path components that hold
            # no glob metacharacter
            literal = []
            for part in pat.split('/'):
                if any(meta in part for meta in '[{*?'):
                    break
                literal.append(part)
            roots.append('/'.join(literal) or '.')
        else: # relglob, re, relre
            roots.append('.')
    return roots
504 506
def _anypats(kindpats):
    '''Return True if any entry of kindpats is of a kind that is a real
    pattern (glob, re, relglob, relre or set); otherwise return None.'''
    patternkinds = ('glob', 're', 'relglob', 'relre', 'set')
    if any(kind in patternkinds for kind, pat, source in kindpats):
        return True
    return None
509 511
# Lazily compiled regexp used by readpatternfile() to strip comments.
_commentre = None

def readpatternfile(filepath, warn):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    filepath - path of the pattern file to read
    warn - function called with a message when a "syntax:" line names an
           unknown syntax; may be false (e.g. None) to stay silent

    Every returned pattern is prefixed with 'relre:' or 'relglob:',
    depending on the syntax in effect or on the prefix of the line itself.
    The file is closed even if reading or parsing raises.'''
    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:'}
    syntax = 'relre:'
    patterns = []

    fp = open(filepath)
    try:
        for line in fp:
            if "#" in line:
                global _commentre
                if not _commentre:
                    _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                line = _commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A line may override the current syntax with its own prefix,
            # given either in long form ('relglob:') or short form ('glob:').
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s + ':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            patterns.append(linesyntax + line)
    finally:
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now