##// END OF EJS Templates
match: add source to kindpats list...
Durham Goode -
r25213:08a8e9da default
parent child Browse files
Show More
@@ -1,548 +1,554
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import util, pathutil
10 10 from i18n import _
11 11
12 12 propertycache = util.propertycache
13 13
def _rematcher(regex):
    '''Compile regex through util.re and return a matching callable.

    The compiled object's test_match attribute is returned when it exists
    (it is provided by facebook's re2 bindings and is slightly faster);
    otherwise the compiled object's regular match method is returned.'''
    compiled = util.re.compile(regex)
    try:
        matcher = compiled.test_match
    except AttributeError:
        # no re2-style fast path on this compiled object
        matcher = compiled.match
    return matcher
23 23
24 24 def _expandsets(kindpats, ctx, listsubrepos):
25 25 '''Returns the kindpats list with the 'set' patterns expanded.'''
26 26 fset = set()
27 27 other = []
28 28
29 for kind, pat in kindpats:
29 for kind, pat, source in kindpats:
30 30 if kind == 'set':
31 31 if not ctx:
32 32 raise util.Abort("fileset expression with no context")
33 33 s = ctx.getfileset(pat)
34 34 fset.update(s)
35 35
36 36 if listsubrepos:
37 37 for subpath in ctx.substate:
38 38 s = ctx.sub(subpath).getfileset(pat)
39 39 fset.update(subpath + '/' + f for f in s)
40 40
41 41 continue
42 other.append((kind, pat))
42 other.append((kind, pat, source))
43 43 return fset, other
44 44
45 45 def _kindpatsalwaysmatch(kindpats):
46 46 """"Checks whether the kindspats match everything, as e.g.
47 47 'relpath:.' does.
48 48 """
49 for kind, pat in kindpats:
49 for kind, pat, source in kindpats:
50 50 if pat != '' or kind not in ['relpath', 'glob']:
51 51 return False
52 52 return True
53 53
class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False):
        """Create a matcher for a collection of file patterns.

        arguments:
        root - canonical root of the tree being matched against
        cwd - current working directory, when relevant
        patterns - the patterns to look for
        include - patterns that must match (unless they are also excluded)
        exclude - patterns that must not match (even when included)
        default - kind assumed for entries of patterns lacking an explicit
                  kind
        exact - patterns are literal filenames (include/exclude still apply)
        auditor - passed through to _normalize()
        ctx, listsubrepos - passed through to _buildmatch()

        each pattern has one of these forms:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._always = False
        self._anypats = bool(include or exclude)
        self._pathrestricted = bool(include or exclude or patterns)

        # Every predicate collected here must accept a file for it to match.
        # _normalize() is called for include, then exclude, then patterns;
        # subclasses overriding _normalize() may depend on that order.
        predicates = []
        if include:
            kindpats = self._normalize(include, 'glob', root, cwd, auditor)
            self.includepat, included = _buildmatch(ctx, kindpats, '(?:/|$)',
                                                    listsubrepos)
            predicates.append(included)
        if exclude:
            kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, excluded = _buildmatch(ctx, kindpats, '(?:/|$)',
                                                    listsubrepos)
            def notexcluded(f):
                return not excluded(f)
            predicates.append(notexcluded)
        if exact:
            # reuse a caller-supplied list as is, copy any other iterable
            if not isinstance(patterns, list):
                patterns = list(patterns)
            self._files = patterns
            predicates.append(self.exact)
        elif patterns:
            kindpats = self._normalize(patterns, default, root, cwd, auditor)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _roots(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, patmatch = _buildmatch(ctx, kindpats, '$',
                                                         listsubrepos)
                predicates.append(patmatch)

        if not predicates:
            self._always = True
            combined = util.always
        elif len(predicates) == 1:
            # keep the lone predicate itself so isexact() can recognise
            # self.exact by comparison
            combined = predicates[0]
        else:
            def combined(f):
                return all(fn(f) for fn in predicates)

        self.matchfn = combined
        self._fileroots = set(self._files)

    def __call__(self, fn):
        '''Apply the combined match function to fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Yield the entries of self._files one by one.'''
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message. Does nothing by default.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to a path relative to the root of the
        matcher (the path is returned unchanged here).'''
        return f

    def rel(self, f):
        '''Convert a repo path back to a path relative to the matcher's
        cwd, using util.pathto().'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert a repo path to a display path.

        If patterns or -I/-X were used to create this matcher, the display
        path is relative to cwd; otherwise it is relative to the root of
        the repo. A false value from rel() also falls back to abs().'''
        if self._pathrestricted:
            relative = self.rel(f)
            if relative:
                return relative
        return self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _dirs(self):
        # directories of the file roots as reported by util.dirs(), plus '.'
        return set(util.dirs(self._fileroots)) | set(['.'])

    def visitdir(self, dir):
        '''Return True if dir may need to be visited.

        That is the case when there are no file roots, when '.' is a root,
        when dir is a root or one of the roots' directories, or when one of
        the directories yielded by util.finddirs(dir) is a root.'''
        roots = self._fileroots
        if not roots or '.' in roots:
            return True
        if dir in roots or dir in self._dirs:
            return True
        return any(parentdir in roots for parentdir in util.finddirs(dir))

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileroots

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def ispartial(self):
        '''True if the matcher won't always match.

        Although it's just the inverse of _always in this implementation,
        an extension such as narrowhg might make it return something
        slightly different.'''
        return not self._always

    def isexact(self):
        '''True if the match function is exactly self.exact.'''
        return self.matchfn == self.exact

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Convert 'kind:pat' strings from patterns into (kind, pat, source)
        tuples with normalized and rooted patterns and listfiles expanded.

        source is the listfile path for entries read from a 'listfile:' or
        'listfile0:' pattern and '' for every other entry. Raises util.Abort
        when a listfile cannot be read.'''
        normalized = []
        for kind, pat in [_patsplit(p, default) for p in patterns]:
            if kind in ('listfile', 'listfile0'):
                try:
                    content = util.readfile(pat)
                    if kind == 'listfile0':
                        entries = content.split('\0')
                    else:
                        entries = content.splitlines()
                    entries = [e for e in entries if e]
                except EnvironmentError:
                    raise util.Abort(_("unable to read file list (%s)") % pat)
                # entries read from the file are tagged with the file's path
                nested = self._normalize(entries, default, root, cwd, auditor)
                for k, p, source in nested:
                    normalized.append((k, p, pat))
                continue
            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path'):
                pat = util.normpath(pat)
            # else: re or relre - which cannot be normalized
            normalized.append((kind, pat, ''))
        return normalized
230 232
def exact(root, cwd, files):
    '''Return a match object built with exact=True, i.e. one that treats
    files as literal filenames.'''
    matcher = match(root, cwd, files, exact=True)
    return matcher
233 235
def always(root, cwd):
    '''Return a match object built from an empty pattern list.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
236 238
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        # State is copied from the wrapped matcher; match.__init__ is
        # deliberately not invoked.
        self._matcher = matcher
        self._path = path
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._pathrestricted = matcher._pathrestricted
        self._anypats = matcher._anypats

        # keep only the parent's files below path, with "<path>/" removed
        prefix = path + "/"
        self._files = [f[len(prefix):] for f in matcher._files
                       if f.startswith(prefix)]
        self._fileroots = set(self._files)

        # If the parent repo had a path to this subrepo and no patterns are
        # specified, this submatcher always matches.
        self._always = matcher._always
        if not self._always and not matcher._anypats:
            self._always = any(f == path for f in matcher._files)

        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def abs(self, f):
        '''Delegate to the wrapped matcher's abs() with "<path>/" prepended
        to f.'''
        return self._matcher.abs(self._path + "/" + f)

    def bad(self, f, msg):
        '''Forward the bad-file callback to the wrapped matcher with
        "<path>/" prepended to f.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def rel(self, f):
        '''Delegate to the wrapped matcher's rel() with "<path>/" prepended
        to f.'''
        return self._matcher.rel(self._path + "/" + f)
295 297
class icasefsmatcher(match):
    """A matcher for wdir on case insensitive filesystems, which normalizes the
    given patterns to the case in the filesystem.
    """

    def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
                 ctx, listsubrepos=False):
        # _normalize() below needs _dsnormalize, and the base constructor
        # calls _normalize(), so this must be assigned first.
        self._dsnormalize = ctx.repo().dirstate.normalize

        super(icasefsmatcher, self).__init__(root, cwd, patterns, include,
                                             exclude, default,
                                             auditor=auditor, ctx=ctx,
                                             listsubrepos=listsubrepos)

        # m.exact(file) must be based off of the actual user input, otherwise
        # inexact case matches are treated as exact, and not noted without -v.
        if self._files:
            self._fileroots = set(_roots(self._kp))

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Normalize patterns like match._normalize(), then pass every
        non-regex pattern through the dirstate's normalize function.

        The base class result (before case normalization) is remembered in
        self._kp; each call overwrites the previous value.'''
        base = super(icasefsmatcher, self)._normalize
        self._kp = base(patterns, default, root, cwd, auditor)

        folded = []
        for kind, pats, source in self._kp:
            if kind in ('re', 'relre'):
                # regex can't be normalized
                folded.append((kind, pats, source))
            else:
                folded.append((kind, self._dsnormalize(pats), source))
        return folded
323 325
def patkind(pattern, default=None):
    '''Return kind when pattern is 'kind:pat' with a known kind, and
    default otherwise.'''
    split = _patsplit(pattern, default)
    return split[0]
327 329
328 330 def _patsplit(pattern, default):
329 331 """Split a string into the optional pattern kind prefix and the actual
330 332 pattern."""
331 333 if ':' in pattern:
332 334 kind, pat = pattern.split(':', 1)
333 335 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
334 336 'listfile', 'listfile0', 'set'):
335 337 return kind, pat
336 338 return default, pattern
337 339
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    pos, end = 0, len(pat)
    pieces = []          # regexp fragments, joined at the end
    depth = 0            # number of currently open '{' groups
    escape = util.re.escape

    def peek():
        # next unread character, or False once the input is exhausted
        return pos < end and pat[pos]

    while pos < end:
        c = pat[pos]
        pos += 1
        if c not in '*?[{},\\':
            pieces.append(escape(c))
        elif c == '*':
            if peek() == '*':
                pos += 1
                if peek() == '/':
                    # '**/' also matches zero directories
                    pos += 1
                    pieces.append('(?:.*/)?')
                else:
                    pieces.append('.*')
            else:
                pieces.append('[^/]*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # look for the closing ']'; a leading '!' or ']' is part of the
            # class and cannot terminate it
            close = pos
            if close < end and pat[close] in '!]':
                close += 1
            while close < end and pat[close] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat '[' literally
                pieces.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\','\\\\')
                pos = close + 1
                if stuff[0] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        elif c == '{':
            depth += 1
            pieces.append('(?:')
        elif c == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif c == ',' and depth:
            pieces.append('|')
        elif c == '\\':
            following = peek()
            if following:
                pos += 1
                pieces.append(escape(following))
            else:
                pieces.append(escape(c))
        else:
            # '}' or ',' outside of any group
            pieces.append(escape(c))
    return ''.join(pieces)
415 417
416 418 def _regex(kind, pat, globsuffix):
417 419 '''Convert a (normalized) pattern of any kind into a regular expression.
418 420 globsuffix is appended to the regexp of globs.'''
419 421 if not pat:
420 422 return ''
421 423 if kind == 're':
422 424 return pat
423 425 if kind == 'path':
424 426 return '^' + util.re.escape(pat) + '(?:/|$)'
425 427 if kind == 'relglob':
426 428 return '(?:|.*/)' + _globre(pat) + globsuffix
427 429 if kind == 'relpath':
428 430 return util.re.escape(pat) + '(?:/|$)'
429 431 if kind == 'relre':
430 432 if pat.startswith('^'):
431 433 return pat
432 434 return '.*' + pat
433 435 return _globre(pat) + globsuffix
434 436
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos):
    '''Return a (regexp string, matcher function) pair for kindpats.

    'set' patterns are first expanded into a set of file names; when nothing
    else remains, the regexp string is "" and the matcher is plain set
    membership. Otherwise a file matches if it is in that set or matches
    the regexp built from the remaining patterns.
    globsuffix is appended to the regexp of globs.'''
    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if not kindpats:
        return "", fset.__contains__

    regex, rematch = _buildregexmatch(kindpats, globsuffix)
    if not fset:
        return regex, rematch
    return regex, lambda f: f in fset or rematch(f)
446 448
def _buildregexmatch(kindpats, globsuffix):
    """Build a matcher from a list of (kind, pat, source) tuples.

    Returns the regexp string and a matcher function. Raises util.Abort
    naming the offending pattern (prefixed by its source when that is
    non-empty) if a pattern is not a valid regexp."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: first(s) or second(s)
    except re.error:
        # compile the patterns one at a time to find the invalid one
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise util.Abort(_("%s: invalid pattern (%s): %s") %
                                 (s, k, p))
        raise util.Abort(_("invalid pattern"))
473 479
474 480 def _roots(kindpats):
475 481 '''return roots and exact explicitly listed files from patterns
476 482
477 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
483 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
478 484 ['g', 'g', '.']
479 >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
485 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
480 486 ['r', 'p/p', '.']
481 >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
487 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
482 488 ['.', '.', '.']
483 489 '''
484 490 r = []
485 for kind, pat in kindpats:
491 for kind, pat, source in kindpats:
486 492 if kind == 'glob': # find the non-glob prefix
487 493 root = []
488 494 for p in pat.split('/'):
489 495 if '[' in p or '{' in p or '*' in p or '?' in p:
490 496 break
491 497 root.append(p)
492 498 r.append('/'.join(root) or '.')
493 499 elif kind in ('relpath', 'path'):
494 500 r.append(pat or '.')
495 501 else: # relglob, re, relre
496 502 r.append('.')
497 503 return r
498 504
499 505 def _anypats(kindpats):
500 for kind, pat in kindpats:
506 for kind, pat, source in kindpats:
501 507 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
502 508 return True
503 509
# Compiled lazily by readpatternfile() the first time a line contains '#'.
_commentre = None

def readpatternfile(filepath, warn):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    Every returned pattern is prefixed with 'relre:' or 'relglob:'. The
    prefix comes from a per-line 're:', 'regexp:', 'glob:', 'relre:' or
    'relglob:' prefix when the line has one, otherwise from the most recent
    valid 'syntax:' line ('relre:' until one is seen).

    warn is called with a message when a 'syntax:' line names an unknown
    syntax; that line is then ignored. The file is closed even if parsing
    is interrupted by an exception.'''
    global _commentre
    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:'}
    syntax = 'relre:'
    patterns = []

    fp = open(filepath)
    try:
        for line in fp:
            if "#" in line:
                if not _commentre:
                    _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                line = _commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    warn(_("%s: ignoring invalid syntax '%s'\n") %
                         (filepath, s))
                continue

            # a per-line prefix overrides the file-wide syntax for this line
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            patterns.append(linesyntax + line)
    finally:
        # do not leak the handle if warn() or the parsing above raises
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now