##// END OF EJS Templates
match: pass in normalize() function to matchers...
Martin von Zweigbergk -
r32398:1c1f7c94 default
parent child Browse files
Show More
@@ -1,808 +1,805 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
21 21 propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile regex via util.re and return a callable that matches with it.

    The compiled object's test_match attribute is preferred when it exists;
    otherwise the regular match method is returned.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        fastmatch = compiled.test_match
    except AttributeError:
        return compiled.match
    return fastmatch
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.Abort(_("fileset expression with no context"))
42 42 s = ctx.getfileset(pat)
43 43 fset.update(s)
44 44
45 45 if listsubrepos:
46 46 for subpath in ctx.substate:
47 47 s = ctx.sub(subpath).getfileset(pat)
48 48 fset.update(subpath + '/' + f for f in s)
49 49
50 50 continue
51 51 other.append((kind, pat, source))
52 52 return fset, other
53 53
54 54 def _expandsubinclude(kindpats, root):
55 55 '''Returns the list of subinclude matcher args and the kindpats without the
56 56 subincludes in it.'''
57 57 relmatchers = []
58 58 other = []
59 59
60 60 for kind, pat, source in kindpats:
61 61 if kind == 'subinclude':
62 62 sourceroot = pathutil.dirname(util.normpath(source))
63 63 pat = util.pconvert(pat)
64 64 path = pathutil.join(sourceroot, pat)
65 65
66 66 newroot = pathutil.dirname(path)
67 67 matcherargs = (newroot, '', [], ['include:%s' % path])
68 68
69 69 prefix = pathutil.canonpath(root, root, newroot)
70 70 if prefix:
71 71 prefix += '/'
72 72 relmatchers.append((prefix, matcherargs))
73 73 else:
74 74 other.append((kind, pat, source))
75 75
76 76 return relmatchers, other
77 77
78 78 def _kindpatsalwaysmatch(kindpats):
79 79 """"Checks whether the kindspats match everything, as e.g.
80 80 'relpath:.' does.
81 81 """
82 82 for kind, pat, source in kindpats:
83 83 if pat != '' or kind not in ['relpath', 'glob']:
84 84 return False
85 85 return True
86 86
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None):
    """Build a matcher object for a set of file patterns.

    The patterns are normalized with _donormalize(); all other arguments are
    handed to the matcher class unchanged.

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    return matcher(
        root, cwd, _donormalize, patterns,
        include=include, exclude=exclude,
        default=default, exact=exact,
        auditor=auditor, ctx=ctx,
        listsubrepos=listsubrepos,
        warn=warn, badfn=badfn)
122 123
def icasefsmatch(root, cwd, patterns, include=None, exclude=None,
                 default='glob', auditor=None, ctx=None,
                 listsubrepos=False, badfn=None):
    """Build a matcher for wdir on case insensitive filesystems.

    The given patterns are normalized to the case in the filesystem by
    running them through the normalize() method of ctx.repo().dirstate.
    Regular expression kinds ('re', 'relre') are left alone.
    """
    dirstate = ctx.repo().dirstate
    dsnormalize = dirstate.normalize

    def normalize(patterns, default, root, cwd, auditor, warn):
        folded = []
        for kind, pat, source in _donormalize(patterns, default, root, cwd,
                                              auditor, warn):
            if kind in ('re', 'relre'): # regex can't be normalized
                folded.append((kind, pat, source))
                continue

            original = pat
            pat = dsnormalize(original)

            # Preserve the original to handle a case only rename.
            if original != pat and original in dirstate:
                folded.append((kind, original, source))

            folded.append((kind, pat, source))
        return folded

    return icasefsmatcher(root, cwd, normalize, patterns=patterns,
                          include=include, exclude=exclude, default=default,
                          auditor=auditor, ctx=ctx, listsubrepos=listsubrepos,
                          badfn=badfn)
132 152
def exact(root, cwd, files, badfn=None):
    '''Build a matcher via match() with exact=True, so that files are taken
    as filenames rather than patterns. badfn is passed through.'''
    return match(root, cwd, files, badfn=badfn, exact=True)
135 155
def always(root, cwd):
    '''Build a matcher via match() from an empty pattern list.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
138 158
def badmatch(match, badfn):
    """Return a shallow copy of the matcher match whose bad attribute is
    badfn. The matcher passed in is not modified.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
146 166
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Turn 'kind:pat' strings into (kind, pat, source) tuples.

    Patterns are split with _patsplit() (using default for unprefixed
    patterns) and then handled by kind:

    - 'glob' and 'relpath' patterns are made canonical relative to root and
      cwd with pathutil.canonpath()
    - 'relglob', 'path' and 'rootfilesin' patterns go through util.normpath()
    - 'listfile' / 'listfile0' patterns name a file whose non-empty entries
      (split on newlines / NUL) are normalized recursively; the listfile
      path becomes their source
    - 'include' patterns name a pattern file under root that is read with
      readpatternfile() and normalized recursively; entries without a
      source get the include path as source
    - every other kind is kept unchanged with an empty source

    Raises error.Abort if a list file cannot be read or if processing an
    include file aborts. An unreadable include file is skipped, with a
    message passed to warn when warn is set.'''
    normalized = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in ('listfile', 'listfile0'):
            try:
                content = util.readfile(pat)
                if kind == 'listfile0':
                    entries = content.split('\0')
                else:
                    entries = content.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            listed = _donormalize(entries, default, root, cwd, auditor, warn)
            normalized.extend((k, p, pat) for k, p, source in listed)
            continue

        if kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                nested = _donormalize(includepats, default, root, cwd,
                                      auditor, warn)
                normalized.extend((k, p, source or pat)
                                  for k, p, source in nested)
            except error.Abort as inst:
                raise error.Abort('%s: %s' % (pat, inst[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue

        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        # any other kind (e.g. re or relre) cannot be normalized
        normalized.append((kind, pat, ''))
    return normalized
187 207
class matcher(object):
    '''Decide whether filenames match patterns plus include/exclude lists.

    Instances are callable: m(fn) returns the result of the match function
    assembled in __init__. Iterating an instance yields the entries of
    files().'''

    def __init__(self, root, cwd, normalize, patterns, include=None,
                 exclude=None, default='glob', exact=False, auditor=None,
                 ctx=None, listsubrepos=False, warn=None, badfn=None):
        '''Set up the matcher.

        normalize is a callable taking (patterns, default, root, cwd,
        auditor, warn) and returning a list of (kind, pat, source) tuples;
        it is applied to include, exclude and (unless exact is set)
        patterns. See match() for the meaning of the other arguments.'''
        include = [] if include is None else include
        exclude = [] if exclude is None else exclude

        restricted = bool(include or exclude)

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = restricted
        self._always = False
        self._pathrestricted = bool(restricted or patterns)

        # roots are directories which are recursively included/excluded.
        self._includeroots = set()
        self._excluderoots = set()
        # dirs are directories which are non-recursively included.
        self._includedirs = set()

        if badfn is not None:
            self.bad = badfn

        checks = []
        if include:
            incpats = normalize(include, 'glob', root, cwd, auditor, warn)
            self.includepat, included = _buildmatch(ctx, incpats, '(?:/|$)',
                                                    listsubrepos, root)
            roots, dirs = _rootsanddirs(incpats)
            self._includeroots.update(roots)
            self._includedirs.update(dirs)
            checks.append(included)
        if exclude:
            excpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
            self.excludepat, excluded = _buildmatch(ctx, excpats, '(?:/|$)',
                                                    listsubrepos, root)
            if not _anypats(excpats):
                # Only consider recursive excludes as such - if a
                # non-recursive exclude is used, we must still recurse into
                # the excluded directory, at least to find subdirectories.
                # In such a case, the regex still won't match the
                # non-recursively-excluded files.
                self._excluderoots.update(_roots(excpats))
            checks.append(lambda f: not excluded(f))
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            checks.append(self.exact)
        elif patterns:
            kindpats = normalize(patterns, default, root, cwd, auditor, warn)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, matchpats = _buildmatch(ctx, kindpats, '$',
                                                          listsubrepos, root)
                checks.append(matchpats)

        if not checks:
            # nothing restricts the match
            self._always = True
            self.matchfn = util.always
        elif len(checks) == 1:
            self.matchfn = checks[0]
        else:
            def matchall(f):
                return all(check(f) for check in checks)
            self.matchfn = matchall

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of
        the matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of
        matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path. If patterns or -I/-X were
        used to create this matcher, the display path will be relative to
        cwd. Otherwise it is relative to the root of the repo.'''
        if self._pathrestricted:
            relative = self.rel(f)
            if relative:
                return relative
        return self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    @propertycache
    def _dirs(self):
        parents = set(util.dirs(self._fileset))
        parents.add('.')
        return parents

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all
        subdirectories should be visited. Otherwise returns True or False
        indicating whether the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        if self.prefix() and dir in self._fileset:
            return 'all'
        if dir in self._excluderoots:
            return False

        includeroots = self._includeroots
        if includeroots or self._includedirs:
            included = ('.' in includeroots or
                        dir in includeroots or
                        dir in self._includedirs or
                        any(parent in includeroots
                            for parent in util.finddirs(dir)))
            if not included:
                return False

        fileset = self._fileset
        if (not fileset or '.' in fileset or dir in fileset or
            dir in self._dirs):
            return True
        return any(parentdir in fileset
                   for parentdir in util.finddirs(dir))

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def isexact(self):
        '''True when the match function is this matcher's exact() method.'''
        return self.matchfn == self.exact

    def prefix(self):
        '''True when the matcher is neither always(), isexact() nor
        anypats().'''
        return not (self.always() or self.isexact() or self.anypats())
370
class subdirmatcher(matcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        '''Wrap matcher so that it works on names relative to path.'''
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always

        # keep only the wrapped matcher's files below path, made relative
        prefix = path + "/"
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(prefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

        self._anypats = matcher._anypats
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the
        # subdirectory from the inputs. Instead, we override matchfn() and
        # visitdir() to call the original matcher with the subdirectory
        # path prepended.
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad() with the path prepended.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        '''Forward to the wrapped matcher's abs() with the path prepended.'''
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        '''Forward to the wrapped matcher's rel() with the path prepended.'''
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        '''Forward to the wrapped matcher's uipath() with the path
        prepended.'''
        return self._matcher.uipath(self._path + "/" + f)

    def visitdir(self, dir):
        '''Ask the wrapped matcher about dir inside the subdirectory; '.'
        stands for the subdirectory itself.'''
        if dir == '.':
            target = self._path
        else:
            target = self._path + "/" + dir
        return self._matcher.visitdir(target)
441 455
class icasefsmatcher(matcher):
    '''matcher subclass created by icasefsmatch(); construction is delegated
    entirely to matcher.__init__ with the given normalize callback.'''

    def __init__(self, root, cwd, normalize, patterns, include, exclude,
                 default, auditor, ctx, listsubrepos=False, badfn=None):
        super(icasefsmatcher, self).__init__(
            root, cwd,
            normalize=normalize, patterns=patterns,
            include=include, exclude=exclude,
            default=default, auditor=auditor,
            ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)
468 465
def patkind(pattern, default=None):
    '''Return the kind of pattern when it is written as 'kind:pat' with a
    known kind, and default otherwise.'''
    kind, _pat = _patsplit(pattern, default)
    return kind
472 469
473 470 def _patsplit(pattern, default):
474 471 """Split a string into the optional pattern kind prefix and the actual
475 472 pattern."""
476 473 if ':' in pattern:
477 474 kind, pat = pattern.split(':', 1)
478 475 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
479 476 'listfile', 'listfile0', 'set', 'include', 'subinclude',
480 477 'rootfilesin'):
481 478 return kind, pat
482 479 return default, pattern
483 480
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    pos, end = 0, len(pat)
    pieces = []
    depth = 0 # number of currently open '{' groups
    escape = util.re.escape

    def peek():
        return pos < end and pat[pos:pos + 1]

    while pos < end:
        c = pat[pos:pos + 1]
        pos += 1
        if c not in '*?[{},\\':
            pieces.append(escape(c))
        elif c == '*':
            if peek() != '*':
                pieces.append('[^/]*')
                continue
            pos += 1
            if peek() == '/':
                # '**/' also matches zero directories
                pos += 1
                pieces.append('(?:.*/)?')
            else:
                pieces.append('.*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # look for the closing bracket; a leading '!' or ']' is part of
            # the character class
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat '[' as a literal
                pieces.append('\\[')
                continue
            stuff = pat[pos:close].replace('\\','\\\\')
            pos = close + 1
            first = stuff[0:1]
            if first == '!':
                stuff = '^' + stuff[1:]
            elif first == '^':
                stuff = '\\' + stuff
            pieces.append('[%s]' % stuff)
        elif c == '{':
            depth += 1
            pieces.append('(?:')
        elif c == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif c == ',' and depth:
            pieces.append('|')
        elif c == '\\':
            following = peek()
            if following:
                pos += 1
                pieces.append(escape(following))
            else:
                pieces.append(escape(c))
        else:
            pieces.append(escape(c))
    return ''.join(pieces)
561 558
562 559 def _regex(kind, pat, globsuffix):
563 560 '''Convert a (normalized) pattern of any kind into a regular expression.
564 561 globsuffix is appended to the regexp of globs.'''
565 562 if not pat:
566 563 return ''
567 564 if kind == 're':
568 565 return pat
569 566 if kind == 'path':
570 567 if pat == '.':
571 568 return ''
572 569 return '^' + util.re.escape(pat) + '(?:/|$)'
573 570 if kind == 'rootfilesin':
574 571 if pat == '.':
575 572 escaped = ''
576 573 else:
577 574 # Pattern is a directory name.
578 575 escaped = util.re.escape(pat) + '/'
579 576 # Anything after the pattern must be a non-directory.
580 577 return '^' + escaped + '[^/]+$'
581 578 if kind == 'relglob':
582 579 return '(?:|.*/)' + _globre(pat) + globsuffix
583 580 if kind == 'relpath':
584 581 return util.re.escape(pat) + '(?:/|$)'
585 582 if kind == 'relre':
586 583 if pat.startswith('^'):
587 584 return pat
588 585 return '.*' + pat
589 586 return _globre(pat) + globsuffix
590 587
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs. 'subinclude' patterns,
    'set' patterns and the remaining patterns each contribute one match
    function; a name matches if any of them accepts it. The returned regexp
    covers only the remaining patterns and is '' when there are none.'''
    candidates = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # sub-matchers are created on first use and cached by prefix
        cache = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = cache.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    cache[prefix] = submatch

                if submatch(f[len(prefix):]):
                    return True
            return False
        candidates.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        candidates.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        candidates.append(regexmatch)

    if len(candidates) == 1:
        return regex, candidates[0]
    return regex, lambda f: any(fn(f) for fn in candidates)
625 622
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    If the combined regexp is longer than 20000 characters or compiling it
    raises OverflowError, kindpats is split in half and each half is built
    separately; the regexp string returned in that case is still the full
    combined one. An invalid pattern raises error.Abort."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        middle = count // 2
        regexa, matcha = _buildregexmatch(kindpats[:middle], globsuffix)
        regexb, matchb = _buildregexmatch(kindpats[middle:], globsuffix)
        return regex, lambda s: matcha(s) or matchb(s)
    except re.error:
        # compile the patterns one by one to find the one to blame
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (s, k, p))
        raise error.Abort(_("invalid pattern"))
656 653
657 654 def _patternrootsanddirs(kindpats):
658 655 '''Returns roots and directories corresponding to each pattern.
659 656
660 657 This calculates the roots and directories exactly matching the patterns and
661 658 returns a tuple of (roots, dirs) for each. It does not return other
662 659 directories which may also need to be considered, like the parent
663 660 directories.
664 661 '''
665 662 r = []
666 663 d = []
667 664 for kind, pat, source in kindpats:
668 665 if kind == 'glob': # find the non-glob prefix
669 666 root = []
670 667 for p in pat.split('/'):
671 668 if '[' in p or '{' in p or '*' in p or '?' in p:
672 669 break
673 670 root.append(p)
674 671 r.append('/'.join(root) or '.')
675 672 elif kind in ('relpath', 'path'):
676 673 r.append(pat or '.')
677 674 elif kind in ('rootfilesin',):
678 675 d.append(pat or '.')
679 676 else: # relglob, re, relre
680 677 r.append('.')
681 678 return r, d
682 679
def _roots(kindpats):
    '''Returns root directories to match recursively from the given
    patterns (the first element of _patternrootsanddirs()).'''
    return _patternrootsanddirs(kindpats)[0]
687 684
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories
    should be matched non-recursively. The returned (roots, dirs) tuple will
    also include directories that need to be implicitly considered as
    either, such as parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must
    # be scanned to get to either the roots or the other exact directories.
    parents = list(util.dirs(exactdirs))
    parents.extend(util.dirs(roots))
    exactdirs.extend(parents)
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
719 716
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so such
    # patterns are left out before the roots are computed.
    canbefiles = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(canbefiles)
732 729
733 730 def _anypats(kindpats):
734 731 for kind, pat, source in kindpats:
735 732 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
736 733 return True
737 734
# Compiled lazily by readpatternfile() the first time a line contains '#'.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    An unknown name after "syntax:" leaves the current default unchanged;
    a message is passed to warn when warn is set.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.
    originalline is the line after comments, trailing white space and any
    syntax prefix have been removed.

    The file is closed even if reading or a warn callback raises.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now