##// END OF EJS Templates
match: move body of _normalize() to a static function...
Martin von Zweigbergk -
r32396:0ec4cd6f default
parent child Browse files
Show More
@@ -1,814 +1,816
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
# Module-level alias so the decorator can be written as @propertycache below.
propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile regex with the best available engine and return its matcher.

    The returned callable is the compiled pattern's test_match method when
    the engine offers one (facebook's re2 bindings, slightly faster) and its
    regular match method otherwise.
    '''
    compiled = util.re.compile(regex)
    matchfn = getattr(compiled, 'test_match', None)
    if matchfn is None:
        matchfn = compiled.match
    return matchfn
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.Abort(_("fileset expression with no context"))
42 42 s = ctx.getfileset(pat)
43 43 fset.update(s)
44 44
45 45 if listsubrepos:
46 46 for subpath in ctx.substate:
47 47 s = ctx.sub(subpath).getfileset(pat)
48 48 fset.update(subpath + '/' + f for f in s)
49 49
50 50 continue
51 51 other.append((kind, pat, source))
52 52 return fset, other
53 53
def _expandsubinclude(kindpats, root):
    '''Separate the 'subinclude' patterns from the rest of kindpats.

    Returns (relmatchers, remaining). relmatchers holds one
    (prefix, matcherargs) pair per subinclude: matcherargs are the positional
    arguments for match() rooted at the directory of the included file, and
    prefix is that directory relative to root (with a trailing '/' unless it
    is empty). remaining is the list of all other (kind, pat, source) tuples.
    '''
    relmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != 'subinclude':
            remaining.append((kind, pat, source))
            continue

        # The included file is looked up relative to the file that named it.
        sourcedir = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourcedir, util.pconvert(pat))
        newroot = pathutil.dirname(path)

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += '/'

        matcherargs = (newroot, '', [], ['include:%s' % path])
        relmatchers.append((prefix, matcherargs))

    return relmatchers, remaining
77 77
78 78 def _kindpatsalwaysmatch(kindpats):
79 79 """"Checks whether the kindspats match everything, as e.g.
80 80 'relpath:.' does.
81 81 """
82 82 for kind, pat, source in kindpats:
83 83 if pat != '' or kind not in ['relpath', 'glob']:
84 84 return False
85 85 return True
86 86
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None):
    """build an object to match a set of file patterns

    All arguments are forwarded unchanged to the matcher class.

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - optional path auditor handed to pathutil.canonpath() when
              'glob' and 'relpath' patterns are made relative to root
    ctx - context used to evaluate 'set:' patterns (an Abort is raised if a
          'set:' pattern is given without one)
    listsubrepos - if true, 'set:' patterns are also evaluated in every
                   subrepo of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'listfile:<path>' - a file listing patterns, one per line
    'listfile0:<path>' - a file listing patterns separated by NUL characters
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    return matcher(root, cwd, patterns, include=include, exclude=exclude,
                   default=default, exact=exact, auditor=auditor, ctx=ctx,
                   listsubrepos=listsubrepos, warn=warn, badfn=badfn)
122 122
def icasefsmatch(root, cwd, patterns, include=None, exclude=None,
                 default='glob', auditor=None, ctx=None,
                 listsubrepos=False, badfn=None):
    """A matcher for wdir on case insensitive filesystems, which normalizes the
    given patterns to the case in the filesystem.

    All arguments are forwarded to icasefsmatcher. Although ctx defaults to
    None here, icasefsmatcher reads ctx.repo().dirstate unconditionally, so a
    real context has to be supplied.
    """
    return icasefsmatcher(root, cwd, patterns, include=include, exclude=exclude,
                          default=default, auditor=auditor, ctx=ctx,
                          listsubrepos=listsubrepos, badfn=badfn)
132 132
def exact(root, cwd, files, badfn=None):
    '''Return a matcher built with exact=True, i.e. one whose patterns are
    taken as literal file names rather than parsed as patterns.'''
    return match(root, cwd, files, exact=True, badfn=badfn)
135 135
def always(root, cwd):
    '''Return a matcher built without any patterns, includes or excludes, so
    that its always() method reports True.'''
    return match(root, cwd, [])
138 138
def badmatch(match, badfn):
    """Return a shallow copy of the given matcher whose bad() callback is
    badfn. The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
146 146
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Turn 'kind:pat' strings into (kind, pat, source) tuples.

    Patterns are rooted/normalized according to their kind, and 'listfile',
    'listfile0' and 'include' patterns are replaced by the (recursively
    normalized) patterns read from the file they name. source is the name of
    that file, or '' for patterns that were given directly.

    Raises error.Abort if a list file cannot be read or an included pattern
    file is rejected; an unreadable include file is skipped, with a message
    passed to warn if it is set.
    '''
    normalized = []
    splitpats = [_patsplit(p, default) for p in patterns]
    for kind, pat in splitpats:
        if kind in ('listfile', 'listfile0'):
            try:
                data = util.readfile(pat)
                if kind == 'listfile0':
                    entries = data.split('\0')
                else:
                    entries = data.splitlines()
                entries = [entry for entry in entries if entry]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            nested = _donormalize(entries, default, root, cwd, auditor, warn)
            # everything read from a list file is attributed to that file
            normalized.extend([(k, p, pat) for k, p, source in nested])
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                nested = _donormalize(includepats, default, root, cwd,
                                      auditor, warn)
                # keep the innermost source if the include was itself nested
                normalized.extend([(k, p, source or pat)
                                   for k, p, source in nested])
            except error.Abort as inst:
                raise error.Abort('%s: %s' % (pat, inst[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
        else:
            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path', 'rootfilesin'):
                pat = util.normpath(pat)
            # else: re or relre - which cannot be normalized
            normalized.append((kind, pat, ''))
    return normalized
187
class matcher(object):
    '''Matches file names against patterns combined with include and exclude
    lists. Instances are callable: m(fn) returns the result of m.matchfn(fn).
    '''

    def __init__(self, root, cwd, patterns, include=None, exclude=None,
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None, badfn=None):
        '''Build the match function from patterns, include and exclude.

        A file has to satisfy every criterion that is present: be matched by
        include, not be matched by exclude, and be matched by patterns (or be
        listed in patterns when exact is true). With no criteria at all the
        matcher uses util.always and always() reports True.
        '''
        if include is None:
            include = []
        if exclude is None:
            exclude = []

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)

        # roots are directories which are recursively included/excluded.
        self._includeroots = set()
        self._excluderoots = set()
        # dirs are directories which are non-recursively included.
        self._includedirs = set()

        if badfn is not None:
            self.bad = badfn

        matchfns = []
        if include:
            kindpats = self._normalize(include, 'glob', root, cwd, auditor,
                                       warn)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            roots, dirs = _rootsanddirs(kindpats)
            self._includeroots.update(roots)
            self._includedirs.update(dirs)
            matchfns.append(im)
        if exclude:
            kindpats = self._normalize(exclude, 'glob', root, cwd, auditor,
                                       warn)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            if not _anypats(kindpats):
                # Only consider recursive excludes as such - if a non-recursive
                # exclude is used, we must still recurse into the excluded
                # directory, at least to find subdirectories. In such a case,
                # the regex still won't match the non-recursively-excluded
                # files.
                self._excluderoots.update(_roots(kindpats))
            matchfns.append(lambda f: not em(f))
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = self._normalize(patterns, default, root, cwd, auditor,
                                       warn)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos, root)
                matchfns.append(pm)

        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m

    def __call__(self, fn):
        return self.matchfn(fn)
    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        return (self._pathrestricted and self.rel(f)) or self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        if self.prefix() and dir in self._fileset:
            return 'all'
        if dir in self._excluderoots:
            return False
        if ((self._includeroots or self._includedirs) and
            '.' not in self._includeroots and
            dir not in self._includeroots and
            dir not in self._includedirs and
            not any(parent in self._includeroots
                    for parent in util.finddirs(dir))):
            return False
        return (not self._fileset or
                '.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def isexact(self):
        '''Returns True if the match function is exactly self.exact, which is
        the case when the matcher was built with exact=True and without
        include or exclude patterns.'''
        return self.matchfn == self.exact

    def prefix(self):
        '''Returns True if the matcher is neither .always(), nor .isexact(),
        nor .anypats().'''
        return not self.always() and not self.isexact() and not self.anypats()

    def _normalize(self, patterns, default, root, cwd, auditor, warn):
        '''Convert 'kind:pat' from the patterns list to tuples with kind and
        normalized and rooted patterns and with listfiles expanded.

        The work is done by the module-level _donormalize(); this method
        remains as the hook that subclasses override.'''
        return _donormalize(patterns, default, root, cwd, auditor, warn)
368 370
class subdirmatcher(matcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        parentfiles = matcher._files
        prefix = path + "/"

        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._anypats = matcher._anypats

        # Keep only the files below the subdirectory, relative to it.
        self._files = [f[len(prefix):] for f in parentfiles
                       if f.startswith(prefix)]

        if matcher.prefix():
            # If the parent repo had a path to this subrepo and the matcher
            # is a prefix matcher, this submatcher always matches.
            self._always = path in parentfiles
        else:
            self._always = matcher._always

        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        def matchfn(fn):
            return matcher.matchfn(self._path + "/" + fn)
        self.matchfn = matchfn

    def bad(self, f, msg):
        '''Report f, with the subdirectory prepended, to the wrapped matcher.'''
        parentname = self._path + "/" + f
        self._matcher.bad(parentname, msg)

    def abs(self, f):
        '''abs() of the wrapped matcher for f inside the subdirectory.'''
        parentname = self._path + "/" + f
        return self._matcher.abs(parentname)

    def rel(self, f):
        '''rel() of the wrapped matcher for f inside the subdirectory.'''
        parentname = self._path + "/" + f
        return self._matcher.rel(parentname)

    def uipath(self, f):
        '''uipath() of the wrapped matcher for f inside the subdirectory.'''
        parentname = self._path + "/" + f
        return self._matcher.uipath(parentname)

    def visitdir(self, dir):
        '''visitdir() of the wrapped matcher; '.' maps to the subdirectory
        itself.'''
        parentdir = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitdir(parentdir)
439 441
class icasefsmatcher(matcher):
    '''Matcher that runs its patterns through the dirstate's normalize()
    before matching, for use on case insensitive filesystems.'''

    def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
                 ctx, listsubrepos=False, badfn=None):
        # These have to be set before the base constructor runs, because it
        # calls self._normalize(), which relies on them.
        self._dirstate = ctx.repo().dirstate
        self._dsnormalize = self._dirstate.normalize

        super(icasefsmatcher, self).__init__(
            root, cwd, patterns, include, exclude, default, auditor=auditor,
            ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

        # m.exact(file) must be based off of the actual user input, otherwise
        # inexact case matches are treated as exact, and not noted without -v.
        if self._files:
            roots, dirs = _rootsanddirs(self._kp)
            self._fileset = set(roots).union(dirs)

    def _normalize(self, patterns, default, root, cwd, auditor, warn):
        '''Normalize as the base class does, then case-normalize every
        non-regex pattern. The base class result is kept in self._kp.'''
        basekindpats = super(icasefsmatcher, self)._normalize(
            patterns, default, root, cwd, auditor, warn)
        self._kp = basekindpats

        result = []
        for kind, pat, source in basekindpats:
            if kind in ('re', 'relre'):
                # regex can't be normalized
                result.append((kind, pat, source))
                continue

            folded = self._dsnormalize(pat)
            # Preserve the original to handle a case only rename.
            if pat != folded and pat in self._dirstate:
                result.append((kind, pat, source))
            result.append((kind, folded, source))
        return result
474 476
def patkind(pattern, default=None):
    '''Return the kind of pattern if it is 'kind:pat' with a known kind,
    and default otherwise.'''
    kind, unused = _patsplit(pattern, default)
    return kind
478 480
479 481 def _patsplit(pattern, default):
480 482 """Split a string into the optional pattern kind prefix and the actual
481 483 pattern."""
482 484 if ':' in pattern:
483 485 kind, pat = pattern.split(':', 1)
484 486 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
485 487 'listfile', 'listfile0', 'set', 'include', 'subinclude',
486 488 'rootfilesin'):
487 489 return kind, pat
488 490 return default, pattern
489 491
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    # i is the index of the next unread character of pat, n its length
    i, n = 0, len(pat)
    res = ''
    # number of '{' groups currently open
    group = 0
    escape = util.re.escape
    def peek():
        # next unread character, or False at the end of the pattern
        return i < n and pat[i:i + 1]
    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # not a glob metacharacter: match it literally
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' also consumes the slash and may match nothing
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' matches across directory separators
                    res += '.*'
            else:
                # a single '*' stays within one path component
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a '!' or ']' directly after the '['
            # is part of the class and cannot close it
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # no closing bracket: the '[' is a literal character
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation '!' is spelled '^' in a regexp
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in a glob, so escape it
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' only separates alternatives inside a '{...}' group
            res += '|'
        elif c == '\\':
            # a backslash makes the following character literal; a trailing
            # backslash stands for itself
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                res += escape(c)
        else:
            # '}' or ',' outside of any group
            res += escape(c)
    return res
567 569
def _regex(kind, pat, globsuffix):
    '''Return the regular expression for one (normalized) pattern.

    An empty pattern gives an empty regexp whatever its kind. globsuffix is
    appended to the regexp generated for the glob kinds ('relglob' and
    anything not listed below, i.e. 'glob').'''
    if not pat:
        return ''

    if kind == 're':
        return pat
    elif kind == 'relre':
        if pat.startswith('^'):
            return pat
        return '.*' + pat
    elif kind == 'path':
        if pat == '.':
            return ''
        return '^' + util.re.escape(pat) + '(?:/|$)'
    elif kind == 'relpath':
        return util.re.escape(pat) + '(?:/|$)'
    elif kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root itself), and
        # anything after the pattern must be a non-directory.
        dirprefix = '' if pat == '.' else util.re.escape(pat) + '/'
        return '^' + dirprefix + '[^/]+$'

    globregex = _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + globregex
    return globregex
596 598
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return (regex, matchfn) for kindpats.

    matchfn accepts a file name if a subinclude matcher, an expanded fileset
    or the regexp built from the remaining patterns accepts it. regex is the
    regexp string for those remaining patterns ('' if there are none), with
    globsuffix appended to the regexp of globs.'''
    matchers = []

    subincludes, remaining = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for the included files are created on first use
        cache = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = cache.get(prefix)
                if submatch is None:
                    submatch = cache[prefix] = match(*matcherargs)
                if submatch(f[len(prefix):]):
                    return True
            return False
        matchers.append(matchsubinclude)

    fileset, remaining = _expandsets(remaining, ctx, listsubrepos)
    if fileset:
        matchers.append(fileset.__contains__)

    regex = ''
    if remaining:
        regex, regexmatch = _buildregexmatch(remaining, globsuffix)
        matchers.append(regexmatch)

    if len(matchers) != 1:
        return regex, lambda f: any(mf(f) for mf in matchers)
    return regex, matchers[0]
631 633
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort, naming the offending pattern when it can be found,
    if the patterns do not form a valid regular expression."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: first(s) or second(s)
    except re.error:
        # compile the patterns one by one to find the one to blame
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (s, k, p))
        raise error.Abort(_("invalid pattern"))
662 664
663 665 def _patternrootsanddirs(kindpats):
664 666 '''Returns roots and directories corresponding to each pattern.
665 667
666 668 This calculates the roots and directories exactly matching the patterns and
667 669 returns a tuple of (roots, dirs) for each. It does not return other
668 670 directories which may also need to be considered, like the parent
669 671 directories.
670 672 '''
671 673 r = []
672 674 d = []
673 675 for kind, pat, source in kindpats:
674 676 if kind == 'glob': # find the non-glob prefix
675 677 root = []
676 678 for p in pat.split('/'):
677 679 if '[' in p or '{' in p or '*' in p or '?' in p:
678 680 break
679 681 root.append(p)
680 682 r.append('/'.join(root) or '.')
681 683 elif kind in ('relpath', 'path'):
682 684 r.append(pat or '.')
683 685 elif kind in ('rootfilesin',):
684 686 d.append(pat or '.')
685 687 else: # relglob, re, relre
686 688 r.append('.')
687 689 return r, d
688 690
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns
    (the first element of what _patternrootsanddirs() returns).'''
    return _patternrootsanddirs(kindpats)[0]
693 695
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents must be scanned to get to either the roots or the other
    # exact directories, so they are added as non-recursive/exact directories.
    dirparents = util.dirs(dirs)
    rootparents = util.dirs(roots)
    dirs.extend(dirparents)
    dirs.extend(rootparents)
    # util.dirs() does not include the root directory, so add it manually
    dirs.append('.')

    return roots, dirs
725 727
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so those
    # patterns are left out before the roots are computed.
    canbefiles = [kindpat for kindpat in kindpats
                  if kindpat[0] != 'rootfilesin']
    return _roots(canbefiles)
738 740
739 741 def _anypats(kindpats):
740 742 for kind, pat, source in kindpats:
741 743 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
742 744 return True
743 745
# Compiled regexp used by readpatternfile() to strip comments; it is created
# the first time a line containing '#' is read.
_commentre = None
745 747
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    an unknown name after 'syntax:' leaves the default unchanged; it is
    reported through warn if warn is set.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where line is the text of the line once
    comments, trailing white space and any syntax prefix have been removed.
    This is useful to debug ignore patterns.

    Errors from opening filepath are not caught here.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # The with block closes the file even if parsing raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A prefix on the line itself overrides the current default.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now