##// END OF EJS Templates
match: move entire uipath() implementation to basematcher...
Martin von Zweigbergk -
r32496:ca77a243 default
parent child Browse files
Show More
@@ -1,877 +1,875
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
21 21 propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile regex with util.re and return a callable that matches a
    string against the compiled pattern.'''
    compiled = util.re.compile(regex)
    try:
        # test_match is slightly faster; it is provided by facebook's re2
        # bindings and is absent on other compiled pattern objects
        matchfn = compiled.test_match
    except AttributeError:
        matchfn = compiled.match
    return matchfn
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
55 55 def _expandsubinclude(kindpats, root):
56 56 '''Returns the list of subinclude matcher args and the kindpats without the
57 57 subincludes in it.'''
58 58 relmatchers = []
59 59 other = []
60 60
61 61 for kind, pat, source in kindpats:
62 62 if kind == 'subinclude':
63 63 sourceroot = pathutil.dirname(util.normpath(source))
64 64 pat = util.pconvert(pat)
65 65 path = pathutil.join(sourceroot, pat)
66 66
67 67 newroot = pathutil.dirname(path)
68 68 matcherargs = (newroot, '', [], ['include:%s' % path])
69 69
70 70 prefix = pathutil.canonpath(root, root, newroot)
71 71 if prefix:
72 72 prefix += '/'
73 73 relmatchers.append((prefix, matcherargs))
74 74 else:
75 75 other.append((kind, pat, source))
76 76
77 77 return relmatchers, other
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    When exclude is non-empty, the result is a differencematcher of the
    pattern/include matcher and a matcher built from the excludes.
    """
    if not icasefs:
        normalize = _donormalize
    else:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # run the regular normalization, then fold each pattern to the
            # case known to the dirstate
            folded = []
            for kind, pats, source in _donormalize(patterns, default, root,
                                                   cwd, auditor, warn):
                if kind not in ('re', 'relre'): # regex can't be normalized
                    unfolded = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if unfolded != pats and unfolded in dirstate:
                        folded.append((kind, unfolded, source))

                folded.append((kind, pats, source))
            return folded

    # keyword arguments shared by the main matcher and the exclude matcher
    shared = dict(default=default, auditor=auditor, ctx=ctx,
                  listsubrepos=listsubrepos, warn=warn)

    m = matcher(root, cwd, normalize, patterns, include=include,
                exact=exact, badfn=badfn, **shared)
    if not exclude:
        return m

    # excludes are expressed as the includes of a second matcher
    em = matcher(root, cwd, normalize, [], include=exclude, exact=False,
                 badfn=None, **shared)
    return differencematcher(m, em)
154 154
def exact(root, cwd, files, badfn=None):
    '''Build a matcher through match() with exact=True, so that files are
    treated as filenames rather than patterns.'''
    return match(root, cwd, files, badfn=badfn, exact=True)
157 157
def always(root, cwd):
    '''Build a matcher through match() with an empty pattern list.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
160 160
def badmatch(match, badfn):
    """Return a shallow copy of the given matcher whose bad attribute is
    replaced by badfn. The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
168 168
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. source is '' for patterns
    given directly, and the name of the list/include file for patterns read
    from 'listfile', 'listfile0' or 'include' entries.
    '''
    result = []
    splitpats = [_patsplit(p, default) for p in patterns]
    for kind, pat in splitpats:
        if kind in ('listfile', 'listfile0'):
            try:
                content = util.readfile(pat)
                if kind == 'listfile0':
                    entries = content.split('\0')
                else:
                    entries = content.splitlines()
                entries = [f for f in entries if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # every pattern from the list file is attributed to that file
            result.extend((k, p, pat)
                          for k, p, source in _donormalize(entries, default,
                                                           root, cwd,
                                                           auditor, warn))
            continue

        if kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                nested = _donormalize(includepats, default, root, cwd,
                                      auditor, warn)
                # keep the innermost source if the nested entry has one
                result.extend((k, p, source or pat)
                              for k, p, source in nested)
            except error.Abort as inst:
                raise error.Abort('%s: %s' % (pat, inst[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue

        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        result.append((kind, pat, ''))
    return result
209 209
class basematcher(object):
    '''Base class for matchers.

    It stores the root and cwd, provides the path conversion helpers
    (abs, rel, uipath) and supplies defaults for the query methods: as
    implemented here nothing matches, no directory is visited and files()
    is empty.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        # only override the bad() method when a callback was supplied
        if badfn is not None:
            self.bad = badfn
        # consulted by uipath() to choose between rel() and abs()
        self._relativeuipath = relativeuipath

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relpath = self.rel(f)
            # an empty relative path falls through to abs()
            if relpath:
                return relpath
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        return False

    def prefix(self):
        '''True when the matcher is neither always(), isexact() nor
        anypats().'''
        return not (self.always() or self.isexact() or self.anypats())
304 305
class matcher(basematcher):
    '''Matcher built from a list of patterns and/or include patterns.

    A file matches when it satisfies the include patterns (if any) and the
    primary patterns (if any); in exact mode the primary patterns are
    compared as plain filenames.
    '''

    def __init__(self, root, cwd, normalize, patterns, include=None,
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None, badfn=None):
        # display paths are relative to cwd only if the matcher is
        # restricted by patterns or includes
        super(matcher, self).__init__(root, cwd, badfn,
                                      relativeuipath=bool(include or patterns))
        if include is None:
            include = []

        self._anypats = bool(include)
        self._anyincludepats = False
        self._always = False
        self.patternspat = None
        self.includepat = None

        # roots are directories which are recursively included.
        self._includeroots = set()
        # dirs are directories which are non-recursively included.
        self._includedirs = set()

        # every function collected here must accept a file for it to match
        matchfns = []
        if include:
            kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
            self.includepat, includefn = _buildmatch(ctx, kindpats,
                                                     '(?:/|$)', listsubrepos,
                                                     root)
            self._anyincludepats = _anypats(kindpats)
            roots, dirs = _rootsanddirs(kindpats)
            self._includeroots.update(roots)
            self._includedirs.update(dirs)
            matchfns.append(includefn)

        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = normalize(patterns, default, root, cwd, auditor, warn)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, patternfn = _buildmatch(ctx, kindpats, '$',
                                                          listsubrepos, root)
                matchfns.append(patternfn)

        if not matchfns:
            self._always = True
            self.matchfn = util.always
        elif len(matchfns) == 1:
            self.matchfn = matchfns[0]
        else:
            def matchall(f):
                return all(fn(f) for fn in matchfns)
            self.matchfn = matchall

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        if self.prefix() and dir in self._fileset:
            return 'all'

        roots = self._includeroots
        if roots or self._includedirs:
            if not self._anyincludepats and dir in roots:
                # The condition above is essentially self.prefix() for includes
                return 'all'
            # skip dir when nothing relates it to the include roots/dirs
            if ('.' not in roots and
                dir not in roots and
                dir not in self._includedirs and
                not any(parent in roots
                        for parent in util.finddirs(dir))):
                return False

        fileset = self._fileset
        return (not fileset or
                '.' in fileset or
                dir in fileset or
                dir in self._dirs or
                any(parentdir in fileset
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        return self._anypats

    def always(self):
        return self._always

    def isexact(self):
        # exact mode is the case where exact() is the only match function
        return self.matchfn == self.exact

    def __repr__(self):
        fields = (self._files, self.patternspat, self.includepat)
        return '<matcher files=%r, patterns=%r, includes=%r>' % fields
405 403
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        # the callbacks are taken from the first matcher only
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        first = self._m1
        # a file listed exactly in m1 matches even when m2 matches it too
        return first(f) and (not self._m2(f) or first.exact(f))

    @propertycache
    def _files(self):
        m1files = self._m1.files()
        if self.isexact():
            return [f for f in m1files if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return m1files

    def visitdir(self, dir):
        if self._m2.visitdir(dir) == 'all':
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def prefix(self):
        return not (self.always() or self.isexact() or self.anypats())

    def __repr__(self):
        return '<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
461 459
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # keep only the wrapped matcher's files below path, with the
        # "path/" prefix stripped
        dirprefix = path + "/"
        self._files = [f[len(dirprefix):] for f in matcher._files
                       if f.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def _parentpath(self, f):
        '''Return f as seen by the wrapped matcher, i.e. with the
        subdirectory path prepended.'''
        return self._path + "/" + f

    def bad(self, f, msg):
        self._matcher.bad(self._parentpath(f), msg)

    def abs(self, f):
        return self._matcher.abs(self._parentpath(f))

    def rel(self, f):
        return self._matcher.rel(self._parentpath(f))

    def uipath(self, f):
        return self._matcher.uipath(self._parentpath(f))

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._parentpath(f))

    def visitdir(self, dir):
        # '.' denotes the subdirectory itself
        if dir == '.':
            parentdir = self._path
        else:
            parentdir = self._parentpath(dir)
        return self._matcher.visitdir(parentdir)

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()
537 535
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind; otherwise
    return default.'''
    kind = _patsplit(pattern, default)[0]
    return kind
541 539
542 540 def _patsplit(pattern, default):
543 541 """Split a string into the optional pattern kind prefix and the actual
544 542 pattern."""
545 543 if ':' in pattern:
546 544 kind, pat = pattern.split(':', 1)
547 545 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
548 546 'listfile', 'listfile0', 'set', 'include', 'subinclude',
549 547 'rootfilesin'):
550 548 return kind, pat
551 549 return default, pattern
552 550
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    i, n = 0, len(pat)
    pieces = []
    depth = 0
    escape = util.re.escape

    def peek():
        # next unread character, or False at the end of the pattern
        return i < n and pat[i:i + 1]

    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            pieces.append(escape(c))
        elif c == '*':
            if peek() != '*':
                # a single star does not cross directory separators
                pieces.append('[^/]*')
            else:
                i += 1
                if peek() == '/':
                    # '**/' also matches zero directories
                    i += 1
                    pieces.append('(?:.*/)?')
                else:
                    pieces.append('.*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # look for the closing bracket; a leading '!' or ']' is part of
            # the class body
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: the bracket is a literal
                pieces.append('\\[')
            else:
                body = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                lead = body[0:1]
                if lead == '!':
                    body = '^' + body[1:]
                elif lead == '^':
                    body = '\\' + body
                pieces.append('[%s]' % body)
        elif c == '{':
            depth += 1
            pieces.append('(?:')
        elif c == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif c == ',' and depth:
            pieces.append('|')
        elif c == '\\':
            nextc = peek()
            if nextc:
                i += 1
                pieces.append(escape(nextc))
            else:
                pieces.append(escape(c))
        else:
            # '}' or ',' outside of any brace group
            pieces.append(escape(c))
    return ''.join(pieces)
630 628
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pattern always yields the empty regexp. Kinds not handled
    explicitly are treated as globs.'''
    if not pat:
        return ''

    if kind == 're':
        return pat
    elif kind == 'path':
        if pat == '.':
            return ''
        return '^' + util.re.escape(pat) + '(?:/|$)'
    elif kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root).
        if pat == '.':
            dirprefix = ''
        else:
            dirprefix = util.re.escape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return '^' + dirprefix + '[^/]+$'
    elif kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    elif kind == 'relpath':
        return util.re.escape(pat) + '(?:/|$)'
    elif kind == 'relre':
        # an unanchored regexp may start matching anywhere in the name
        return pat if pat.startswith('^') else '.*' + pat
    return _globre(pat) + globsuffix
659 657
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    The matcher function accepts a file if any of the subinclude matchers,
    the expanded filesets or the combined regexp accepts it. The regexp
    string is '' when no regular patterns remain after the expansion.'''
    candidates = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # sub-matchers are created on first use and cached per prefix
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix):]):
                    return True
            return False
        candidates.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        candidates.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexfn = _buildregexmatch(kindpats, globsuffix)
        candidates.append(regexfn)

    if len(candidates) == 1:
        return regex, candidates[0]
    return regex, lambda f: any(fn(f) for fn in candidates)
694 692
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a pattern does not compile; the message names
    the offending pattern (and its source, if it has one) when it can be
    identified."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        # only the match functions of the halves are needed; the regexp
        # string returned is still the one for the whole list
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: first(s) or second(s)
    except re.error:
        # compile the patterns one by one to find the one to blame
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (s, k, p))
        raise error.Abort(_("invalid pattern"))
725 723
726 724 def _patternrootsanddirs(kindpats):
727 725 '''Returns roots and directories corresponding to each pattern.
728 726
729 727 This calculates the roots and directories exactly matching the patterns and
730 728 returns a tuple of (roots, dirs) for each. It does not return other
731 729 directories which may also need to be considered, like the parent
732 730 directories.
733 731 '''
734 732 r = []
735 733 d = []
736 734 for kind, pat, source in kindpats:
737 735 if kind == 'glob': # find the non-glob prefix
738 736 root = []
739 737 for p in pat.split('/'):
740 738 if '[' in p or '{' in p or '*' in p or '?' in p:
741 739 break
742 740 root.append(p)
743 741 r.append('/'.join(root) or '.')
744 742 elif kind in ('relpath', 'path'):
745 743 r.append(pat or '.')
746 744 elif kind in ('rootfilesin',):
747 745 d.append(pat or '.')
748 746 else: # relglob, re, relre
749 747 r.append('.')
750 748 return r, d
751 749
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # only the roots half of the (roots, dirs) pair is of interest here
    return _patternrootsanddirs(kindpats)[0]
756 754
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    dirparents = util.dirs(exactdirs)
    exactdirs.extend(dirparents)
    rootparents = util.dirs(roots)
    exactdirs.extend(rootparents)
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
788 786
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = [entry for entry in kindpats if entry[0] != 'rootfilesin']
    return _roots(filable)
801 799
802 800 def _anypats(kindpats):
803 801 for kind, pat, source in kindpats:
804 802 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
805 803 return True
806 804
# compiled lazily by readpatternfile() the first time a line contains '#'
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    warn, if not None, is called with a message for every 'syntax:' line
    that names an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # the with block closes the file even if parsing raises
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now