##// END OF EJS Templates
match: make nevermatcher an exact matcher and a prefix matcher...
Martin von Zweigbergk -
r33378:adf95bfb default
parent child Browse files
Show More
@@ -1,1034 +1,1045 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
21 21 propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile regex with util.re and return a callable that matches a string
    against it.

    The compiled object's test_match attribute is preferred when it exists;
    otherwise the regular match method is returned.
    '''
    compiled = util.re.compile(regex)
    # test_match is slightly faster and is provided by facebook's re2
    # bindings; fall back to plain match() when it is missing.
    return getattr(compiled, 'test_match', compiled.match)
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
def _expandsubinclude(kindpats, root):
    '''Separate the 'subinclude' patterns from kindpats.

    Returns (relmatchers, other). relmatchers holds one (prefix, matcherargs)
    pair per subinclude, where matcherargs is the argument tuple for match()
    rooted at the directory of the included file and prefix is that directory
    relative to root (with a trailing '/' unless it is empty). other holds
    the remaining (kind, pat, source) tuples untouched.
    '''
    subincludes = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != 'subinclude':
            remaining.append((kind, pat, source))
            continue

        # The included file is located relative to the file that named it.
        sourceroot = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourceroot, util.pconvert(pat))

        newroot = pathutil.dirname(path)
        matcherargs = (newroot, '', [], ['include:%s' % path])

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += '/'
        subincludes.append((prefix, matcherargs))

    return subincludes, remaining
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - handed to pathutil.canonpath() while normalizing patterns
    ctx - context used to expand 'set:' patterns; with icasefs its repo's
          dirstate is used for case normalization
    listsubrepos - also evaluate 'set:' patterns in the subrepos of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                normed = foldcase(pat)
                # Preserve the original to handle a case only rename.
                if pat != normed and pat in dirstate:
                    folded.append((kind, pat, source))
                folded.append((kind, normed, source))
            return folded
    else:
        normalize = _donormalize

    def buildinclude(pats):
        # -I and -X patterns always default to 'glob' and never carry the
        # caller's bad() callback.
        kindpats = normalize(pats, 'glob', root, cwd, auditor, warn)
        return includematcher(root, cwd, kindpats, ctx=ctx,
                              listsubrepos=listsubrepos, badfn=None)

    if exact:
        matcher = exactmatcher(root, cwd, patterns, badfn)
    elif patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            matcher = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            matcher = patternmatcher(root, cwd, kindpats, ctx=ctx,
                                     listsubrepos=listsubrepos, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        matcher = alwaysmatcher(root, cwd, badfn)

    if include:
        matcher = intersectmatchers(matcher, buildinclude(include))
    if exclude:
        matcher = differencematcher(matcher, buildinclude(exclude))
    return matcher
170 170
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given list of files.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
173 173
def always(root, cwd):
    '''Return a matcher that matches every file (an alwaysmatcher).'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
176 176
def never(root, cwd):
    '''Return a matcher that matches no file (a nevermatcher).'''
    matcher = nevermatcher(root, cwd)
    return matcher
179 179
def badmatch(match, badfn):
    """Return a shallow copy of match whose bad() callback is badfn.

    The matcher passed in is left unmodified.
    """
    clone = copy.copy(match)
    setattr(clone, 'bad', badfn)
    return clone
187 187
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Turn a list of 'kind:pat' strings into (kind, pat, source) tuples.

    Patterns without a recognized kind get the default kind. glob/relpath
    patterns are canonicalized against root and cwd; relglob/path/rootfilesin
    patterns are path-normalized. listfile/listfile0 and include patterns are
    replaced by the (recursively normalized) patterns read from the named
    file, with source recording where they came from. Every other kind is
    passed through unchanged with an empty source.

    Raises error.Abort when a listfile cannot be read or an included pattern
    file aborts; an unreadable include file is only reported through warn.
    '''
    normalized = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in ('listfile', 'listfile0'):
            try:
                data = util.readfile(pat)
                if kind == 'listfile0':
                    entries = data.split('\0')
                else:
                    entries = data.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # Patterns coming from a list file are attributed to that file.
            normalized.extend(
                (k, p, pat)
                for k, p, source in _donormalize(entries, default, root, cwd,
                                                 auditor, warn))
            continue

        if kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                # Keep a nested source if there is one, else this file.
                normalized.extend(
                    (k, p, source or pat)
                    for k, p, source in _donormalize(includepats, default,
                                                     root, cwd, auditor,
                                                     warn))
            except error.Abort as inst:
                raise error.Abort('%s: %s' % (pat, inst[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue

        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        normalized.append((kind, pat, ''))
    return normalized
228 228
class basematcher(object):
    '''Base class for matchers.

    On its own it matches nothing: matchfn() and visitdir() return False and
    files() is empty. Subclasses override the pieces they need.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        self._relativeuipath = relativeuipath
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        '''Matching a file is delegated to matchfn().'''
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path. If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relpath = self.rel(f)
            # An empty relative path falls back to the root-relative one.
            if relpath:
                return relpath
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        '''Decide whether f matches; the base implementation matches nothing.'''
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        '''Whether this matcher only matches an explicit list of files.'''
        return False

    def prefix(self):
        '''True when the matcher is neither always(), isexact() nor
        anypats().'''
        return not (self.always() or self.isexact() or self.anypats())
324 324
class alwaysmatcher(basematcher):
    '''Matches everything.

    Unlike basematcher, relativeuipath defaults to False here.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        basematcher.__init__(self, root, cwd, badfn,
                             relativeuipath=relativeuipath)

    def matchfn(self, f):
        # Every file matches.
        return True

    def visitdir(self, dir):
        # Every directory, and everything below it, is of interest.
        return 'all'

    def always(self):
        return True

    def __repr__(self):
        return '<alwaysmatcher>'
343 343
class nevermatcher(basematcher):
    '''Matches nothing.

    Matching itself is inherited unchanged from basematcher, whose matchfn()
    and visitdir() already return False.
    '''

    def __init__(self, root, cwd, badfn=None):
        basematcher.__init__(self, root, cwd, badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def prefix(self):
        return True

    def isexact(self):
        return True

    def __repr__(self):
        return '<nevermatcher>'
352 363
class patternmatcher(basematcher):
    '''Matches files against a list of normalized (kind, pat, source)
    patterns; globs are anchored at the end of the filename.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._anypats = _anypats(kindpats)
        # '$' as glob suffix: a glob has to match the whole filename.
        built = _buildmatch(ctx, kindpats, '$', listsubrepos, root)
        self._pats, self.matchfn = built

    @propertycache
    def _dirs(self):
        # All ancestor directories of the explicit files, plus the root.
        parents = set(util.dirs(self._fileset))
        return parents | {'.'}

    def visitdir(self, dir):
        explicit = self._fileset
        if dir in explicit:
            # A prefix matcher matches everything below a listed directory.
            if self.prefix():
                return 'all'
            return True
        if '.' in explicit or dir in self._dirs:
            return True
        # Otherwise visit only if one of dir's ancestors was listed.
        return any(parentdir in explicit
                   for parentdir in util.finddirs(dir))

    def anypats(self):
        return self._anypats

    def __repr__(self):
        return '<patternmatcher patterns=%r>' % self._pats
382 393
class includematcher(basematcher):
    '''Matcher built from include/exclude style patterns; a glob also matches
    everything below the path it names.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        # '(?:/|$)' as glob suffix: a glob may end at a directory boundary.
        built = _buildmatch(ctx, kindpats, '(?:/|$)', listsubrepos, root)
        self._pats, self.matchfn = built
        self._anypats = _anypats(kindpats)
        roots, dirs = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)

    def visitdir(self, dir):
        roots = self._roots
        if dir in roots:
            # Without real patterns this is essentially self.prefix() for
            # includes, so the whole subtree is wanted.
            if not self._anypats:
                return 'all'
            return True
        if '.' in roots or dir in self._dirs:
            return True
        return any(parentdir in roots
                   for parentdir in util.finddirs(dir))

    def anypats(self):
        return True

    def __repr__(self):
        return '<includematcher includes=%r>' % self._pats
413 424
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    # A file matches exactly when it is one of the listed files.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Ancestor directories of the listed files, plus the root.
        parents = set(util.dirs(self._fileset))
        return parents | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return '<exactmatcher files=%r>' % self._files
441 452
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        # Callbacks are taken over from the first matcher only.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        included = self._m1(f)
        if not included:
            return included
        # Files listed explicitly in m1 survive the exclusion.
        return not self._m2(f) or self._m1.exact(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        if self._m2.visitdir(dir) == 'all':
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return '<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
494 505
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    If either argument is None the other one is returned as is. If one of
    them always matches, a copy of the other one is returned instead of a
    real intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 decides what matches, but m1's callbacks and path conversions
        # are the ones that count.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        result = copy.copy(m2)
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(result, attr, getattr(m1, attr))
        result._relativeuipath |= m1._relativeuipath
        return result

    if m2.always():
        result = copy.copy(m1)
        result._relativeuipath |= m2._relativeuipath
        return result

    return intersectionmatcher(m1, m2)
519 530
class intersectionmatcher(basematcher):
    '''Matches a file only if both wrapped matchers match it.

    root, cwd and the bad/explicitdir/traversedir callbacks are taken from
    the first matcher.
    '''
    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if self.isexact():
            exactside, otherside = self._m1, self._m2
            if not exactside.isexact():
                exactside, otherside = otherside, exactside
            # Filter the exact file list through the other matcher.
            return [f for f in exactside.files() if otherside(f)]
        # If neither m1 nor m2 is an exact matcher, we can't easily intersect
        # the set of files, because their files() are not always files. For
        # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
        # "path:dir2", we don't want to remove "dir2" from the set.
        return self._m1.files() + self._m2.files()

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            # m1 wants everything, so m2 alone decides.
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return '<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
563 574
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the files below path, with the "path/" prefix removed.
        dirprefix = path + "/"
        self._files = [f[len(dirprefix):] for f in matcher._files
                       if f.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def _parentpath(self, f):
        # Name of f as seen by the wrapped matcher.
        return self._path + "/" + f

    def bad(self, f, msg):
        self._matcher.bad(self._parentpath(f), msg)

    def abs(self, f):
        return self._matcher.abs(self._parentpath(f))

    def rel(self, f):
        return self._matcher.rel(self._parentpath(f))

    def uipath(self, f):
        return self._matcher.uipath(self._parentpath(f))

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._parentpath(f))

    def visitdir(self, dir):
        # '.' is the subdirectory itself from the parent's point of view.
        if dir == '.':
            return self._matcher.visitdir(self._path)
        return self._matcher.visitdir(self._parentpath(dir))

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()

    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
643 654
class forceincludematcher(basematcher):
    """A matcher that returns true for any of the forced includes before testing
    against the actual matcher.

    matcher is the wrapped matcher and includes is a container of filenames
    that match unconditionally.

    NOTE: __init__ does not call basematcher.__init__, so _root and _cwd are
    not set on instances of this class.
    """
    def __init__(self, matcher, includes):
        self._matcher = matcher
        self._includes = includes

    def matchfn(self, f):
        # Implemented as matchfn() rather than __call__() so that calling the
        # matcher (basematcher.__call__ delegates here) and calling
        # .matchfn() directly - as wrapping matchers do - give the same answer.
        return f in self._includes or self._matcher(f)

    def anypats(self):
        return True

    def prefix(self):
        return False

    def __repr__(self):
        return ('<forceincludematcher matcher=%r, includes=%r>' %
                (self._matcher, sorted(self._includes)))
663 674
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    A file matches if at least one of the given matchers matches it; with an
    empty list nothing matches.

    NOTE: __init__ does not call basematcher.__init__, so _root and _cwd are
    not set on instances of this class.
    """
    def __init__(self, matchers):
        self._matchers = matchers

    def matchfn(self, f):
        # Implemented as matchfn() rather than __call__() so that calling the
        # matcher (basematcher.__call__ delegates here) and calling
        # .matchfn() directly - as wrapping matchers do - give the same answer.
        return any(m(f) for m in self._matchers)

    def anypats(self):
        return True

    def prefix(self):
        return False

    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
683 694
class negatematcher(basematcher):
    """A matcher that matches exactly the files the wrapped matcher rejects.

    NOTE: __init__ does not call basematcher.__init__, so _root and _cwd are
    not set on instances of this class.
    """
    def __init__(self, matcher):
        self._matcher = matcher

    def matchfn(self, f):
        # Implemented as matchfn() rather than __call__() so that calling the
        # matcher (basematcher.__call__ delegates here) and calling
        # .matchfn() directly - as wrapping matchers do - give the same answer.
        return not self._matcher(f)

    def anypats(self):
        return True

    def __repr__(self):
        return ('<negatematcher matcher=%r>' % self._matcher)
696 707
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind; otherwise
    return default.'''
    kind, _pat = _patsplit(pattern, default)
    return kind
700 711
701 712 def _patsplit(pattern, default):
702 713 """Split a string into the optional pattern kind prefix and the actual
703 714 pattern."""
704 715 if ':' in pattern:
705 716 kind, pat = pattern.split(':', 1)
706 717 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
707 718 'listfile', 'listfile0', 'set', 'include', 'subinclude',
708 719 'rootfilesin'):
709 720 return kind, pat
710 721 return default, pattern
711 722
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    escape = util.re.escape
    end = len(pat)
    pos = 0
    depth = 0       # number of currently open '{' groups
    out = []        # regexp fragments, joined at the end
    while pos < end:
        c = pat[pos:pos + 1]
        pos += 1
        if c not in '*?[{},\\':
            out.append(escape(c))
        elif c == '*':
            if pat[pos:pos + 1] != '*':
                # a single star stays within one path component
                out.append('[^/]*')
                continue
            pos += 1
            if pat[pos:pos + 1] == '/':
                # '**/' also matches zero directories
                pos += 1
                out.append('(?:.*/)?')
            else:
                out.append('.*')
        elif c == '?':
            out.append('.')
        elif c == '[':
            # Look for the closing bracket; a leading '!' or ']' belongs to
            # the character class itself.
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat '[' as a literal
                out.append('\\[')
                continue
            stuff = pat[pos:close].replace('\\', '\\\\')
            pos = close + 1
            first = stuff[0:1]
            if first == '!':
                stuff = '^' + stuff[1:]
            elif first == '^':
                stuff = '\\' + stuff
            out.append('[%s]' % stuff)
        elif c == '{':
            depth += 1
            out.append('(?:')
        elif c == '}' and depth:
            out.append(')')
            depth -= 1
        elif c == ',' and depth:
            out.append('|')
        elif c == '\\':
            following = pat[pos:pos + 1]
            if following:
                # a backslash escapes the character after it
                pos += 1
                out.append(escape(following))
            else:
                out.append(escape(c))
        else:
            # '}' or ',' outside of a group
            out.append(escape(c))
    return ''.join(out)
789 800
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pattern always yields an empty regexp.'''
    if not pat:
        return ''

    if kind == 're':
        return pat
    elif kind in ('path', 'relpath'):
        # '.' stands for the whole tree.
        if pat == '.':
            return ''
        return util.re.escape(pat) + '(?:/|$)'
    elif kind == 'rootfilesin':
        # Pattern is a directory name ('.' meaning the root), and anything
        # after it must be a non-directory.
        dirpart = '' if pat == '.' else util.re.escape(pat) + '/'
        return dirpart + '[^/]+$'
    elif kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    elif kind == 'relre':
        # An unanchored regexp may start anywhere in the name.
        return pat if pat.startswith('^') else '.*' + pat
    return _globre(pat) + globsuffix
816 827
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    Subincludes and filesets are taken out of kindpats first and get their
    own match functions; what remains is compiled into one regexp. The
    returned function matches if any of these parts matches.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for subincludes are built on first use, per prefix.
        cache = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatcher = cache.get(prefix)
                if submatcher is None:
                    submatcher = cache[prefix] = match(*matcherargs)
                if submatcher(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, rematch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(rematch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]

    def matchany(f):
        return any(fn(f) for fn in matchfuncs)
    return regex, matchany
851 862
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort naming the offending pattern if one of them does not
    compile.
    """
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        # Only the match functions of the halves are needed.
        left = _buildregexmatch(kindpats[:half], globsuffix)[1]
        right = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: left(s) or right(s)
    except re.error:
        # Compile the patterns one by one to find the broken one.
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
882 893
883 894 def _patternrootsanddirs(kindpats):
884 895 '''Returns roots and directories corresponding to each pattern.
885 896
886 897 This calculates the roots and directories exactly matching the patterns and
887 898 returns a tuple of (roots, dirs) for each. It does not return other
888 899 directories which may also need to be considered, like the parent
889 900 directories.
890 901 '''
891 902 r = []
892 903 d = []
893 904 for kind, pat, source in kindpats:
894 905 if kind == 'glob': # find the non-glob prefix
895 906 root = []
896 907 for p in pat.split('/'):
897 908 if '[' in p or '{' in p or '*' in p or '?' in p:
898 909 break
899 910 root.append(p)
900 911 r.append('/'.join(root) or '.')
901 912 elif kind in ('relpath', 'path'):
902 913 r.append(pat or '.')
903 914 elif kind in ('rootfilesin',):
904 915 d.append(pat or '.')
905 916 else: # relglob, re, relre
906 917 r.append('.')
907 918 return r, d
908 919
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # Only the roots half of the (roots, dirs) pair is of interest here.
    return _patternrootsanddirs(kindpats)[0]
913 924
def _rootsanddirs(kindpats):
    '''Return (roots, dirs) for the patterns, parents included.

    roots are the directories that must be matched recursively; dirs are
    the directories that must be matched exactly (non-recursively). On top
    of what the patterns name directly, dirs also receives the directories
    that are implicitly needed, such as parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Parents have to be scanned to reach the roots and the exact
    # directories, so they count as exact (non-recursive) directories too:
    # first the parents of the exact directories, then those of the roots.
    parents = list(util.dirs(exactdirs))
    parents.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    return roots, exactdirs + parents
945 956
def _explicitfiles(kindpats):
    '''Return the names from the patterns that may denote explicit files.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # 'rootfilesin' patterns can only name directories, never files, so
    # they are dropped before the roots are computed.
    return _roots([kp for kp in kindpats if kp[0] != 'rootfilesin'])
958 969
959 970 def _anypats(kindpats):
960 971 for kind, pat, source in kindpats:
961 972 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
962 973 return True
963 974
# Comment-stripping regexp used by readpatternfile(); it is compiled on
# first use and then kept here for later calls.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    filepath is opened in binary mode. if warn is set, it is called with a
    message for every "syntax:" line naming an unknown syntax; such a line
    leaves the current default syntax unchanged.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where line is the text of the line once the
    comment, the trailing white space and any syntax prefix have been
    removed. This is useful to debug ignore patterns.

    the file is closed before returning, including when an error is raised
    while it is being read.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # the with block closes the file even if reading, the regexp search or
    # the warn callback raises
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # a "syntax:" line only switches the default; it never
                # yields a pattern of its own
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix, given either as the internal kind (such as
            # "relglob:") or as its short name followed by a colon (such as
            # "glob:"), overrides the current default for this line only
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now