##// END OF EJS Templates
py3: make sure we return str from __repr__...
Pulkit Goyal -
r36067:c4fa47f8 default
parent child Browse files
Show More
@@ -1,1020 +1,1028
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 encoding,
16 17 error,
17 18 pathutil,
18 19 util,
19 20 )
20 21
# Every pattern kind prefix recognized by _patsplit(); a 'kind:' prefix that is
# not listed here is treated as part of the pattern itself.
allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
# Kinds whose patterns _donormalize() canonicalizes relative to cwd.
cwdrelativepatternkinds = ('relpath', 'glob')

# Local alias for util.propertycache, used as a decorator by the matchers.
propertycache = util.propertycache
27 28
def _rematcher(regex):
    '''Compile regex with util.re and return a callable that matches it.

    The compiled object's test_match attribute is preferred when it exists
    (slightly faster, provided by facebook's re2 bindings); otherwise the
    regular match method is returned.'''
    compiled = util.re.compile(regex)
    try:
        fastmatch = compiled.test_match
    except AttributeError:
        # this engine's pattern objects have no test_match
        return compiled.match
    return fastmatch
37 38
38 39 def _expandsets(kindpats, ctx, listsubrepos):
39 40 '''Returns the kindpats list with the 'set' patterns expanded.'''
40 41 fset = set()
41 42 other = []
42 43
43 44 for kind, pat, source in kindpats:
44 45 if kind == 'set':
45 46 if not ctx:
46 47 raise error.ProgrammingError("fileset expression with no "
47 48 "context")
48 49 s = ctx.getfileset(pat)
49 50 fset.update(s)
50 51
51 52 if listsubrepos:
52 53 for subpath in ctx.substate:
53 54 s = ctx.sub(subpath).getfileset(pat)
54 55 fset.update(subpath + '/' + f for f in s)
55 56
56 57 continue
57 58 other.append((kind, pat, source))
58 59 return fset, other
59 60
def _expandsubinclude(kindpats, root):
    '''Separate the 'subinclude' patterns from the rest of kindpats.

    Returns (relmatchers, other). relmatchers holds one (prefix, matcherargs)
    pair per subinclude: matcherargs are positional arguments for match()
    whose root is the directory of the included pattern file, and prefix is
    pathutil.canonpath(root, root, <that directory>) with a trailing '/'
    appended when it is non-empty. other holds the remaining kindpats.'''
    subincludes = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != 'subinclude':
            remaining.append((kind, pat, source))
            continue

        # the pattern file is located relative to the file that named it
        sourceroot = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourceroot, util.pconvert(pat))
        newroot = pathutil.dirname(path)
        matcherargs = (newroot, '', [], ['include:%s' % path])

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += '/'
        subincludes.append((prefix, matcherargs))

    return subincludes, remaining
83 84
84 85 def _kindpatsalwaysmatch(kindpats):
85 86 """"Checks whether the kindspats match everything, as e.g.
86 87 'relpath:.' does.
87 88 """
88 89 for kind, pat, source in kindpats:
89 90 if pat != '' or kind not in ['relpath', 'glob']:
90 91 return False
91 92 return True
92 93
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - optional path auditor handed to pathutil.canonpath() while
              cwd-relative patterns are normalized
    ctx - context used to expand 'set:' patterns; it also supplies the
          dirstate when icasefs is set
    listsubrepos - passed through to the pattern/include matchers, where it
                   makes 'set:' patterns expand inside the subrepos of ctx too
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Raises error.ProgrammingError when both icasefs and exact are set.
    """
    # By default patterns are normalized by _donormalize(); icasefs swaps in
    # a wrapper that additionally folds them through the dirstate.
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    # Build the primary matcher from `patterns`.
    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = patternmatcher(root, cwd, kindpats, ctx=ctx,
                               listsubrepos=listsubrepos, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)

    # Narrow the primary matcher with -I/-X style patterns. The include and
    # exclude matchers get no badfn of their own: the composed matchers take
    # their bad callback from m.
    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
175 176
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given files.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
178 179
def always(root, cwd):
    '''Return an alwaysmatcher for root and cwd.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
181 182
def never(root, cwd):
    '''Return a nevermatcher for root and cwd.'''
    matcher = nevermatcher(root, cwd)
    return matcher
184 185
def badmatch(match, badfn):
    """Return a shallow copy of match whose bad attribute is badfn.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
192 193
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. source is the listfile or
    include file a pattern was read from, or '' for a pattern given directly.

    Raises error.Abort when a listfile cannot be read or when reading an
    include file aborts; an include file that fails with IOError is skipped
    (and reported through warn, if given).'''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # every pattern read from the list is attributed to the listfile
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3, so read the
                # message through .args instead of inst[0].
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
233 234
class basematcher(object):
    '''Base class of all matchers.

    On its own it matches nothing (matchfn() returns False), lists no files
    and asks for every directory to be visited; subclasses override the
    relevant methods.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        # only shadow the bad() method when a callback was actually supplied
        if badfn is not None:
            self.bad = badfn
        self._relativeuipath = relativeuipath

    def __call__(self, fn):
        '''Calling the matcher is the same as calling matchfn().'''
        return self.matchfn(fn)
    def __iter__(self):
        '''Iterate over the entries of ._files.'''
        for f in self._files:
            yield f
    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        # NOTE: because of the and/or form, a false value from rel() (e.g. an
        # empty path) also falls back to abs().
        return (self._relativeuipath and self.rel(f)) or self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        '''Return whether f matches; the base implementation matches nothing.'''
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()
330 331
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        super(alwaysmatcher, self).__init__(root, cwd, badfn,
                                            relativeuipath=relativeuipath)

    def always(self):
        '''This matcher always matches.'''
        return True

    def matchfn(self, f):
        '''Every file matches.'''
        return True

    def visitdir(self, dir):
        '''Every directory and all of its subdirectories are to be visited.'''
        return 'all'

    def __repr__(self):
        # Only the r''-prefixed literal is kept (not the superseded unprefixed
        # one) so that __repr__ returns a native str.
        return r'<alwaysmatcher>'
349 350
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        '''No directory needs to be visited.'''
        return False

    def __repr__(self):
        # Only the r''-prefixed literal is kept (not the superseded unprefixed
        # one) so that __repr__ returns a native str.
        return r'<nevermatcher>'
372 373
class patternmatcher(basematcher):
    '''Matcher built from a list of (kind, pat, source) patterns.

    Globs are anchored at the end of the name ('$' is used as glob suffix).'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        pats, matchfn = _buildmatch(ctx, kindpats, '$', listsubrepos, root)
        self._pats = pats
        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        # parent directories of the explicit files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Return 'all' for an explicitly listed directory of a prefix
        matcher, otherwise a boolean telling whether dir may hold matches.'''
        explicit = self._fileset
        if self._prefix and dir in explicit:
            return 'all'
        if '.' in explicit or dir in explicit:
            return True
        if dir in self._dirs:
            return True
        return any(parentdir in explicit
                   for parentdir in util.finddirs(dir))

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % self._pats)
402 404
class includematcher(basematcher):
    '''Matcher for include/exclude style patterns.

    Globs use '(?:/|$)' as suffix, so they match either a whole name or a
    leading directory of it.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        pats, matchfn = _buildmatch(ctx, kindpats, '(?:/|$)', listsubrepos,
                                    root)
        self._pats = pats
        self.matchfn = matchfn
        self._prefix = _prefix(kindpats)
        recursive, nonrecursive = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(recursive)
        # dirs are directories which are non-recursively included.
        self._dirs = set(nonrecursive)

    def visitdir(self, dir):
        '''Return 'all' for a root of a prefix matcher, otherwise a boolean
        telling whether dir may hold matches.'''
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots:
            return True
        if dir in self._dirs:
            return True
        return any(parentdir in roots
                   for parentdir in util.finddirs(dir))

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % self._pats)
429 432
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # keep a list as-is; materialize any other iterable
        self._files = files if isinstance(files, list) else list(files)

    # a file matches when it is one of the listed files
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # parent directories of the listed files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Visit only directories leading to one of the listed files.'''
        return dir in self._dirs

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
457 461
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we don't
            # want to remove "dir" from the set even though it would match m2,
            # because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        '''Skip dir when m2 covers it entirely; otherwise defer to m1.'''
        fullyexcluded = self._m2.visitdir(dir) == 'all'
        if fullyexcluded:
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
497 502
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    When either matcher is None the other one is returned as-is, and when one
    of them always matches a copy of the other is returned instead of a real
    intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        merged = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        merged.bad = m1.bad
        merged.explicitdir = m1.explicitdir
        merged.traversedir = m1.traversedir
        merged.abs = m1.abs
        merged.rel = m1.rel
        merged._relativeuipath = merged._relativeuipath | m1._relativeuipath
        return merged

    if m2.always():
        merged = copy.copy(m1)
        merged._relativeuipath = merged._relativeuipath | m2._relativeuipath
        return merged

    return intersectionmatcher(m1, m2)
522 527
class intersectionmatcher(basematcher):
    '''Matches a file only when both composed matchers match it.

    root, cwd, bad, explicitdir and traversedir are taken from m1.'''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not always
            # files. For example, if intersecting a matcher "-I glob:foo.txt"
            # with matcher of "path:dir2", we don't want to remove "dir2" from
            # the set.
            return self._m1.files() + self._m2.files()
        # filter the exact matcher's files through the other matcher
        exactm, otherm = self._m1, self._m2
        if not exactm.isexact():
            exactm, otherm = otherm, exactm
        return [f for f in exactm.files() if otherm(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
563 569
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> util.pconvert(m2.rel(b'b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    >>> m2.abs(b'c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # keep only the files below path, with the "path/" prefix removed
        skip = len(path) + 1
        self._files = [f[skip:] for f in matcher._files
                       if f.startswith(path + "/")]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        return self._matcher.uipath(self._path + "/" + f)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' denotes the subdirectory itself
        if dir == '.':
            target = self._path
        else:
            target = self._path + "/" + dir
        return self._matcher.visitdir(target)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
644 651
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (root, cwd, bad, explicitdir, traversedir) are
    taken from the first matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__(first._root, first._cwd)
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        '''A file matches as soon as any of the matchers matches it.'''
        return any(match(f) for match in self._matchers)

    def visitdir(self, dir):
        '''Return 'all' if any matcher says so, else whether any matcher
        wants dir visited.'''
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                return answer
            visit |= answer
        return visit

    @encoding.strmethod
    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
676 684
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern if the kind is a known one,
    otherwise return default.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
680 688
def _patsplit(pattern, default):
    """Split pattern into a (kind, pat) pair.

    When pattern starts with a prefix listed in allpatternkinds followed by
    ':', that prefix is the kind and the remainder the pattern. Otherwise
    (default, pattern) is returned with the pattern left untouched."""
    kind, colon, pat = pattern.partition(':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
689 697
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned once, left to right; characters without a special
    meaning are passed through util.re.escape.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a\/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the scan position, n the pattern length
    i, n = 0, len(pat)
    res = ''
    # number of '{' groups currently open
    group = 0
    escape = util.re.escape
    def peek():
        # next unread character, or a false value at the end of the pattern
        return i < n and pat[i:i + 1]
    while i < n:
        # one-character slices are used instead of indexing (presumably so
        # that bytes patterns yield bytes, not ints, on Python 3)
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches any number of leading directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' matches across directory separators
                    res += '.*'
            else:
                # a single '*' stops at the directory separator
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a leading '!' or ']' is skipped first
            # so that it belongs to the character class
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # no closing bracket: treat '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation becomes regexp negation
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in the glob class
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside a group
            res += '|'
        elif c == '\\':
            # backslash escapes the next character; a trailing backslash is
            # escaped itself
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                res += escape(c)
        else:
            # '}' or ',' outside of any group
            res += escape(c)
    return res
770 778
def _regex(kind, pat, globsuffix):
    '''Translate a (normalized) pattern of the given kind into a regexp string.

    globsuffix is appended to the regexp of globs (kinds 'relglob' and any
    kind not handled explicitly). An empty pattern yields an empty regexp.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'relre':
        # unanchored unless the pattern anchors itself
        return pat if pat.startswith('^') else '.*' + pat
    if kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        return util.re.escape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root).
        dirprefix = '' if pat == '.' else util.re.escape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + '[^/]+$'
    globregex = _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + globregex
    return globregex
797 805
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    The matcher function is true for a file when any of these applies: a
    subinclude matcher matches it, it belongs to an expanded fileset, or the
    regexp built from the remaining patterns matches it. The returned regexp
    string only covers that last part and is '' when no patterns remain.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes are created on first use, keyed by prefix
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if f.startswith(prefix):
                    mf = submatchers.get(prefix)
                    if mf is None:
                        mf = match(*matcherargs)
                        submatchers[prefix] = mf

                    # the sub-matcher sees the path without the prefix
                    if mf(f[len(prefix):]):
                        return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    # avoid the any() wrapper when there is a single match function
    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    else:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)
832 840
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a pattern does not compile; the message names the
    offending pattern (and its source, when known) if it can be identified.
    """
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p, s) in kindpats])
        # over-long regexps are handled like an engine overflow below
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            # a single pattern cannot be split any further: re-raise
            raise
        regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
        regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
        # the full regexp string is still returned; only the matching is
        # split between the two halves
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile each pattern on its own to find the one to report
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                     (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
863 871
864 872 def _patternrootsanddirs(kindpats):
865 873 '''Returns roots and directories corresponding to each pattern.
866 874
867 875 This calculates the roots and directories exactly matching the patterns and
868 876 returns a tuple of (roots, dirs) for each. It does not return other
869 877 directories which may also need to be considered, like the parent
870 878 directories.
871 879 '''
872 880 r = []
873 881 d = []
874 882 for kind, pat, source in kindpats:
875 883 if kind == 'glob': # find the non-glob prefix
876 884 root = []
877 885 for p in pat.split('/'):
878 886 if '[' in p or '{' in p or '*' in p or '?' in p:
879 887 break
880 888 root.append(p)
881 889 r.append('/'.join(root) or '.')
882 890 elif kind in ('relpath', 'path'):
883 891 r.append(pat or '.')
884 892 elif kind in ('rootfilesin',):
885 893 d.append(pat or '.')
886 894 else: # relglob, re, relre
887 895 r.append('.')
888 896 return r, d
889 897
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of _patternrootsanddirs(kindpats).'''
    return _patternrootsanddirs(kindpats)[0]
894 902
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    dirs += util.dirs(dirs)
    dirs += util.dirs(roots)
    # util.dirs() does not include the root directory, so add it manually
    dirs.append('.')

    return roots, dirs
929 937
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, so it never contributes an
    # explicit filename; every other kind might.
    candidates = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(candidates)
942 950
943 951 def _prefix(kindpats):
944 952 '''Whether all the patterns match a prefix (i.e. recursively)'''
945 953 for kind, pat, source in kindpats:
946 954 if kind not in ('path', 'relpath'):
947 955 return False
948 956 return True
949 957
# Compiled lazily by readpatternfile() the first time a line contains '#'.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    warn, if given, is called with a message for every 'syntax:' line that
    names an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.
    originalline is the line after comments, trailing whitespace and any
    syntax prefix have been removed.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # The with block closes the file even when parsing a line raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now