##// END OF EJS Templates
match: remove superfluous pass statements
Augie Fackler -
r34377:acabbc5c default
parent child Browse files
Show More
@@ -1,1034 +1,1033
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
# Every 'kind:' prefix that _patsplit() recognises at the start of a pattern.
allpatternkinds = (
    're', 'glob', 'path', 'relglob', 'relpath', 'relre',
    'listfile', 'listfile0', 'set', 'include', 'subinclude',
    'rootfilesin',
)
# Kinds whose pattern _donormalize() canonicalises relative to cwd.
cwdrelativepatternkinds = ('relpath', 'glob')

# Local alias so methods below can be decorated with a bare @propertycache.
propertycache = util.propertycache
27 27
def _rematcher(regex):
    '''Compile regex with util.re and return a bound match function.

    The compiled object's test_match attribute is preferred when it exists
    (slightly faster, provided by facebook's re2 bindings); otherwise the
    regular match method is returned.
    '''
    compiled = util.re.compile(regex)
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is None:
        return compiled.match
    return fastmatch
37 37
38 38 def _expandsets(kindpats, ctx, listsubrepos):
39 39 '''Returns the kindpats list with the 'set' patterns expanded.'''
40 40 fset = set()
41 41 other = []
42 42
43 43 for kind, pat, source in kindpats:
44 44 if kind == 'set':
45 45 if not ctx:
46 46 raise error.ProgrammingError("fileset expression with no "
47 47 "context")
48 48 s = ctx.getfileset(pat)
49 49 fset.update(s)
50 50
51 51 if listsubrepos:
52 52 for subpath in ctx.substate:
53 53 s = ctx.sub(subpath).getfileset(pat)
54 54 fset.update(subpath + '/' + f for f in s)
55 55
56 56 continue
57 57 other.append((kind, pat, source))
58 58 return fset, other
59 59
60 60 def _expandsubinclude(kindpats, root):
61 61 '''Returns the list of subinclude matcher args and the kindpats without the
62 62 subincludes in it.'''
63 63 relmatchers = []
64 64 other = []
65 65
66 66 for kind, pat, source in kindpats:
67 67 if kind == 'subinclude':
68 68 sourceroot = pathutil.dirname(util.normpath(source))
69 69 pat = util.pconvert(pat)
70 70 path = pathutil.join(sourceroot, pat)
71 71
72 72 newroot = pathutil.dirname(path)
73 73 matcherargs = (newroot, '', [], ['include:%s' % path])
74 74
75 75 prefix = pathutil.canonpath(root, root, newroot)
76 76 if prefix:
77 77 prefix += '/'
78 78 relmatchers.append((prefix, matcherargs))
79 79 else:
80 80 other.append((kind, pat, source))
81 81
82 82 return relmatchers, other
83 83
84 84 def _kindpatsalwaysmatch(kindpats):
85 85 """"Checks whether the kindspats match everything, as e.g.
86 86 'relpath:.' does.
87 87 """
88 88 for kind, pat, source in kindpats:
89 89 if pat != '' or kind not in ['relpath', 'glob']:
90 90 return False
91 91 return True
92 92
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed through to pattern normalization
    ctx - context handed to the pattern/include matchers; also the source of
          the dirstate when icasefs is set
    listsubrepos - passed through to the pattern/include matchers
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            '''_donormalize() plus case folding through the dirstate.'''
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    original = pat
                    pat = dsnormalize(original)

                    # Preserve the original to handle a case only rename.
                    if original != pat and original in dirstate:
                        folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded

    # Base matcher built from the positional patterns.
    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = patternmatcher(root, cwd, kindpats, ctx=ctx,
                               listsubrepos=listsubrepos, badfn=badfn)

    # Narrow by includes, then subtract excludes.
    if include:
        includepats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = includematcher(root, cwd, includepats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        excludepats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = includematcher(root, cwd, excludepats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
175 175
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given files.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
178 178
def always(root, cwd):
    '''Return an alwaysmatcher for root and cwd.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
181 181
def never(root, cwd):
    '''Return a nevermatcher for root and cwd.'''
    matcher = nevermatcher(root, cwd)
    return matcher
184 184
def badmatch(match, badfn):
    """Return a shallow copy of match whose bad attribute is badfn.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
192 192
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. source is '' for patterns
    given directly, the list file name for patterns read through
    'listfile'/'listfile0', and the include file name (or the nested source,
    when there is one) for patterns read through 'include'.

    Raises error.Abort when a list file cannot be read or when reading an
    include file aborts. An include file that raises IOError is skipped,
    with a message sent to warn if warn is set.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                # drop empty entries (blank lines, trailing separator)
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                # patterns from a list file report the list file as source
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3; args[0] is
                # the same value inst[0] yields on Python 2.
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
233 233
class basematcher(object):
    '''Common base for the matcher classes in this module.

    Provides default implementations of the matcher interface; by default
    nothing matches (matchfn() returns False) and files() is empty.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        # Only shadow the bad() method when a callback was supplied.
        if badfn is not None:
            self.bad = badfn
        self._relativeuipath = relativeuipath

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for path in self._files:
            yield path

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relative = self.rel(f)
            # a falsy rel() result falls through to abs(), as before
            if relative:
                return relative
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return True

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
334 333
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        # Unlike basematcher, relativeuipath defaults to False here.
        super(alwaysmatcher, self).__init__(root, cwd, badfn,
                                            relativeuipath=relativeuipath)

    def __repr__(self):
        return '<alwaysmatcher>'

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory and all of its subdirectories are of interest
        return 'all'
353 352
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    def __repr__(self):
        return '<nevermatcher>'

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False
376 375
class patternmatcher(basematcher):
    '''Matches files against a list of (kind, pat, source) patterns.

    Glob-style patterns are anchored with '$', i.e. they must match the
    whole file name.
    '''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        built = _buildmatch(ctx, kindpats, '$', listsubrepos, root)
        self._pats, self.matchfn = built

    @propertycache
    def _dirs(self):
        # parent directories of the explicit files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        fileset = self._fileset
        if self._prefix and dir in fileset:
            return 'all'
        if '.' in fileset or dir in fileset or dir in self._dirs:
            return True
        # also visit anything below an explicitly listed path
        return any(parentdir in fileset
                   for parentdir in util.finddirs(dir))

    def prefix(self):
        return self._prefix

    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % self._pats)
406 405
class includematcher(basematcher):
    '''Matcher used for include/exclude pattern lists.

    Glob-style patterns get the suffix '(?:/|$)', so a glob naming a
    directory also matches the files beneath it.
    '''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        built = _buildmatch(ctx, kindpats, '(?:/|$)', listsubrepos, root)
        self._pats, self.matchfn = built
        self._prefix = _prefix(kindpats)
        roots, dirs = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)

    def visitdir(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots or dir in self._dirs:
            return True
        # also visit anything below a recursively included root
        return any(parentdir in roots
                   for parentdir in util.finddirs(dir))

    def __repr__(self):
        return ('<includematcher includes=%r>' % self._pats)
433 432
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # A list is stored as-is (not copied); other iterables are
        # materialized into a new list.
        self._files = files if isinstance(files, list) else list(files)

    # Matching a file is the same as testing membership in files().
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # parent directories of the listed files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
461 460
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        # Callbacks come from the first matcher only.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        included = self._m1(f)
        if not included:
            return included
        if not self._m2(f):
            return True
        # Excluded by m2, but files listed explicitly in m1 still match.
        return self._m1.exact(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        if self._m2.visitdir(dir) == 'all':
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
511 510
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    If either argument is None the other is returned as-is. If one matcher
    is an always() matcher, a copy of the other one is returned instead of
    building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 alone decides what matches, but m1's callbacks and path
        # conversions must survive on the copy.
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined.bad = m1.bad
        combined.explicitdir = m1.explicitdir
        combined.traversedir = m1.traversedir
        combined.abs = m1.abs
        combined.rel = m1.rel
        combined._relativeuipath |= m1._relativeuipath
        return combined

    if m2.always():
        combined = copy.copy(m1)
        combined._relativeuipath |= m2._relativeuipath
        return combined

    return intersectionmatcher(m1, m2)
536 535
class intersectionmatcher(basematcher):
    '''Matches a file only when both wrapped matchers match it.

    root, cwd, bad, explicitdir and traversedir are taken from the first
    matcher.
    '''
    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # Filter the exact matcher's files through the other matcher.
        exactside, otherside = self._m1, self._m2
        if not exactside.isexact():
            exactside, otherside = otherside, exactside
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
577 576
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> util.pconvert(m2.rel(b'b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    >>> m2.abs(b'c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files below path, with the
        # leading "path/" stripped.
        subprefix = path + "/"
        self._files = [f[len(subprefix):] for f in matcher._files
                       if f.startswith(subprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        return self._matcher.uipath(self._path + "/" + f)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' is the subdirectory itself from the wrapped matcher's view.
        if dir == '.':
            return self._matcher.visitdir(self._path)
        return self._matcher.visitdir(self._path + "/" + dir)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
658 657
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    root, cwd, explicitdir and traversedir are taken from the first matcher.

    NOTE(review): bad is not copied from the first matcher in __init__, so
    the inherited basematcher.bad is used -- confirm whether that is
    intended.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__(first._root, first._cwd)
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # True as soon as any one of the matchers accepts f
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            # 'all' from any matcher wins immediately
            if answer == 'all':
                return answer
            visit |= answer
        return visit

    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
690 689
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise default is returned.
    '''
    kind, _pat = _patsplit(pattern, default)
    return kind
694 693
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns (kind, pat) when the text before the first ':' is one of
    allpatternkinds; otherwise returns (default, pattern) unchanged.
    """
    kind, colon, pat = pattern.partition(':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
703 702
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a\/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    pos, end = 0, len(pat)
    pieces = []
    depth = 0  # number of currently open '{' groups
    quote = util.re.escape

    def peek():
        # next unread character, or False at the end of the pattern
        return pos < end and pat[pos:pos + 1]

    while pos < end:
        ch = pat[pos:pos + 1]
        pos += 1
        if ch not in '*?[{},\\':
            pieces.append(quote(ch))
        elif ch == '*':
            if peek() != '*':
                # single star: anything except a path separator
                pieces.append('[^/]*')
            else:
                pos += 1
                if peek() == '/':
                    # '**/' matches zero or more whole directories
                    pos += 1
                    pieces.append('(?:.*/)?')
                else:
                    pieces.append('.*')
        elif ch == '?':
            pieces.append('.')
        elif ch == '[':
            # look for the closing bracket; a leading '!' or ']' is part of
            # the class
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat '[' literally
                pieces.append('\\[')
            else:
                members = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                first = members[0:1]
                if first == '!':
                    members = '^' + members[1:]
                elif first == '^':
                    members = '\\' + members
                pieces.append('[%s]' % members)
        elif ch == '{':
            depth += 1
            pieces.append('(?:')
        elif ch == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif ch == ',' and depth:
            pieces.append('|')
        elif ch == '\\':
            following = peek()
            if following:
                pos += 1
                pieces.append(quote(following))
            else:
                pieces.append(quote(ch))
        else:
            # '}' or ',' outside of any group
            pieces.append(quote(ch))
    return ''.join(pieces)
784 783
785 784 def _regex(kind, pat, globsuffix):
786 785 '''Convert a (normalized) pattern of any kind into a regular expression.
787 786 globsuffix is appended to the regexp of globs.'''
788 787 if not pat:
789 788 return ''
790 789 if kind == 're':
791 790 return pat
792 791 if kind in ('path', 'relpath'):
793 792 if pat == '.':
794 793 return ''
795 794 return util.re.escape(pat) + '(?:/|$)'
796 795 if kind == 'rootfilesin':
797 796 if pat == '.':
798 797 escaped = ''
799 798 else:
800 799 # Pattern is a directory name.
801 800 escaped = util.re.escape(pat) + '/'
802 801 # Anything after the pattern must be a non-directory.
803 802 return escaped + '[^/]+$'
804 803 if kind == 'relglob':
805 804 return '(?:|.*/)' + _globre(pat) + globsuffix
806 805 if kind == 'relre':
807 806 if pat.startswith('^'):
808 807 return pat
809 808 return '.*' + pat
810 809 return _globre(pat) + globsuffix
811 810
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    Up to three predicates are combined: one for 'subinclude' patterns, one
    for expanded 'set' patterns and one regexp-based predicate for the
    remaining patterns. The returned regexp string only describes the
    last of these ('' when there is none).
    '''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincluded files are built lazily, once per prefix
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
846 845
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a pattern does not compile; the message names
    the offending pattern (and its source, when it has one).
    """
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        regexa, a = _buildregexmatch(kindpats[:half], globsuffix)
        regexb, b = _buildregexmatch(kindpats[half:], globsuffix)
        # the full (oversized) regex string is still what gets reported
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # Compile the patterns one by one to find the one to blame.
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
877 876
878 877 def _patternrootsanddirs(kindpats):
879 878 '''Returns roots and directories corresponding to each pattern.
880 879
881 880 This calculates the roots and directories exactly matching the patterns and
882 881 returns a tuple of (roots, dirs) for each. It does not return other
883 882 directories which may also need to be considered, like the parent
884 883 directories.
885 884 '''
886 885 r = []
887 886 d = []
888 887 for kind, pat, source in kindpats:
889 888 if kind == 'glob': # find the non-glob prefix
890 889 root = []
891 890 for p in pat.split('/'):
892 891 if '[' in p or '{' in p or '*' in p or '?' in p:
893 892 break
894 893 root.append(p)
895 894 r.append('/'.join(root) or '.')
896 895 elif kind in ('relpath', 'path'):
897 896 r.append(pat or '.')
898 897 elif kind in ('rootfilesin',):
899 898 d.append(pat or '.')
900 899 else: # relglob, re, relre
901 900 r.append('.')
902 901 return r, d
903 902
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of what _patternrootsanddirs() returns.
    '''
    return _patternrootsanddirs(kindpats)[0]
908 907
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    for paths in (exactdirs, roots):
        exactdirs.extend(util.dirs(paths))
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
943 942
def _explicitfiles(kindpats):
    '''Return the candidate explicit filenames named by the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only designate directories, never a file, so those
    # patterns are dropped before the roots are computed.
    return _roots([kindpat for kindpat in kindpats
                   if kindpat[0] != 'rootfilesin'])
956 955
957 956 def _prefix(kindpats):
958 957 '''Whether all the patterns match a prefix (i.e. recursively)'''
959 958 for kind, pat, source in kindpats:
960 959 if kind not in ('path', 'relpath'):
961 960 return False
962 961 return True
963 962
# Comment-stripping regexp used by readpatternfile(). It is compiled lazily,
# the first time a line containing '#' is parsed, and then reused.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    filepath is opened in binary mode and is always closed again, even when
    parsing fails part way through.

    warn, when true, is called with a message for every "syntax:" line that
    names an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line) where lineno is the 1-based line number and line
    is the text left after comments, trailing white space and any syntax
    prefix have been removed. This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # Use a context manager so the file is closed even if parsing (or the
    # warn callback) raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # a "syntax:" line only switches the default for the lines
                # that follow; it never produces a pattern itself
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # an explicit per-line prefix overrides the current default
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now