##// END OF EJS Templates
match: remove doc about undefined behavior of visitdir()...
Yuya Nishihara -
r35165:6864c405 default
parent child Browse files
Show More
@@ -1,1033 +1,1030 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
# Every kind prefix that _patsplit() recognizes in a 'kind:pattern' string.
allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
# Kinds whose patterns _donormalize() canonicalizes relative to cwd.
cwdrelativepatternkinds = ('relpath', 'glob')

# Local alias so the classes below can write @propertycache.
propertycache = util.propertycache
27 27
def _rematcher(regex):
    '''Compile regex with util.re and return a callable that matches a
    string against it.

    The compiled object's test_match method is preferred when it exists;
    otherwise its plain match method is returned.'''
    compiled = util.re.compile(regex)
    # test_match is slightly faster; it is provided by facebook's re2 bindings
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is not None:
        return fastmatch
    return compiled.match
37 37
38 38 def _expandsets(kindpats, ctx, listsubrepos):
39 39 '''Returns the kindpats list with the 'set' patterns expanded.'''
40 40 fset = set()
41 41 other = []
42 42
43 43 for kind, pat, source in kindpats:
44 44 if kind == 'set':
45 45 if not ctx:
46 46 raise error.ProgrammingError("fileset expression with no "
47 47 "context")
48 48 s = ctx.getfileset(pat)
49 49 fset.update(s)
50 50
51 51 if listsubrepos:
52 52 for subpath in ctx.substate:
53 53 s = ctx.sub(subpath).getfileset(pat)
54 54 fset.update(subpath + '/' + f for f in s)
55 55
56 56 continue
57 57 other.append((kind, pat, source))
58 58 return fset, other
59 59
60 60 def _expandsubinclude(kindpats, root):
61 61 '''Returns the list of subinclude matcher args and the kindpats without the
62 62 subincludes in it.'''
63 63 relmatchers = []
64 64 other = []
65 65
66 66 for kind, pat, source in kindpats:
67 67 if kind == 'subinclude':
68 68 sourceroot = pathutil.dirname(util.normpath(source))
69 69 pat = util.pconvert(pat)
70 70 path = pathutil.join(sourceroot, pat)
71 71
72 72 newroot = pathutil.dirname(path)
73 73 matcherargs = (newroot, '', [], ['include:%s' % path])
74 74
75 75 prefix = pathutil.canonpath(root, root, newroot)
76 76 if prefix:
77 77 prefix += '/'
78 78 relmatchers.append((prefix, matcherargs))
79 79 else:
80 80 other.append((kind, pat, source))
81 81
82 82 return relmatchers, other
83 83
84 84 def _kindpatsalwaysmatch(kindpats):
85 85 """"Checks whether the kindspats match everything, as e.g.
86 86 'relpath:.' does.
87 87 """
88 88 for kind, pat, source in kindpats:
89 89 if pat != '' or kind not in ['relpath', 'glob']:
90 90 return False
91 91 return True
92 92
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed through to pathutil.canonpath() when cwd-relative
              patterns are normalized
    ctx - context used to expand 'set:' patterns; with icasefs it also
          supplies the dirstate (via ctx.repo()) used for case normalization
    listsubrepos - also expand 'set:' patterns in the subrepos of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    The result is the primary matcher (exact, pattern or always),
    intersected with the include matcher and with the exclude matcher
    subtracted, when those are given.
    """
    # Default normalizer; replaced below by a case-folding wrapper when
    # matching on a case-insensitive filesystem.
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # Run the regular normalization, then map each non-regex
            # pattern through the dirstate's normalize().
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    # Build the primary matcher from 'patterns'.
    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = patternmatcher(root, cwd, kindpats, ctx=ctx,
                               listsubrepos=listsubrepos, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)

    # Narrow the primary matcher by the include patterns ...
    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    # ... and remove whatever the exclude patterns match.
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
175 175
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given list of file names.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
178 178
def always(root, cwd):
    '''Return a matcher that matches every file.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
181 181
def never(root, cwd):
    '''Return a matcher that matches no file.'''
    matcher = nevermatcher(root, cwd)
    return matcher
184 184
def badmatch(match, badfn):
    """Return a shallow copy of the matcher whose bad() callback is badfn.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
192 192
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. source is '' for patterns
    given directly, and the name of the list file or include file for
    patterns read from one.

    Raises error.Abort if a list file cannot be read or if an included
    pattern file aborts; an unreadable include file is only reported
    through warn (when given) and otherwise skipped.'''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                # drop empty entries (blank lines, trailing separator)
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # every pattern from the list is attributed to the list file
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    # keep a nested source if there is one
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # inst.args[0] is the message; indexing the exception
                # object itself (inst[0]) only works on Python 2.
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
233 233
class basematcher(object):
    '''Base class of all matchers.

    It matches nothing and lists no files by itself; subclasses override
    matchfn(), visitdir() and the always()/isexact()/prefix() predicates.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        self._relativeuipath = relativeuipath
        # only shadow the bad() method when a callback was actually given
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        '''Calling the matcher is the same as calling matchfn().'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of files().'''
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path. If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relative = self.rel(f)
            # an empty relative path falls back to the root-relative one
            if relative:
                return relative
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        '''Return whether f matches; the base matcher matches nothing.'''
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
333 330
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        # unlike basematcher, display paths default to root-relative here
        super(alwaysmatcher, self).__init__(root, cwd, badfn,
                                            relativeuipath=relativeuipath)

    def __repr__(self):
        return '<alwaysmatcher>'

    def always(self):
        return True

    def matchfn(self, f):
        '''Every file matches.'''
        return True

    def visitdir(self, dir):
        '''Every directory is visited together with all its subdirectories.'''
        return 'all'
352 349
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    def __repr__(self):
        return '<nevermatcher>'

    def visitdir(self, dir):
        '''No directory ever needs to be visited.'''
        return False

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True
375 372
class patternmatcher(basematcher):
    '''Matches files against a list of (kind, pat, source) patterns.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # glob patterns must match up to the end of the file name
        regex, matchfn = _buildmatch(ctx, kindpats, '$', listsubrepos, root)
        self._pats = regex
        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        # every parent directory of the listed files, plus the root
        parents = set(util.dirs(self._fileset))
        parents.add('.')
        return parents

    def visitdir(self, dir):
        '''Return 'all', True or False; see basematcher.visitdir().'''
        listed = self._fileset
        if dir in listed:
            # a listed directory is matched recursively by prefix matchers
            if self._prefix:
                return 'all'
            return True
        if '.' in listed or dir in self._dirs:
            return True
        # dir lies somewhere below a listed path
        return any(parentdir in listed
                   for parentdir in util.finddirs(dir))

    def prefix(self):
        return self._prefix

    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % self._pats)
405 402
class includematcher(basematcher):
    '''Matcher built from include/exclude style patterns, where a glob also
    matches everything below a directory it names.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        # a glob may match either a whole name or a leading directory
        regex, matchfn = _buildmatch(ctx, kindpats, '(?:/|$)',
                                     listsubrepos, root)
        self._pats = regex
        self.matchfn = matchfn
        self._prefix = _prefix(kindpats)
        recursive, nonrecursive = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(recursive)
        # dirs are directories which are non-recursively included.
        self._dirs = set(nonrecursive)

    def visitdir(self, dir):
        '''Return 'all', True or False; see basematcher.visitdir().'''
        roots = self._roots
        if dir in roots:
            # a root is matched recursively when only prefixes were given
            if self._prefix:
                return 'all'
            return True
        if '.' in roots or dir in self._dirs:
            return True
        # dir lies somewhere below a recursively included root
        return any(parentdir in roots
                   for parentdir in util.finddirs(dir))

    def __repr__(self):
        return ('<includematcher includes=%r>' % self._pats)
432 429
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # a list is stored as is; any other iterable is copied into one
        self._files = files if isinstance(files, list) else list(files)

    # a file matches exactly when it is one of the listed files
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # every parent directory of the listed files, plus the root
        parents = set(util.dirs(self._fileset))
        parents.add('.')
        return parents

    def visitdir(self, dir):
        '''Only directories leading to a listed file are visited.'''
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
460 457
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        # callbacks come from the first matcher only
        self.traversedir = m1.traversedir
        self.explicitdir = m1.explicitdir
        self.bad = m1.bad

    def matchfn(self, f):
        '''f matches if m1 matches it and either m2 does not or f is one
        of m1's explicitly listed files.'''
        included = self._m1(f)
        if not included:
            return included
        if not self._m2(f):
            return True
        return self._m1.exact(f)

    @propertycache
    def _files(self):
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfileins:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        if not self.isexact():
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        '''Return True or False; never 'all'.'''
        excluded = self._m2.visitdir(dir)
        if excluded == 'all':
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
510 507
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    When either matcher is None the other one is returned as is. When one
    of them matches everything, a copy of the other is returned instead of
    building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 alone decides what matches, but it must behave like m1 for
        # everything that is not about matching.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        result = copy.copy(m2)
        result.bad = m1.bad
        result.explicitdir = m1.explicitdir
        result.traversedir = m1.traversedir
        result.abs = m1.abs
        result.rel = m1.rel
        result._relativeuipath |= m1._relativeuipath
        return result

    if not m2.always():
        return intersectionmatcher(m1, m2)

    result = copy.copy(m1)
    result._relativeuipath |= m2._relativeuipath
    return result
535 532
class intersectionmatcher(basematcher):
    '''Matches the files that both m1 and m2 match.

    root, cwd, bad, explicitdir and traversedir are taken from m1.
    '''
    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        # callbacks come from the first matcher only
        self.traversedir = m1.traversedir
        self.explicitdir = m1.explicitdir
        self.bad = m1.bad

    @propertycache
    def _files(self):
        # If neither m1 nor m2 is an exact matcher, we can't easily intersect
        # the set of files, because their files() are not always files. For
        # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
        # "path:dir2", we don't want to remove "dir2" from the set.
        if not self.isexact():
            return self._m1.files() + self._m2.files()
        # filter the exact matcher's files through the other matcher
        if self._m1.isexact():
            exactm, otherm = self._m1, self._m2
        else:
            exactm, otherm = self._m2, self._m1
        return [f for f in exactm.files() if otherm(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        '''Return 'all', True or False; see basematcher.visitdir().'''
        first = self._m1.visitdir(dir)
        if not first:
            return False
        second = self._m2.visitdir(dir)
        if first == 'all':
            # m1 has no objection anywhere below dir, so m2 decides
            return second
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
576 573
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> util.pconvert(m2.rel(b'b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    >>> m2.abs(b'c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # keep the wrapped matcher's files that live below path, with the
        # 'path/' prefix stripped
        dirprefix = path + "/"
        striplen = len(dirprefix)
        self._files = [name[striplen:] for name in matcher._files
                       if name.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def bad(self, f, msg):
        '''Report f to the wrapped matcher with the path prepended.'''
        fullname = self._path + "/" + f
        self._matcher.bad(fullname, msg)

    def abs(self, f):
        fullname = self._path + "/" + f
        return self._matcher.abs(fullname)

    def rel(self, f):
        fullname = self._path + "/" + f
        return self._matcher.rel(fullname)

    def uipath(self, f):
        fullname = self._path + "/" + f
        return self._matcher.uipath(fullname)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        fullname = self._path + "/" + f
        return self._matcher.matchfn(fullname)

    def visitdir(self, dir):
        '''Ask the wrapped matcher about dir below the path; '.' stands
        for the path itself.'''
        if dir != '.':
            return self._matcher.visitdir(self._path + "/" + dir)
        return self._matcher.visitdir(self._path)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
657 654
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    root, cwd, explicitdir and traversedir are taken from the first
    matcher. bad is not copied, so it stays the basematcher default.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__(first._root, first._cwd)
        self._matchers = matchers
        self.traversedir = first.traversedir
        self.explicitdir = first.explicitdir

    def matchfn(self, f):
        '''f matches as soon as any of the matchers matches it.'''
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        '''Return 'all' if any matcher says so, else whether any matcher
        wants dir visited.'''
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                return answer
            visit |= answer
        return visit

    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
689 686
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return default.'''
    kind, pat = _patsplit(pattern, default)
    return kind
693 690
694 691 def _patsplit(pattern, default):
695 692 """Split a string into the optional pattern kind prefix and the actual
696 693 pattern."""
697 694 if ':' in pattern:
698 695 kind, pat = pattern.split(':', 1)
699 696 if kind in allpatternkinds:
700 697 return kind, pat
701 698 return default, pattern
702 699
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a\/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the index of the next unread character, n the pattern length
    i, n = 0, len(pat)
    res = ''
    # nesting depth of '{...}' alternations currently open
    group = 0
    escape = util.re.escape
    def peek():
        # next unread character, or a false value at the end of the pattern
        return i < n and pat[i:i + 1]
    while i < n:
        # slicing (rather than indexing) yields a one-character string
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # ordinary character: match it literally
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches any number of leading directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' matches anything, including '/'
                    res += '.*'
            else:
                # a single '*' does not cross a '/'
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a '!' or ']' right after the '['
            # belongs to the class itself
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat the '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation '!' becomes regexp negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in a glob class, so escape it
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside '{...}'
            res += '|'
        elif c == '\\':
            # a backslash makes the following character literal
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                # trailing backslash: match a literal backslash
                res += escape(c)
        else:
            # '}' or ',' outside of any group: match literally
            res += escape(c)
    return res
783 780
784 781 def _regex(kind, pat, globsuffix):
785 782 '''Convert a (normalized) pattern of any kind into a regular expression.
786 783 globsuffix is appended to the regexp of globs.'''
787 784 if not pat:
788 785 return ''
789 786 if kind == 're':
790 787 return pat
791 788 if kind in ('path', 'relpath'):
792 789 if pat == '.':
793 790 return ''
794 791 return util.re.escape(pat) + '(?:/|$)'
795 792 if kind == 'rootfilesin':
796 793 if pat == '.':
797 794 escaped = ''
798 795 else:
799 796 # Pattern is a directory name.
800 797 escaped = util.re.escape(pat) + '/'
801 798 # Anything after the pattern must be a non-directory.
802 799 return escaped + '[^/]+$'
803 800 if kind == 'relglob':
804 801 return '(?:|.*/)' + _globre(pat) + globsuffix
805 802 if kind == 'relre':
806 803 if pat.startswith('^'):
807 804 return pat
808 805 return '.*' + pat
809 806 return _globre(pat) + globsuffix
810 807
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    The matcher function combines up to three parts: the subinclude
    matchers, the expanded filesets, and one regexp for all remaining
    patterns. The returned regexp string only covers the last part and is
    empty when there are no such patterns.'''
    parts = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for the included files are built on first use
        cache = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                if prefix not in cache:
                    cache[prefix] = match(*matcherargs)
                if cache[prefix](f[len(prefix):]):
                    return True
            return False
        parts.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        parts.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        parts.append(regexmatch)

    if len(parts) == 1:
        # no need for an extra level of indirection
        return regex, parts[0]

    def matchany(f):
        return any(part(f) for part in parts)
    return regex, matchany
845 842
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort if one of the patterns is not a valid regexp."""
    try:
        alternatives = [_regex(kind, pat, globsuffix)
                        for (kind, pat, source) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        # treat an overly long regexp like one the engine rejected
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        regexa, matcha = _buildregexmatch(kindpats[:half], globsuffix)
        regexb, matchb = _buildregexmatch(kindpats[half:], globsuffix)
        def matcheither(s):
            return matcha(s) or matchb(s)
        # the full regexp is still returned for display purposes
        return regex, matcheither
    except re.error:
        # compile the patterns one by one to find the one to blame
        for kind, pat, source in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(kind, pat, globsuffix))
            except re.error:
                if source:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (source, kind, pat))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") %
                                      (kind, pat))
        raise error.Abort(_("invalid pattern"))
876 873
877 874 def _patternrootsanddirs(kindpats):
878 875 '''Returns roots and directories corresponding to each pattern.
879 876
880 877 This calculates the roots and directories exactly matching the patterns and
881 878 returns a tuple of (roots, dirs) for each. It does not return other
882 879 directories which may also need to be considered, like the parent
883 880 directories.
884 881 '''
885 882 r = []
886 883 d = []
887 884 for kind, pat, source in kindpats:
888 885 if kind == 'glob': # find the non-glob prefix
889 886 root = []
890 887 for p in pat.split('/'):
891 888 if '[' in p or '{' in p or '*' in p or '?' in p:
892 889 break
893 890 root.append(p)
894 891 r.append('/'.join(root) or '.')
895 892 elif kind in ('relpath', 'path'):
896 893 r.append(pat or '.')
897 894 elif kind in ('rootfilesin',):
898 895 d.append(pat or '.')
899 896 else: # relglob, re, relre
900 897 r.append('.')
901 898 return r, d
902 899
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # the non-recursive directories are not of interest here
    return _patternrootsanddirs(kindpats)[0]
907 904
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    for group in (exactdirs, roots):
        exactdirs.extend(util.dirs(group))
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
942 939
def _explicitfiles(kindpats):
    '''Return the names from the patterns that could be explicit files.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' patterns can only name directories, never files, so they
    # are dropped before the roots are computed.
    candidates = []
    for kindpat in kindpats:
        if kindpat[0] != 'rootfilesin':
            candidates.append(kindpat)
    return _roots(candidates)
955 952
956 953 def _prefix(kindpats):
957 954 '''Whether all the patterns match a prefix (i.e. recursively)'''
958 955 for kind, pat, source in kindpats:
959 956 if kind not in ('path', 'relpath'):
960 957 return False
961 958 return True
962 959
# Lazily compiled regexp used by readpatternfile() to strip comments; it is
# built on first use and then shared by all later calls.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    filepath is the path of the pattern file to read.

    warn, if true, is called with a message for each "syntax:" line that
    names an unknown syntax; such lines are skipped either way.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line) where lineno is 1-based and line is the text of
    the line once the comment, the trailing white space and any syntax prefix
    have been removed. This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # The with block guarantees the file is closed even if reading, the
    # comment regexp or the warn callback raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line prefix overrides the current default syntax for
            # this line only.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now