##// END OF EJS Templates
match: match explicit file using a set...
marmoute -
r51286:81c7d04f stable
parent child Browse files
Show More
@@ -1,1665 +1,1670 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import bisect
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from .pycompat import open
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 pathutil,
21 21 policy,
22 22 pycompat,
23 23 util,
24 24 )
25 25 from .utils import stringutil
26 26
27 27 rustmod = policy.importrust('dirstate')
28 28
29 29 allpatternkinds = (
30 30 b're',
31 31 b'glob',
32 32 b'path',
33 33 b'relglob',
34 34 b'relpath',
35 35 b'relre',
36 36 b'rootglob',
37 37 b'listfile',
38 38 b'listfile0',
39 39 b'set',
40 40 b'include',
41 41 b'subinclude',
42 42 b'rootfilesin',
43 43 )
44 44 cwdrelativepatternkinds = (b'relpath', b'glob')
45 45
46 46 propertycache = util.propertycache
47 47
48 48
def _rematcher(regex):
    """Compile ``regex`` through ``util.re`` and return a matcher function.

    The returned callable is the compiled pattern's ``test_match`` attribute
    when the engine provides one, and its ``match`` attribute otherwise.
    """
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
58 58
59 59
60 60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
61 61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 62 matchers = []
63 63 other = []
64 64
65 65 for kind, pat, source in kindpats:
66 66 if kind == b'set':
67 67 if ctx is None:
68 68 raise error.ProgrammingError(
69 69 b"fileset expression with no context"
70 70 )
71 71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
72 72
73 73 if listsubrepos:
74 74 for subpath in ctx.substate:
75 75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
76 76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 77 matchers.append(pm)
78 78
79 79 continue
80 80 other.append((kind, pat, source))
81 81 return matchers, other
82 82
83 83
84 84 def _expandsubinclude(kindpats, root):
85 85 """Returns the list of subinclude matcher args and the kindpats without the
86 86 subincludes in it."""
87 87 relmatchers = []
88 88 other = []
89 89
90 90 for kind, pat, source in kindpats:
91 91 if kind == b'subinclude':
92 92 sourceroot = pathutil.dirname(util.normpath(source))
93 93 pat = util.pconvert(pat)
94 94 path = pathutil.join(sourceroot, pat)
95 95
96 96 newroot = pathutil.dirname(path)
97 97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98 98
99 99 prefix = pathutil.canonpath(root, root, newroot)
100 100 if prefix:
101 101 prefix += b'/'
102 102 relmatchers.append((prefix, matcherargs))
103 103 else:
104 104 other.append((kind, pat, source))
105 105
106 106 return relmatchers, other
107 107
108 108
109 109 def _kindpatsalwaysmatch(kindpats):
110 110 """Checks whether the kindspats match everything, as e.g.
111 111 'relpath:.' does.
112 112 """
113 113 for kind, pat, source in kindpats:
114 114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 115 return False
116 116 return True
117 117
118 118
def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build a single matcher out of ``kindpats``.

    Fileset (``set``) patterns are expanded through ``_expandsets``; the
    remaining patterns, if any, are handed to ``matchercls``.  The result is
    a nevermatcher when nothing was produced, the lone matcher when exactly
    one was produced, and a unionmatcher over all of them otherwise.
    """
    filesetmatchers, plainpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )

    # The pattern-based matcher, when present, always comes first.
    parts = []
    if plainpats:
        parts.append(matchercls(root, plainpats, badfn=badfn))
    if filesetmatchers:
        parts.extend(filesetmatchers)

    if not parts:
        return nevermatcher(badfn=badfn)
    if len(parts) == 1:
        return parts[0]
    return unionmatcher(parts)
146 146
147 147
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [br're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='[^/]*\\.py$|foo/a(?:/|$)|.*\\.c$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [br'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [br're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))

    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                if kind in (b're', b'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                original = pat
                pat = dsnormalize(original)

                # Preserve the original to handle a case only rename.
                if original != pat and original in dirstate:
                    folded.append((kind, original, source))
                folded.append((kind, pat, source))
            return folded

    else:
        normalize = _donormalize

    def buildmatcher(matchercls, kindpats, badfn):
        # Shared plumbing for the pattern, include and exclude matchers.
        return _buildkindpatsmatcher(
            matchercls,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=badfn,
        )

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = buildmatcher(patternmatcher, kindpats, badfn)

    # The include/exclude matchers are built without the caller's badfn.
    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        m = intersectmatchers(m, buildmatcher(includematcher, kindpats, None))
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        m = differencematcher(m, buildmatcher(includematcher, kindpats, None))
    return m
310 310
311 311
def exact(files, badfn=None):
    """Return an exactmatcher built from ``files``, forwarding ``badfn``."""
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
314 314
315 315
def always(badfn=None):
    """Return an alwaysmatcher, forwarding ``badfn`` to it."""
    matcher = alwaysmatcher(badfn)
    return matcher
318 318
319 319
def never(badfn=None):
    """Return a nevermatcher, forwarding ``badfn`` to it."""
    matcher = nevermatcher(badfn)
    return matcher
322 322
323 323
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
331 331
332 332
333 333 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
334 334 """Convert 'kind:pat' from the patterns list to tuples with kind and
335 335 normalized and rooted patterns and with listfiles expanded."""
336 336 kindpats = []
337 337 for kind, pat in [_patsplit(p, default) for p in patterns]:
338 338 if kind in cwdrelativepatternkinds:
339 339 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
340 340 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
341 341 pat = util.normpath(pat)
342 342 elif kind in (b'listfile', b'listfile0'):
343 343 try:
344 344 files = util.readfile(pat)
345 345 if kind == b'listfile0':
346 346 files = files.split(b'\0')
347 347 else:
348 348 files = files.splitlines()
349 349 files = [f for f in files if f]
350 350 except EnvironmentError:
351 351 raise error.Abort(_(b"unable to read file list (%s)") % pat)
352 352 for k, p, source in _donormalize(
353 353 files, default, root, cwd, auditor, warn
354 354 ):
355 355 kindpats.append((k, p, pat))
356 356 continue
357 357 elif kind == b'include':
358 358 try:
359 359 fullpath = os.path.join(root, util.localpath(pat))
360 360 includepats = readpatternfile(fullpath, warn)
361 361 for k, p, source in _donormalize(
362 362 includepats, default, root, cwd, auditor, warn
363 363 ):
364 364 kindpats.append((k, p, source or pat))
365 365 except error.Abort as inst:
366 366 raise error.Abort(
367 367 b'%s: %s'
368 368 % (
369 369 pat,
370 370 inst.message,
371 371 )
372 372 )
373 373 except IOError as inst:
374 374 if warn:
375 375 warn(
376 376 _(b"skipping unreadable pattern file '%s': %s\n")
377 377 % (pat, stringutil.forcebytestr(inst.strerror))
378 378 )
379 379 continue
380 380 # else: re or relre - which cannot be normalized
381 381 kindpats.append((kind, pat, b''))
382 382 return kindpats
383 383
384 384
class basematcher:
    """Base class of the matchers; by default it matches nothing.

    Subclasses override ``matchfn`` and the visit/classification hooks.
    """

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is None:
            return
        self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not (self.always() or self.isexact() or self.prefix())
499 499
500 500
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super().__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        # Every file name matches.
        return True

    def visitdir(self, dir):
        # Every directory is visited together with all of its subdirectories.
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return '<alwaysmatcher>'
521 521
522 522
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super().__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # No directory is ever visited.
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return '<nevermatcher>'
548 548
549 549
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super().__init__(badfn)
        # The predicate itself is installed as this instance's matchfn.
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Fall back to the repr of the predicate when no usable predrepr
        # was supplied.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        return b'<predicatenmatcher pred=%s>' % described
564 564
565 565
def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
    count = len(prefix_set)
    if not count:
        return False
    if path in prefix_set:
        return True

    # If there's more than 5 paths in prefix_set, it's *probably* quicker to
    # "walk up" the directory hierarchy instead, with the assumption that most
    # directory hierarchies are relatively shallow and hash lookup is cheap.
    if count > 5:
        for parentdir in pathutil.finddirs(path):
            if parentdir in prefix_set:
                return True
        return False

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    slash = ord(b'/')

    # We already checked that path isn't in prefix_set exactly, so
    # `path[len(prefix)]` should never raise IndexError.
    for prefix in prefix_set:
        if path.startswith(prefix) and path[len(prefix)] == slash:
            return True
    return False
591 591
592 592
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    [b'', b'foo/a', b'', b'b']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)
        # NOTE: this sorts the caller's list in place.
        kindpats.sort()

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # Only the private ``_matchfn`` is bound here.  Binding the built
        # function to ``self.matchfn`` as well would shadow the matchfn()
        # method below and bypass its explicit-file lookup.
        self._pats, self._matchfn = _buildmatch(kindpats, b'$', root)

    def matchfn(self, fn):
        """Return True if ``fn`` is an explicitly listed file or matches
        the built pattern function."""
        # Explicit files are checked with a set lookup before falling back
        # to the function built from the patterns.
        if fn in self._fileset:
            return True
        return self._matchfn(fn)

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        if self._prefix and dir in self._fileset:
            return b'all'
        return dir in self._dirs or path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        # Translate visitdir()'s answer: True -> 'this', falsy -> set(),
        # 'all' -> 'all'.
        ret = self.visitdir(dir)
        if ret is True:
            return b'this'
        elif not ret:
            return set()
        assert ret == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
658 663
659 664
660 665 # This is basically a reimplementation of pathutil.dirs that stores the
661 666 # children instead of just a count of them, plus a small optional optimization
662 667 # to avoid some directories we don't need.
663 668 class _dirchildren:
664 669 def __init__(self, paths, onlyinclude=None):
665 670 self._dirs = {}
666 671 self._onlyinclude = onlyinclude or []
667 672 addpath = self.addpath
668 673 for f in paths:
669 674 addpath(f)
670 675
671 676 def addpath(self, path):
672 677 if path == b'':
673 678 return
674 679 dirs = self._dirs
675 680 findsplitdirs = _dirchildren._findsplitdirs
676 681 for d, b in findsplitdirs(path):
677 682 if d not in self._onlyinclude:
678 683 continue
679 684 dirs.setdefault(d, set()).add(b)
680 685
681 686 @staticmethod
682 687 def _findsplitdirs(path):
683 688 # yields (dirname, basename) tuples, walking back to the root. This is
684 689 # very similar to pathutil.finddirs, except:
685 690 # - produces a (dirname, basename) tuple, not just 'dirname'
686 691 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
687 692 # slash.
688 693 oldpos = len(path)
689 694 pos = path.rfind(b'/')
690 695 while pos != -1:
691 696 yield path[:pos], path[pos + 1 : oldpos]
692 697 oldpos = pos
693 698 pos = path.rfind(b'/', 0, pos)
694 699 yield b'', path[:oldpos]
695 700
696 701 def get(self, path):
697 702 return self._dirs.get(path, set())
698 703
699 704
class includematcher(basematcher):
    """Matcher built from (kind, pat, source) include patterns.

    Directory visiting decisions are driven by the roots, dirs and parents
    computed from the patterns by ``_rootsdirsandparents``.
    """

    def __init__(self, root, kindpats, badfn=None):
        super().__init__(badfn)
        if rustmod is not None:
            # We need to pass the patterns to Rust because they can contain
            # patterns from the user interface
            self._kindpats = kindpats
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        if dir in self._dirs:
            return True
        if dir in self._parents:
            return True
        return path_or_parents_in_set(dir, self._roots)

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, roots)
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
759 764
760 765
class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super().__init__(badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    @propertycache
    def _visitchildrenset_candidates(self):
        """A memoized set of candidates for visitchildrenset."""
        return self._fileset | (self._dirs - {b''})

    @propertycache
    def _sorted_visitchildrenset_candidates(self):
        """A memoized sorted list of candidates for visitchildrenset."""
        return sorted(self._visitchildrenset_candidates)

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        if dir == b'':
            candidates = self._visitchildrenset_candidates
        else:
            ordered = self._sorted_visitchildrenset_candidates
            prefix = dir + b'/'
            # Use bisect to find the first element potentially starting with
            # prefix (i.e. >= prefix). This should always find at least one
            # element (we'll assert later if this is not the case).
            start = bisect.bisect_left(ordered, prefix)
            # We need a representation of the first element that is > prefix
            # that does not start with prefix, so since we added a `/` on the
            # end of dir, we'll add whatever comes after slash (we could
            # probably assume that `0` is after `/`, but let's not) to the end
            # of dir instead.
            upper = dir + encoding.strtolocal(chr(ord(b'/') + 1))
            # Use bisect to find the first element >= upper
            stop = bisect.bisect_left(ordered, upper, lo=start)
            skip = len(prefix)
            candidates = {entry[skip:] for entry in ordered[start:stop]}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating that it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {name for name in candidates if b'/' not in name}
        # We really do not expect children to be empty, since that would imply
        # that there's something in _dirs that didn't have a file in _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
848 853
849 854
class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # bad and traversedir are taken from the first matcher only.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 once and reuse the answer for both checks below.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # m2 excludes everything under dir, so nothing is left to visit.
            return False
        m1_visit = self._m1.visitdir(dir)
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return m1_visit
        # m2 may exclude part of dir, so 'all' is downgraded to True.
        return bool(m1_visit)

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
917 922
918 923
def intersectmatchers(m1, m2):
    """Build a matcher that accepts only what both m1 and m2 accept.

    If either argument is None the other one is returned. The
    non-matching-attributes (bad, traversedir) of the second matcher are
    ignored; the result always carries those of m1.
    """
    if m1 is None or m2 is None:
        return m1 or m2
    if not m1.always():
        if m2.always():
            # m2 adds no restriction: a shallow copy of m1 is enough.
            return copy.copy(m1)
        return intersectionmatcher(m1, m2)
    # m1 adds no restriction: reuse a copy of m2, re-targeted at m1's
    # callbacks.
    combined = copy.copy(m2)
    # TODO: Consider encapsulating these things in a class so there's only
    # one thing to copy from m1.
    combined.bad = m1.bad
    combined.traversedir = m1.traversedir
    return combined
937 942
938 943
class intersectionmatcher(basematcher):
    """Matcher that accepts a file only when both m1 and m2 accept it.

    The non-matching-attributes (bad, traversedir) are taken from m1.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # At least one side is exact: list its files and keep those the
        # other side accepts.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        if not first:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        left = self._m1.visitchildrenset(dir)
        if not left:
            return set()
        right = self._m2.visitchildrenset(dir)
        if not right:
            return set()

        # 'all' on one side leaves the other side's answer in charge.
        if left == b'all':
            return right
        if right == b'all':
            return left

        if b'this' in (left, right):
            return b'this'

        assert isinstance(left, set) and isinstance(right, set)
        return left & right

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
998 1003
999 1004
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    Paths handed to this matcher are relative to the subdirectory; the
    subdirectory path is inserted before they are passed to the wrapped
    matcher and removed from the wrapped matcher's file list:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the files located below the subdirectory, expressed
        # relative to it.
        below = path + b"/"
        skip = len(below)
        self._files = [f[skip:] for f in matcher._files if f.startswith(below)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def bad(self, f, msg):
        fullname = self._path + b"/" + f
        self._matcher.bad(fullname, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        fullname = self._path + b"/" + f
        return self._matcher.matchfn(fullname)

    def visitdir(self, dir):
        # b'' designates the subdirectory itself.
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(target)

    def visitchildrenset(self, dir):
        # b'' designates the subdirectory itself.
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(target)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        details = (self._path, self._matcher)
        return b'<subdirmatcher path=%r, matcher=%r>' % details
1080 1085
1081 1086
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + f for f in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        # Files outside the prefix directory never match.
        return f.startswith(prefix) and self._matcher.matchfn(f[len(prefix) :])

    @propertycache
    def _pathdirs(self):
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        if dir == self._path:
            inner = b''
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix) :]
        else:
            # Not at or below the prefix: only the directories recorded in
            # _pathdirs are worth visiting.
            return dir in self._pathdirs
        return self._matcher.visitdir(inner)

    def visitchildrenset(self, dir):
        if dir == self._path:
            inner = b''
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix) :]
        else:
            return b'this' if dir in self._pathdirs else set()
        return self._matcher.visitchildrenset(inner)

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        details = (pycompat.bytestr(self._path), self._matcher)
        return b'<prefixdirmatcher path=%r, matcher=%r>' % details
1163 1168
1164 1169
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # A file matches as soon as any of the wrapped matchers accepts it.
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        result = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == b'all':
                return answer
            result |= answer
        return result

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if answer == b'this':
                # don't break, we might have an 'all' in here.
                sawthis = True
            elif not sawthis:
                assert isinstance(answer, set)
                children |= answer
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1215 1220
1216 1221
def patkind(pattern, default=None):
    r"""Return the kind of a 'kind:pat' pattern, or default.

    default is returned when the pattern carries no known kind prefix.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    kind, _rest = _patsplit(pattern, default)
    return kind
1231 1236
1232 1237
1233 1238 def _patsplit(pattern, default):
1234 1239 """Split a string into the optional pattern kind prefix and the actual
1235 1240 pattern."""
1236 1241 if b':' in pattern:
1237 1242 kind, pat = pattern.split(b':', 1)
1238 1243 if kind in allpatternkinds:
1239 1244 return kind, pat
1240 1245 return default, pattern
1241 1246
1242 1247
def _globre(pat):
    r"""Translate an extended glob into the text of a regular expression.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    pos, end = 0, len(pat)
    pieces = []
    depth = 0  # number of '{' groups currently open
    escape = util.stringutil.regexbytesescapemap.get

    def upcoming():
        # next unread byte, or False once the pattern is exhausted
        return pos < end and pat[pos : pos + 1]

    while pos < end:
        c = pat[pos : pos + 1]
        pos += 1
        if c == b'*':
            if upcoming() != b'*':
                # a single star stays within one path component
                pieces.append(b'[^/]*')
            else:
                pos += 1
                if upcoming() == b'/':
                    pos += 1
                    pieces.append(b'(?:.*/)?')
                else:
                    pieces.append(b'.*')
        elif c == b'?':
            pieces.append(b'.')
        elif c == b'[':
            # look for the closing bracket; a leading '!' or ']' is part
            # of the class itself
            close = pos
            if close < end and pat[close : close + 1] in b'!]':
                close += 1
            while close < end and pat[close : close + 1] != b']':
                close += 1
            if close >= end:
                # unterminated class: emit a literal bracket
                pieces.append(b'\\[')
            else:
                stuff = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                lead = stuff[0:1]
                if lead == b'!':
                    stuff = b'^' + stuff[1:]
                elif lead == b'^':
                    stuff = b'\\' + stuff
                pieces.append(b'[%s]' % stuff)
        elif c == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif c == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif c == b',' and depth:
            pieces.append(b'|')
        elif c == b'\\':
            following = upcoming()
            if following:
                pos += 1
                pieces.append(escape(following, following))
            else:
                pieces.append(escape(c, c))
        else:
            # ordinary byte, or '}' / ',' seen outside of any group
            pieces.append(escape(c, c))
    return b''.join(pieces)
1325 1330
1326 1331
# Matches a pattern that starts with an inline flag group such as "(?i)";
# group 1 captures the flag letters, group 2 the rest of the pattern.
FLAG_RE = util.re.compile(br'^\(\?([aiLmsux]+)\)(.*)')
1328 1333
1329 1334
1330 1335 def _regex(kind, pat, globsuffix):
1331 1336 """Convert a (normalized) pattern of any kind into a
1332 1337 regular expression.
1333 1338 globsuffix is appended to the regexp of globs."""
1334 1339 if not pat and kind in (b'glob', b'relpath'):
1335 1340 return b''
1336 1341 if kind == b're':
1337 1342 return pat
1338 1343 if kind in (b'path', b'relpath'):
1339 1344 if pat == b'.':
1340 1345 return b''
1341 1346 return util.stringutil.reescape(pat) + b'(?:/|$)'
1342 1347 if kind == b'rootfilesin':
1343 1348 if pat == b'.':
1344 1349 escaped = b''
1345 1350 else:
1346 1351 # Pattern is a directory name.
1347 1352 escaped = util.stringutil.reescape(pat) + b'/'
1348 1353 # Anything after the pattern must be a non-directory.
1349 1354 return escaped + b'[^/]+$'
1350 1355 if kind == b'relglob':
1351 1356 globre = _globre(pat)
1352 1357 if globre.startswith(b'[^/]*'):
1353 1358 # When pat has the form *XYZ (common), make the returned regex more
1354 1359 # legible by returning the regex for **XYZ instead of **/*XYZ.
1355 1360 return b'.*' + globre[len(b'[^/]*') :] + globsuffix
1356 1361 return b'(?:|.*/)' + globre + globsuffix
1357 1362 if kind == b'relre':
1358 1363 flag = None
1359 1364 m = FLAG_RE.match(pat)
1360 1365 if m:
1361 1366 flag, pat = m.groups()
1362 1367 if not pat.startswith(b'^'):
1363 1368 pat = b'.*' + pat
1364 1369 if flag is not None:
1365 1370 pat = br'(?%s:%s)' % (flag, pat)
1366 1371 return pat
1367 1372 if kind in (b'glob', b'rootglob'):
1368 1373 return _globre(pat) + globsuffix
1369 1374 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1370 1375
1371 1376
def _buildmatch(kindpats, globsuffix, root):
    """Return a regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs."""
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes are created on first use, one per prefix
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = submatchers[prefix] = match(*matcherargs)
                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only rootfilesin patterns: a set lookup on the file's parent
            # directory is used instead of a regexp.
            dirs = {p for k, p, s in kindpats}

            def indirs(f):
                parent, slash, _name = f.rpartition(b'/')
                if not slash:
                    parent = b'.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(indirs)
        else:
            regex, rematch = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(rematch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(fn(f) for fn in matchfuncs)
1418 1423
1419 1424
# Size limit, in bytes, used by _buildregexmatch(): a single regexp longer
# than this aborts, and joined regexps are split into groups so that no group
# grows past it.
MAX_RE_SIZE = 20000
1421 1426
1422 1427
1423 1428 def _joinregexes(regexps):
1424 1429 """gather multiple regular expressions into a single one"""
1425 1430 return b'|'.join(regexps)
1426 1431
1427 1432
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The returned regexp string is always the join of all the individual
    regexps, even when matching is actually performed by several smaller
    compiled groups. error.Abort is raised when a single regexp is longer
    than MAX_RE_SIZE or when a pattern fails to compile.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # Walk the regexps, closing the current group each time adding the
        # next piece would push it past MAX_RE_SIZE.
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # + 1 accounts for the '|' separator added when joining
            groupsize += piecesize + 1

        if startidx == 0:
            # no split happened: a single compiled regexp is enough
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            # flush the last, still open, group and try each group in turn
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Compile the patterns one at a time to report the first one that
        # fails to compile, prefixed with its source when it has one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                else:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        # every pattern compiles on its own: no specific culprit to name
        raise error.Abort(_(b"invalid pattern"))
1480 1485
1481 1486
1482 1487 def _patternrootsanddirs(kindpats):
1483 1488 """Returns roots and directories corresponding to each pattern.
1484 1489
1485 1490 This calculates the roots and directories exactly matching the patterns and
1486 1491 returns a tuple of (roots, dirs) for each. It does not return other
1487 1492 directories which may also need to be considered, like the parent
1488 1493 directories.
1489 1494 """
1490 1495 r = []
1491 1496 d = []
1492 1497 for kind, pat, source in kindpats:
1493 1498 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1494 1499 root = []
1495 1500 for p in pat.split(b'/'):
1496 1501 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1497 1502 break
1498 1503 root.append(p)
1499 1504 r.append(b'/'.join(root))
1500 1505 elif kind in (b'relpath', b'path'):
1501 1506 if pat == b'.':
1502 1507 pat = b''
1503 1508 r.append(pat)
1504 1509 elif kind in (b'rootfilesin',):
1505 1510 if pat == b'.':
1506 1511 pat = b''
1507 1512 d.append(pat)
1508 1513 else: # relglob, re, relre
1509 1514 r.append(b'')
1510 1515 return r, d
1511 1516
1512 1517
def _roots(kindpats):
    '''Return only the roots computed by _patternrootsanddirs(), i.e. the
    directories to match recursively for the given patterns.'''
    return _patternrootsanddirs(kindpats)[0]
1517 1522
1518 1523
def _rootsdirsandparents(kindpats):
    """Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set(pathutil.dirs(exactdirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1561 1566
1562 1567
def _explicitfiles(kindpats):
    """Return the names the patterns may designate as explicit files.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # rootfilesin can only name directories, never files, so patterns of
    # that kind are left out.
    canbefile = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(canbefile)
1575 1580
1576 1581
1577 1582 def _prefix(kindpats):
1578 1583 '''Whether all the patterns match a prefix (i.e. recursively)'''
1579 1584 for kind, pat, source in kindpats:
1580 1585 if kind not in (b'path', b'relpath'):
1581 1586 return False
1582 1587 return True
1583 1588
1584 1589
# Compiled comment-stripping regexp; readpatternfile() fills it in the first
# time it meets a line containing '#'.
_commentre = None
1586 1591
1587 1592
def readpatternfile(filepath, warn, sourceinfo=False):
    """parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    warn, when true, is called with a message for each "syntax:" line
    naming an unknown syntax; such a line is otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    Note that originalline is the line once its comment, trailing white
    space and syntax prefix have been removed.
    """
    global _commentre

    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager closes the file even when parsing raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(fp, start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                # switch the default syntax used for the following lines
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A line may override the default syntax with its own prefix,
            # written either in the long form (e.g. "relglob:") or in the
            # short form (e.g. "glob:").
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now