##// END OF EJS Templates
formatting: re-blacken match.py...
Matt Harbison -
r46681:1f0ed7e6 default
parent child Browse files
Show More
@@ -1,1637 +1,1634 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from .pycompat import open
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 pathutil,
21 21 policy,
22 22 pycompat,
23 23 util,
24 24 )
25 25 from .utils import stringutil
26 26
27 27 rustmod = policy.importrust('dirstate')
28 28
29 29 allpatternkinds = (
30 30 b're',
31 31 b'glob',
32 32 b'path',
33 33 b'relglob',
34 34 b'relpath',
35 35 b'relre',
36 36 b'rootglob',
37 37 b'listfile',
38 38 b'listfile0',
39 39 b'set',
40 40 b'include',
41 41 b'subinclude',
42 42 b'rootfilesin',
43 43 )
44 44 cwdrelativepatternkinds = (b'relpath', b'glob')
45 45
46 46 propertycache = util.propertycache
47 47
48 48
def _rematcher(regex):
    """Compile ``regex`` with the best available engine; return a matcher.

    The returned callable is the compiled pattern's ``test_match`` method when
    the engine provides one (facebook's re2 bindings, slightly faster), and
    its ordinary ``match`` method otherwise.
    """
    compiled = util.re.compile(regex)
    try:
        fastmatch = compiled.test_match
    except AttributeError:
        # plain 're'-style pattern object: no test_match available
        return compiled.match
    return fastmatch
58 58
59 59
60 60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
61 61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 62 matchers = []
63 63 other = []
64 64
65 65 for kind, pat, source in kindpats:
66 66 if kind == b'set':
67 67 if ctx is None:
68 68 raise error.ProgrammingError(
69 69 b"fileset expression with no context"
70 70 )
71 71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
72 72
73 73 if listsubrepos:
74 74 for subpath in ctx.substate:
75 75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
76 76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 77 matchers.append(pm)
78 78
79 79 continue
80 80 other.append((kind, pat, source))
81 81 return matchers, other
82 82
83 83
84 84 def _expandsubinclude(kindpats, root):
85 85 """Returns the list of subinclude matcher args and the kindpats without the
86 86 subincludes in it."""
87 87 relmatchers = []
88 88 other = []
89 89
90 90 for kind, pat, source in kindpats:
91 91 if kind == b'subinclude':
92 92 sourceroot = pathutil.dirname(util.normpath(source))
93 93 pat = util.pconvert(pat)
94 94 path = pathutil.join(sourceroot, pat)
95 95
96 96 newroot = pathutil.dirname(path)
97 97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98 98
99 99 prefix = pathutil.canonpath(root, root, newroot)
100 100 if prefix:
101 101 prefix += b'/'
102 102 relmatchers.append((prefix, matcherargs))
103 103 else:
104 104 other.append((kind, pat, source))
105 105
106 106 return relmatchers, other
107 107
108 108
109 109 def _kindpatsalwaysmatch(kindpats):
110 110 """Checks whether the kindspats match everything, as e.g.
111 111 'relpath:.' does.
112 112 """
113 113 for kind, pat, source in kindpats:
114 114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 115 return False
116 116 return True
117 117
118 118
def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build a single matcher out of ``kindpats``.

    ``set:`` patterns are expanded into fileset matchers first; whatever is
    left is handed to ``matchercls(root, kindpats, badfn=badfn)``.  The result
    is a nevermatcher when nothing was produced, the sole matcher when there
    is exactly one, and a unionmatcher over all of them otherwise.
    """
    filesetmatchers, plainpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )

    parts = []
    if plainpats:
        parts.append(matchercls(root, plainpats, badfn=badfn))
    if filesetmatchers:
        parts.extend(filesetmatchers)

    if not parts:
        return nevermatcher(badfn=badfn)
    if len(parts) == 1:
        return parts[0]
    return unionmatcher(parts)
146 146
147 147
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))

    if icasefs:
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            normalized = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                # regex can't be normalized
                if kind not in (b're', b'relre'):
                    original, pat = pat, foldcase(pat)

                    # Preserve the original to handle a case only rename.
                    if original != pat and original in dirstate:
                        normalized.append((kind, original, source))

                normalized.append((kind, pat, source))
            return normalized

    else:
        normalize = _donormalize

    def buildincludelike(pats):
        # include/exclude patterns default to 'glob' and their matcher is
        # built without a custom bad() callback (badfn=None).
        kp = normalize(pats, b'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kp,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )

    if include:
        m = intersectmatchers(m, buildincludelike(include))
    if exclude:
        m = differencematcher(m, buildincludelike(exclude))
    return m
310 310
311 311
def exact(files, badfn=None):
    """Return an exactmatcher for ``files`` using ``badfn`` as bad() callback."""
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
314 314
315 315
def always(badfn=None):
    """Return an alwaysmatcher using ``badfn`` as bad() callback."""
    matcher = alwaysmatcher(badfn)
    return matcher
318 318
319 319
def never(badfn=None):
    """Return a nevermatcher using ``badfn`` as bad() callback."""
    matcher = nevermatcher(badfn)
    return matcher
322 322
323 323
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() callback is ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
331 331
332 332
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Turn 'kind:pat' strings into ``(kind, pat, source)`` tuples.

    Patterns are normalized and rooted according to their kind:

    - cwd-relative kinds are canonicalized against ``root``/``cwd``
    - 'relglob', 'path', 'rootfilesin' and 'rootglob' are path-normalized
    - 'listfile'/'listfile0' are replaced by the (recursively normalized)
      patterns read from the named file, with that file as their source
    - 'include' is replaced by the (recursively normalized) patterns of the
      named pattern file
    - anything else ('re', 'relre') is kept as is

    Raises ``error.Abort`` when a list file cannot be read or when processing
    an include file aborts.  An include file failing with IOError is reported
    through ``warn`` (if given) and skipped.
    """
    result = []
    # split everything up front, before any pattern is processed
    split = [_patsplit(p, default) for p in patterns]

    for kind, pat in split:
        if kind in (b'listfile', b'listfile0'):
            try:
                data = util.readfile(pat)
                if kind == b'listfile0':
                    entries = data.split(b'\0')
                else:
                    entries = data.splitlines()
                entries = [f for f in entries if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            nested = _donormalize(entries, default, root, cwd, auditor, warn)
            # everything coming from a list file is attributed to that file
            result.extend((k, p, pat) for k, p, source in nested)
            continue

        if kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                nested = _donormalize(
                    includepats, default, root, cwd, auditor, warn
                )
                # keep the innermost source when there is one
                result.extend((k, p, source or pat) for k, p, source in nested)
            except error.Abort as inst:
                raise error.Abort(
                    b'%s: %s'
                    % (
                        pat,
                        inst.message,
                    )  # pytype: disable=unsupported-operands
                )
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue

        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        result.append((kind, pat, b''))

    return result
383 383
384 384
class basematcher(object):
    """Base class of the matchers: matches nothing and lists no files.

    Subclasses override the individual methods to describe what they match.
    """

    def __init__(self, badfn=None):
        # only shadow the class-level bad() when a callback was supplied
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not (self.always() or self.isexact() or self.prefix())
499 499
500 500
class alwaysmatcher(basematcher):
    """Matches everything.

    Every file matches and every directory is reported as fully visitable
    ('all').
    """

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return '<alwaysmatcher>'
521 521
522 522
class nevermatcher(basematcher):
    """Matches nothing.

    No directory needs to be visited, and the matcher reports itself as both
    exact and prefix so callers can take either fast path.
    """

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return '<nevermatcher>'
548 548
549 549
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` becomes the matcher's matchfn.  ``predrepr``, when given, is
    used to describe the predicate in the matcher's repr; otherwise the repr
    of ``predfn`` itself is shown.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        return b'<predicatenmatcher pred=%s>' % described
564 564
565 565
def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`.

    `path` and the members of `prefix_set` are '/'-separated byte strings
    without a trailing slash; b'' stands for the root directory, which is a
    parent of every path.
    """
    size = len(prefix_set)
    if size == 0:
        return False
    if path in prefix_set:
        return True
    # If there's more than 5 paths in prefix_set, it's *probably* quicker to
    # "walk up" the directory hierarchy instead, with the assumption that most
    # directory hierarchies are relatively shallow and hash lookup is cheap.
    if size > 5:
        return any(
            parentdir in prefix_set for parentdir in pathutil.finddirs(path)
        )

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    # A prefix is a parent of `path` only when it is followed by a path
    # separator ('foo' is a parent of 'foo/bar' but not of 'foobar').
    # Testing for the prefix plus b'/' in one go avoids indexing into `path`
    # and needs no interpreter-specific handling of single bytes.
    return any(path.startswith(pf + b'/') for pf in prefix_set)
594 594
595 595
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        if self._prefix and dir in self._fileset:
            return b'all'
        if dir in self._dirs:
            return True
        return path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        # translate the visitdir() answer: True -> 'this', falsy -> set(),
        # and 'all' stays 'all'
        visit = self.visitdir(dir)
        if visit is True:
            return b'this'
        if not visit:
            return set()
        assert visit == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
663 660
664 661
665 662 # This is basically a reimplementation of pathutil.dirs that stores the
666 663 # children instead of just a count of them, plus a small optional optimization
667 664 # to avoid some directories we don't need.
668 665 class _dirchildren(object):
669 666 def __init__(self, paths, onlyinclude=None):
670 667 self._dirs = {}
671 668 self._onlyinclude = onlyinclude or []
672 669 addpath = self.addpath
673 670 for f in paths:
674 671 addpath(f)
675 672
676 673 def addpath(self, path):
677 674 if path == b'':
678 675 return
679 676 dirs = self._dirs
680 677 findsplitdirs = _dirchildren._findsplitdirs
681 678 for d, b in findsplitdirs(path):
682 679 if d not in self._onlyinclude:
683 680 continue
684 681 dirs.setdefault(d, set()).add(b)
685 682
686 683 @staticmethod
687 684 def _findsplitdirs(path):
688 685 # yields (dirname, basename) tuples, walking back to the root. This is
689 686 # very similar to pathutil.finddirs, except:
690 687 # - produces a (dirname, basename) tuple, not just 'dirname'
691 688 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
692 689 # slash.
693 690 oldpos = len(path)
694 691 pos = path.rfind(b'/')
695 692 while pos != -1:
696 693 yield path[:pos], path[pos + 1 : oldpos]
697 694 oldpos = pos
698 695 pos = path.rfind(b'/', 0, pos)
699 696 yield b'', path[:oldpos]
700 697
701 698 def get(self, path):
702 699 return self._dirs.get(path, set())
703 700
704 701
class includematcher(basematcher):
    """Matcher built from include-style (kind, pat, source) patterns.

    Besides the compiled match function it keeps three directory sets
    computed from the patterns (roots, dirs and parents) which drive
    visitdir() and visitchildrenset().
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)
        if rustmod is not None:
            # We need to pass the patterns to Rust because they can contain
            # patterns from the user interface
            self._kindpats = kindpats
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)

        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        if dir in self._dirs or dir in self._parents:
            return True
        return path_or_parents_in_set(dir, self._roots)

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
764 761
765 762
class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # keep a list as is, materialize any other iterable into one
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        # every file and every directory except the root itself
        candidates = self._fileset | (self._dirs - {b''})
        if dir != b'':
            prefix = dir + b'/'
            plen = len(prefix)
            candidates = {
                c[plen:] for c in candidates if c.startswith(prefix)
            }
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {c for c in candidates if b'/' not in c}
        # We really do not expect children to be empty, since that would imply
        # that there's something in _dirs that didn't have a file in _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
829 826
830 827
class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 only once; its answer is needed for both checks below.
        excluded = self._m2.visitdir(dir)
        if excluded == b'all':
            # everything below dir is excluded, nothing left to visit
            return False
        included = self._m1.visitdir(dir)
        if not excluded:
            # m2 does not match dir, we can return 'all' here if possible
            return included
        # m2 may exclude things below dir, so never report 'all'
        return bool(included)

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
898 895
899 896
def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    If either argument is None the other one is returned as is.  If one of
    the matchers always matches, a copy of the other one is returned
    (carrying m1's bad and traversedir) instead of building an
    intersectionmatcher.
    """
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        clone = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        clone.bad = m1.bad
        clone.traversedir = m1.traversedir
        return clone

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
918 915
919 916
class intersectionmatcher(basematcher):
    """Matches a file only when both wrapped matchers match it.

    The bad and traversedir attributes are taken from the first matcher.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not always
            # files. For example, if intersecting a matcher "-I glob:foo.txt"
            # with matcher of "path:dir2", we don't want to remove "dir2" from
            # the set.
            return self._m1.files() + self._m2.files()

        # filter the files of whichever side is exact through the other one
        exactside, otherside = self._m1, self._m2
        if not exactside.isexact():
            exactside, otherside = otherside, exactside
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == b'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit1 and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side
        if m1_set == b'all':
            return m2_set
        if m2_set == b'all':
            return m1_set

        if b'this' in (m1_set, m2_set):
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set & m2_set

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
979 976
980 977
class subdirmatcher(basematcher):
    """Restrict a matcher to one subdirectory, using subdirectory-relative paths.

    Paths passed in are prefixed with the subdirectory before being handed to
    the wrapped matcher, and the wrapped matcher's files are reported with
    that prefix stripped:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below ``path`` and
        # express them relative to it.
        dirprefix = path + b"/"
        self._files = [
            name[len(dirprefix) :]
            for name in matcher._files
            if name.startswith(dirprefix)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def _parentpath(self, dir):
        """Map a subdirectory-relative directory to the wrapped matcher's path."""
        if dir == b'':
            return self._path
        return self._path + b"/" + dir

    def bad(self, f, msg):
        fullname = self._path + b"/" + f
        self._matcher.bad(fullname, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        fullname = self._path + b"/" + f
        return self._matcher.matchfn(fullname)

    def visitdir(self, dir):
        return self._matcher.visitdir(self._parentpath(dir))

    def visitchildrenset(self, dir):
        return self._matcher.visitchildrenset(self._parentpath(dir))

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        fields = (self._path, self._matcher)
        return b'<subdirmatcher path=%r, matcher=%r>' % fields
1061 1058
1062 1059
class prefixdirmatcher(basematcher):
    """Expose a matcher rooted in a subdirectory as a matcher on a parent.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        inner = self._matcher
        if dir == self._path:
            # The prefix directory itself is the wrapped matcher's root.
            return inner.visitdir(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return inner.visitdir(dir[len(prefix) :])
        # Outside the prefix: only directories recorded in _pathdirs are
        # worth visiting.
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        inner = self._matcher
        if dir == self._path:
            return inner.visitchildrenset(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return inner.visitchildrenset(dir[len(prefix) :])
        return b'this' if dir in self._pathdirs else set()

    def isexact(self):
        inner = self._matcher
        return inner.isexact()

    def prefix(self):
        inner = self._matcher
        return inner.prefix()

    @encoding.strmethod
    def __repr__(self):
        fields = (pycompat.bytestr(self._path), self._matcher)
        return b'<prefixdirmatcher path=%r, matcher=%r>' % fields
1144 1141
1145 1142
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    # NOTE(review): __init__ below only copies ``traversedir`` from the first
    # matcher; ``bad`` is not assigned here. Confirm whether the class
    # docstring's mention of ``bad`` is still accurate.

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        return any(match(f) for match in self._matchers)

    def visitdir(self, dir):
        verdict = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == b'all':
                # One member wants everything below; nothing can top that.
                return answer
            verdict |= answer
        return verdict

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if sawthis or answer == b'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children |= answer
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        members = self._matchers
        return b'<unionmatcher matchers=%r>' % members
1196 1193
1197 1194
def patkind(pattern, default=None):
    r"""Return the kind of a 'kind:pat' pattern, or ``default`` if it has none.

    The kind is only recognised when it is one of the known pattern kinds.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    kind, _pat = _patsplit(pattern, default)
    return kind
1212 1209
1213 1210
1214 1211 def _patsplit(pattern, default):
1215 1212 """Split a string into the optional pattern kind prefix and the actual
1216 1213 pattern."""
1217 1214 if b':' in pattern:
1218 1215 kind, pat = pattern.split(b':', 1)
1219 1216 if kind in allpatternkinds:
1220 1217 return kind, pat
1221 1218 return default, pattern
1222 1219
1223 1220
def _globre(pat):
    r"""Convert an extended glob string to a regexp string.

    The pattern is scanned one byte at a time. ``*``, ``**``, ``**/``, ``?``,
    ``[...]`` character classes, ``{a,b}`` alternation and backslash escapes
    are translated; every other byte is passed through the regex escape map.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    # i is the read cursor into pat, n its length; res accumulates the regex.
    i, n = 0, len(pat)
    res = b''
    # Nesting depth of currently open '{' groups.
    group = 0
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread byte (as a length-1 bytes), or False at end of input.
        return i < n and pat[i : i + 1]

    while i < n:
        c = pat[i : i + 1]
        i += 1
        if c not in b'*?[{},\\':
            # Ordinary byte: emit it, regex-escaped if the map says so.
            res += escape(c, c)
        elif c == b'*':
            if peek() == b'*':
                i += 1
                if peek() == b'/':
                    # '**/' matches any (possibly empty) directory prefix.
                    i += 1
                    res += b'(?:.*/)?'
                else:
                    # '**' matches anything, including '/'.
                    res += b'.*'
            else:
                # A single '*' stops at directory separators.
                res += b'[^/]*'
        elif c == b'?':
            res += b'.'
        elif c == b'[':
            # Scan ahead with j for the closing ']'. A '!' or ']' directly
            # after the '[' is skipped so it is taken as part of the class.
            j = i
            if j < n and pat[j : j + 1] in b'!]':
                j += 1
            while j < n and pat[j : j + 1] != b']':
                j += 1
            if j >= n:
                # No closing bracket: treat the '[' as a literal.
                res += b'\\['
            else:
                # Double any backslashes inside the class body.
                stuff = pat[i:j].replace(b'\\', b'\\\\')
                i = j + 1
                if stuff[0:1] == b'!':
                    # Glob negation '!' becomes regex negation '^'.
                    stuff = b'^' + stuff[1:]
                elif stuff[0:1] == b'^':
                    # A leading literal '^' must not negate the regex class.
                    stuff = b'\\' + stuff
                res = b'%s[%s]' % (res, stuff)
        elif c == b'{':
            group += 1
            res += b'(?:'
        elif c == b'}' and group:
            res += b')'
            group -= 1
        elif c == b',' and group:
            # ',' only separates alternatives while inside a '{' group.
            res += b'|'
        elif c == b'\\':
            p = peek()
            if p:
                # Backslash escapes the following byte.
                i += 1
                res += escape(p, p)
            else:
                # Trailing backslash: emit the backslash itself.
                res += escape(c, c)
        else:
            # '}' or ',' outside any group: emit as a literal.
            res += escape(c, c)
    return res
1306 1303
1307 1304
1308 1305 def _regex(kind, pat, globsuffix):
1309 1306 """Convert a (normalized) pattern of any kind into a
1310 1307 regular expression.
1311 1308 globsuffix is appended to the regexp of globs."""
1312 1309 if not pat and kind in (b'glob', b'relpath'):
1313 1310 return b''
1314 1311 if kind == b're':
1315 1312 return pat
1316 1313 if kind in (b'path', b'relpath'):
1317 1314 if pat == b'.':
1318 1315 return b''
1319 1316 return util.stringutil.reescape(pat) + b'(?:/|$)'
1320 1317 if kind == b'rootfilesin':
1321 1318 if pat == b'.':
1322 1319 escaped = b''
1323 1320 else:
1324 1321 # Pattern is a directory name.
1325 1322 escaped = util.stringutil.reescape(pat) + b'/'
1326 1323 # Anything after the pattern must be a non-directory.
1327 1324 return escaped + b'[^/]+$'
1328 1325 if kind == b'relglob':
1329 1326 globre = _globre(pat)
1330 1327 if globre.startswith(b'[^/]*'):
1331 1328 # When pat has the form *XYZ (common), make the returned regex more
1332 1329 # legible by returning the regex for **XYZ instead of **/*XYZ.
1333 1330 return b'.*' + globre[len(b'[^/]*') :] + globsuffix
1334 1331 return b'(?:|.*/)' + globre + globsuffix
1335 1332 if kind == b'relre':
1336 1333 if pat.startswith(b'^'):
1337 1334 return pat
1338 1335 return b'.*' + pat
1339 1336 if kind in (b'glob', b'rootglob'):
1340 1337 return _globre(pat) + globsuffix
1341 1338 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1342 1339
1343 1340
def _buildmatch(kindpats, globsuffix, root):
    """Return a descriptive regexp string and a match function for kindpats.

    globsuffix is appended to the regexp of globs."""
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for subincludes are built on first use and cached by prefix.
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only 'rootfilesin' patterns: a set lookup on the file's
            # directory replaces the regexp.
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                head, slash, _tail = f.rpartition(b'/')
                return (head if slash else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]

    def matchany(f):
        return any(fn(f) for fn in matchfuncs)

    return regex, matchany
1390 1387
1391 1388
# Upper bound, in bytes, used by _buildregexmatch(): a single pattern's regexp
# longer than this aborts, and joined groups of regexps are split so that each
# group stays within it.
MAX_RE_SIZE = 20000
1393 1390
1394 1391
1395 1392 def _joinregexes(regexps):
1396 1393 """gather multiple regular expressions into a single one"""
1397 1394 return b'|'.join(regexps)
1398 1395
1399 1396
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Each kindpat is converted with _regex(). The returned regexp string is
    always the full '|'-join of every piece; for matching, the pieces are
    split into several compiled groups when their combined size would exceed
    MAX_RE_SIZE. A single piece longer than MAX_RE_SIZE raises error.Abort,
    and so does a pattern that fails to compile.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # startidx: index of the first regexp in the group being accumulated.
        # groupsize: running size of that group, counting one separator byte
        # per piece.
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                # Adding this piece would overflow: close the current group
                # and start a new one at this piece.
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            groupsize += piecesize + 1

        if startidx == 0:
            # Everything fit in one group: compile the full regexp once.
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            # Flush the trailing group, then match against any of the groups.
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Compilation failed somewhere: recompile the patterns one at a time
        # so the error can name the offending pattern (and its source, if
        # one was recorded).
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                else:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        # Every pattern compiled on its own, so no single culprit to report.
        raise error.Abort(_(b"invalid pattern"))
1452 1449
1453 1450
1454 1451 def _patternrootsanddirs(kindpats):
1455 1452 """Returns roots and directories corresponding to each pattern.
1456 1453
1457 1454 This calculates the roots and directories exactly matching the patterns and
1458 1455 returns a tuple of (roots, dirs) for each. It does not return other
1459 1456 directories which may also need to be considered, like the parent
1460 1457 directories.
1461 1458 """
1462 1459 r = []
1463 1460 d = []
1464 1461 for kind, pat, source in kindpats:
1465 1462 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1466 1463 root = []
1467 1464 for p in pat.split(b'/'):
1468 1465 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1469 1466 break
1470 1467 root.append(p)
1471 1468 r.append(b'/'.join(root))
1472 1469 elif kind in (b'relpath', b'path'):
1473 1470 if pat == b'.':
1474 1471 pat = b''
1475 1472 r.append(pat)
1476 1473 elif kind in (b'rootfilesin',):
1477 1474 if pat == b'.':
1478 1475 pat = b''
1479 1476 d.append(pat)
1480 1477 else: # relglob, re, relre
1481 1478 r.append(b'')
1482 1479 return r, d
1483 1480
1484 1481
def _roots(kindpats):
    '''Return only the root directories (to match recursively) of the patterns.'''
    return _patternrootsanddirs(kindpats)[0]
1489 1486
1490 1487
def _rootsdirsandparents(kindpats):
    """Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    roots, dirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set(pathutil.dirs(dirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1533 1530
1534 1531
def _explicitfiles(kindpats):
    """Return the names from the patterns that could be explicit files.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    directoryonly = (b'rootfilesin',)
    candidates = []
    for kindpat in kindpats:
        if kindpat[0] not in directoryonly:
            candidates.append(kindpat)
    return _roots(candidates)
1547 1544
1548 1545
1549 1546 def _prefix(kindpats):
1550 1547 '''Whether all the patterns match a prefix (i.e. recursively)'''
1551 1548 for kind, pat, source in kindpats:
1552 1549 if kind not in (b'path', b'relpath'):
1553 1550 return False
1554 1551 return True
1555 1552
1556 1553
# Regexp used by readpatternfile() to strip '#' comments; compiled there on
# first use and cached in this module-level name.
_commentre = None
1558 1555
1559 1556
def readpatternfile(filepath, warn, sourceinfo=False):
    """parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.

    ``warn``, when truthy, is called with a message for each ``syntax:``
    line naming an unknown syntax; such lines are otherwise ignored.
    """
    global _commentre

    # Maps a syntax name to the kind prefix put in front of each pattern.
    # NOTE(review): the b'include' and b'subinclude' values carry no trailing
    # colon, so the startswith(rels) test below also matches any line that
    # merely begins with those letters -- confirm this is intended.
    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    fp = open(filepath, b'rb')
    # try/finally guarantees the file is closed even if parsing or the warn
    # callback raises part-way through.
    try:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                # A 'syntax:' line changes the default for following lines.
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A line may override the default syntax with its own prefix;
            # strip that prefix and remember the kind it maps to.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    finally:
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now