##// END OF EJS Templates
match: convert O(n) to O(log n) in exactmatcher.visitchildrenset...
Kyle Lippincott -
r47631:67414b0a default draft
parent child Browse files
Show More
@@ -1,1634 +1,1659 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 import bisect
10 11 import copy
11 12 import itertools
12 13 import os
13 14 import re
14 15
15 16 from .i18n import _
16 17 from .pycompat import open
17 18 from . import (
18 19 encoding,
19 20 error,
20 21 pathutil,
21 22 policy,
22 23 pycompat,
23 24 util,
24 25 )
25 26 from .utils import stringutil
26 27
27 28 rustmod = policy.importrust('dirstate')
28 29
29 30 allpatternkinds = (
30 31 b're',
31 32 b'glob',
32 33 b'path',
33 34 b'relglob',
34 35 b'relpath',
35 36 b'relre',
36 37 b'rootglob',
37 38 b'listfile',
38 39 b'listfile0',
39 40 b'set',
40 41 b'include',
41 42 b'subinclude',
42 43 b'rootfilesin',
43 44 )
44 45 cwdrelativepatternkinds = (b'relpath', b'glob')
45 46
46 47 propertycache = util.propertycache
47 48
48 49
49 50 def _rematcher(regex):
50 51 """compile the regexp with the best available regexp engine and return a
51 52 matcher function"""
52 53 m = util.re.compile(regex)
53 54 try:
54 55 # slightly faster, provided by facebook's re2 bindings
55 56 return m.test_match
56 57 except AttributeError:
57 58 return m.match
58 59
59 60
60 61 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
61 62 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 63 matchers = []
63 64 other = []
64 65
65 66 for kind, pat, source in kindpats:
66 67 if kind == b'set':
67 68 if ctx is None:
68 69 raise error.ProgrammingError(
69 70 b"fileset expression with no context"
70 71 )
71 72 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
72 73
73 74 if listsubrepos:
74 75 for subpath in ctx.substate:
75 76 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
76 77 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 78 matchers.append(pm)
78 79
79 80 continue
80 81 other.append((kind, pat, source))
81 82 return matchers, other
82 83
83 84
84 85 def _expandsubinclude(kindpats, root):
85 86 """Returns the list of subinclude matcher args and the kindpats without the
86 87 subincludes in it."""
87 88 relmatchers = []
88 89 other = []
89 90
90 91 for kind, pat, source in kindpats:
91 92 if kind == b'subinclude':
92 93 sourceroot = pathutil.dirname(util.normpath(source))
93 94 pat = util.pconvert(pat)
94 95 path = pathutil.join(sourceroot, pat)
95 96
96 97 newroot = pathutil.dirname(path)
97 98 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98 99
99 100 prefix = pathutil.canonpath(root, root, newroot)
100 101 if prefix:
101 102 prefix += b'/'
102 103 relmatchers.append((prefix, matcherargs))
103 104 else:
104 105 other.append((kind, pat, source))
105 106
106 107 return relmatchers, other
107 108
108 109
109 110 def _kindpatsalwaysmatch(kindpats):
110 111 """Checks whether the kindspats match everything, as e.g.
111 112 'relpath:.' does.
112 113 """
113 114 for kind, pat, source in kindpats:
114 115 if pat != b'' or kind not in [b'relpath', b'glob']:
115 116 return False
116 117 return True
117 118
118 119
119 120 def _buildkindpatsmatcher(
120 121 matchercls,
121 122 root,
122 123 cwd,
123 124 kindpats,
124 125 ctx=None,
125 126 listsubrepos=False,
126 127 badfn=None,
127 128 ):
128 129 matchers = []
129 130 fms, kindpats = _expandsets(
130 131 cwd,
131 132 kindpats,
132 133 ctx=ctx,
133 134 listsubrepos=listsubrepos,
134 135 badfn=badfn,
135 136 )
136 137 if kindpats:
137 138 m = matchercls(root, kindpats, badfn=badfn)
138 139 matchers.append(m)
139 140 if fms:
140 141 matchers.extend(fms)
141 142 if not matchers:
142 143 return nevermatcher(badfn=badfn)
143 144 if len(matchers) == 1:
144 145 return matchers[0]
145 146 return unionmatcher(matchers)
146 147
147 148
148 149 def match(
149 150 root,
150 151 cwd,
151 152 patterns=None,
152 153 include=None,
153 154 exclude=None,
154 155 default=b'glob',
155 156 auditor=None,
156 157 ctx=None,
157 158 listsubrepos=False,
158 159 warn=None,
159 160 badfn=None,
160 161 icasefs=False,
161 162 ):
162 163 r"""build an object to match a set of file patterns
163 164
164 165 arguments:
165 166 root - the canonical root of the tree you're matching against
166 167 cwd - the current working directory, if relevant
167 168 patterns - patterns to find
168 169 include - patterns to include (unless they are excluded)
169 170 exclude - patterns to exclude (even if they are included)
170 171 default - if a pattern in patterns has no explicit type, assume this one
171 172 auditor - optional path auditor
172 173 ctx - optional changecontext
173 174 listsubrepos - if True, recurse into subrepositories
174 175 warn - optional function used for printing warnings
175 176 badfn - optional bad() callback for this matcher instead of the default
176 177 icasefs - make a matcher for wdir on case insensitive filesystems, which
177 178 normalizes the given patterns to the case in the filesystem
178 179
179 180 a pattern is one of:
180 181 'glob:<glob>' - a glob relative to cwd
181 182 're:<regexp>' - a regular expression
182 183 'path:<path>' - a path relative to repository root, which is matched
183 184 recursively
184 185 'rootfilesin:<path>' - a path relative to repository root, which is
185 186 matched non-recursively (will not match subdirectories)
186 187 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
187 188 'relpath:<path>' - a path relative to cwd
188 189 'relre:<regexp>' - a regexp that needn't match the start of a name
189 190 'set:<fileset>' - a fileset expression
190 191 'include:<path>' - a file of patterns to read and include
191 192 'subinclude:<path>' - a file of patterns to match against files under
192 193 the same directory
193 194 '<something>' - a pattern of the specified default type
194 195
195 196 >>> def _match(root, *args, **kwargs):
196 197 ... return match(util.localpath(root), *args, **kwargs)
197 198
198 199 Usually a patternmatcher is returned:
199 200 >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
200 201 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
201 202
202 203 Combining 'patterns' with 'include' (resp. 'exclude') gives an
203 204 intersectionmatcher (resp. a differencematcher):
204 205 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
205 206 <class 'mercurial.match.intersectionmatcher'>
206 207 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
207 208 <class 'mercurial.match.differencematcher'>
208 209
209 210 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
210 211 >>> _match(b'/foo', b'.', [])
211 212 <alwaysmatcher>
212 213
213 214 The 'default' argument determines which kind of pattern is assumed if a
214 215 pattern has no prefix:
215 216 >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
216 217 <patternmatcher patterns='.*\\.c$'>
217 218 >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
218 219 <patternmatcher patterns='main\\.py(?:/|$)'>
219 220 >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
220 221 <patternmatcher patterns='main.py'>
221 222
222 223 The primary use of matchers is to check whether a value (usually a file
223 224 name) matches againset one of the patterns given at initialization. There
224 225 are two ways of doing this check.
225 226
226 227 >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])
227 228
228 229 1. Calling the matcher with a file name returns True if any pattern
229 230 matches that file name:
230 231 >>> m(b'a')
231 232 True
232 233 >>> m(b'main.c')
233 234 True
234 235 >>> m(b'test.py')
235 236 False
236 237
237 238 2. Using the exact() method only returns True if the file name matches one
238 239 of the exact patterns (i.e. not re: or glob: patterns):
239 240 >>> m.exact(b'a')
240 241 True
241 242 >>> m.exact(b'main.c')
242 243 False
243 244 """
244 245 assert os.path.isabs(root)
245 246 cwd = os.path.join(root, util.localpath(cwd))
246 247 normalize = _donormalize
247 248 if icasefs:
248 249 dirstate = ctx.repo().dirstate
249 250 dsnormalize = dirstate.normalize
250 251
251 252 def normalize(patterns, default, root, cwd, auditor, warn):
252 253 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
253 254 kindpats = []
254 255 for kind, pats, source in kp:
255 256 if kind not in (b're', b'relre'): # regex can't be normalized
256 257 p = pats
257 258 pats = dsnormalize(pats)
258 259
259 260 # Preserve the original to handle a case only rename.
260 261 if p != pats and p in dirstate:
261 262 kindpats.append((kind, p, source))
262 263
263 264 kindpats.append((kind, pats, source))
264 265 return kindpats
265 266
266 267 if patterns:
267 268 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
268 269 if _kindpatsalwaysmatch(kindpats):
269 270 m = alwaysmatcher(badfn)
270 271 else:
271 272 m = _buildkindpatsmatcher(
272 273 patternmatcher,
273 274 root,
274 275 cwd,
275 276 kindpats,
276 277 ctx=ctx,
277 278 listsubrepos=listsubrepos,
278 279 badfn=badfn,
279 280 )
280 281 else:
281 282 # It's a little strange that no patterns means to match everything.
282 283 # Consider changing this to match nothing (probably using nevermatcher).
283 284 m = alwaysmatcher(badfn)
284 285
285 286 if include:
286 287 kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
287 288 im = _buildkindpatsmatcher(
288 289 includematcher,
289 290 root,
290 291 cwd,
291 292 kindpats,
292 293 ctx=ctx,
293 294 listsubrepos=listsubrepos,
294 295 badfn=None,
295 296 )
296 297 m = intersectmatchers(m, im)
297 298 if exclude:
298 299 kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
299 300 em = _buildkindpatsmatcher(
300 301 includematcher,
301 302 root,
302 303 cwd,
303 304 kindpats,
304 305 ctx=ctx,
305 306 listsubrepos=listsubrepos,
306 307 badfn=None,
307 308 )
308 309 m = differencematcher(m, em)
309 310 return m
310 311
311 312
312 313 def exact(files, badfn=None):
313 314 return exactmatcher(files, badfn=badfn)
314 315
315 316
316 317 def always(badfn=None):
317 318 return alwaysmatcher(badfn)
318 319
319 320
320 321 def never(badfn=None):
321 322 return nevermatcher(badfn)
322 323
323 324
324 325 def badmatch(match, badfn):
325 326 """Make a copy of the given matcher, replacing its bad method with the given
326 327 one.
327 328 """
328 329 m = copy.copy(match)
329 330 m.bad = badfn
330 331 return m
331 332
332 333
333 334 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
334 335 """Convert 'kind:pat' from the patterns list to tuples with kind and
335 336 normalized and rooted patterns and with listfiles expanded."""
336 337 kindpats = []
337 338 for kind, pat in [_patsplit(p, default) for p in patterns]:
338 339 if kind in cwdrelativepatternkinds:
339 340 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
340 341 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
341 342 pat = util.normpath(pat)
342 343 elif kind in (b'listfile', b'listfile0'):
343 344 try:
344 345 files = util.readfile(pat)
345 346 if kind == b'listfile0':
346 347 files = files.split(b'\0')
347 348 else:
348 349 files = files.splitlines()
349 350 files = [f for f in files if f]
350 351 except EnvironmentError:
351 352 raise error.Abort(_(b"unable to read file list (%s)") % pat)
352 353 for k, p, source in _donormalize(
353 354 files, default, root, cwd, auditor, warn
354 355 ):
355 356 kindpats.append((k, p, pat))
356 357 continue
357 358 elif kind == b'include':
358 359 try:
359 360 fullpath = os.path.join(root, util.localpath(pat))
360 361 includepats = readpatternfile(fullpath, warn)
361 362 for k, p, source in _donormalize(
362 363 includepats, default, root, cwd, auditor, warn
363 364 ):
364 365 kindpats.append((k, p, source or pat))
365 366 except error.Abort as inst:
366 367 raise error.Abort(
367 368 b'%s: %s'
368 369 % (
369 370 pat,
370 371 inst.message,
371 372 ) # pytype: disable=unsupported-operands
372 373 )
373 374 except IOError as inst:
374 375 if warn:
375 376 warn(
376 377 _(b"skipping unreadable pattern file '%s': %s\n")
377 378 % (pat, stringutil.forcebytestr(inst.strerror))
378 379 )
379 380 continue
380 381 # else: re or relre - which cannot be normalized
381 382 kindpats.append((kind, pat, b''))
382 383 return kindpats
383 384
384 385
385 386 class basematcher(object):
386 387 def __init__(self, badfn=None):
387 388 if badfn is not None:
388 389 self.bad = badfn
389 390
390 391 def __call__(self, fn):
391 392 return self.matchfn(fn)
392 393
393 394 # Callbacks related to how the matcher is used by dirstate.walk.
394 395 # Subscribers to these events must monkeypatch the matcher object.
395 396 def bad(self, f, msg):
396 397 """Callback from dirstate.walk for each explicit file that can't be
397 398 found/accessed, with an error message."""
398 399
399 400 # If an traversedir is set, it will be called when a directory discovered
400 401 # by recursive traversal is visited.
401 402 traversedir = None
402 403
403 404 @propertycache
404 405 def _files(self):
405 406 return []
406 407
407 408 def files(self):
408 409 """Explicitly listed files or patterns or roots:
409 410 if no patterns or .always(): empty list,
410 411 if exact: list exact files,
411 412 if not .anypats(): list all files and dirs,
412 413 else: optimal roots"""
413 414 return self._files
414 415
415 416 @propertycache
416 417 def _fileset(self):
417 418 return set(self._files)
418 419
419 420 def exact(self, f):
420 421 '''Returns True if f is in .files().'''
421 422 return f in self._fileset
422 423
423 424 def matchfn(self, f):
424 425 return False
425 426
426 427 def visitdir(self, dir):
427 428 """Decides whether a directory should be visited based on whether it
428 429 has potential matches in it or one of its subdirectories. This is
429 430 based on the match's primary, included, and excluded patterns.
430 431
431 432 Returns the string 'all' if the given directory and all subdirectories
432 433 should be visited. Otherwise returns True or False indicating whether
433 434 the given directory should be visited.
434 435 """
435 436 return True
436 437
437 438 def visitchildrenset(self, dir):
438 439 """Decides whether a directory should be visited based on whether it
439 440 has potential matches in it or one of its subdirectories, and
440 441 potentially lists which subdirectories of that directory should be
441 442 visited. This is based on the match's primary, included, and excluded
442 443 patterns.
443 444
444 445 This function is very similar to 'visitdir', and the following mapping
445 446 can be applied:
446 447
447 448 visitdir | visitchildrenlist
448 449 ----------+-------------------
449 450 False | set()
450 451 'all' | 'all'
451 452 True | 'this' OR non-empty set of subdirs -or files- to visit
452 453
453 454 Example:
454 455 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
455 456 the following values (assuming the implementation of visitchildrenset
456 457 is capable of recognizing this; some implementations are not).
457 458
458 459 '' -> {'foo', 'qux'}
459 460 'baz' -> set()
460 461 'foo' -> {'bar'}
461 462 # Ideally this would be 'all', but since the prefix nature of matchers
462 463 # is applied to the entire matcher, we have to downgrade this to
463 464 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
464 465 # in.
465 466 'foo/bar' -> 'this'
466 467 'qux' -> 'this'
467 468
468 469 Important:
469 470 Most matchers do not know if they're representing files or
470 471 directories. They see ['path:dir/f'] and don't know whether 'f' is a
471 472 file or a directory, so visitchildrenset('dir') for most matchers will
472 473 return {'f'}, but if the matcher knows it's a file (like exactmatcher
473 474 does), it may return 'this'. Do not rely on the return being a set
474 475 indicating that there are no files in this dir to investigate (or
475 476 equivalently that if there are files to investigate in 'dir' that it
476 477 will always return 'this').
477 478 """
478 479 return b'this'
479 480
480 481 def always(self):
481 482 """Matcher will match everything and .files() will be empty --
482 483 optimization might be possible."""
483 484 return False
484 485
485 486 def isexact(self):
486 487 """Matcher will match exactly the list of files in .files() --
487 488 optimization might be possible."""
488 489 return False
489 490
490 491 def prefix(self):
491 492 """Matcher will match the paths in .files() recursively --
492 493 optimization might be possible."""
493 494 return False
494 495
495 496 def anypats(self):
496 497 """None of .always(), .isexact(), and .prefix() is true --
497 498 optimizations will be difficult."""
498 499 return not self.always() and not self.isexact() and not self.prefix()
499 500
500 501
501 502 class alwaysmatcher(basematcher):
502 503 '''Matches everything.'''
503 504
504 505 def __init__(self, badfn=None):
505 506 super(alwaysmatcher, self).__init__(badfn)
506 507
507 508 def always(self):
508 509 return True
509 510
510 511 def matchfn(self, f):
511 512 return True
512 513
513 514 def visitdir(self, dir):
514 515 return b'all'
515 516
516 517 def visitchildrenset(self, dir):
517 518 return b'all'
518 519
519 520 def __repr__(self):
520 521 return r'<alwaysmatcher>'
521 522
522 523
523 524 class nevermatcher(basematcher):
524 525 '''Matches nothing.'''
525 526
526 527 def __init__(self, badfn=None):
527 528 super(nevermatcher, self).__init__(badfn)
528 529
529 530 # It's a little weird to say that the nevermatcher is an exact matcher
530 531 # or a prefix matcher, but it seems to make sense to let callers take
531 532 # fast paths based on either. There will be no exact matches, nor any
532 533 # prefixes (files() returns []), so fast paths iterating over them should
533 534 # be efficient (and correct).
534 535 def isexact(self):
535 536 return True
536 537
537 538 def prefix(self):
538 539 return True
539 540
540 541 def visitdir(self, dir):
541 542 return False
542 543
543 544 def visitchildrenset(self, dir):
544 545 return set()
545 546
546 547 def __repr__(self):
547 548 return r'<nevermatcher>'
548 549
549 550
550 551 class predicatematcher(basematcher):
551 552 """A matcher adapter for a simple boolean function"""
552 553
553 554 def __init__(self, predfn, predrepr=None, badfn=None):
554 555 super(predicatematcher, self).__init__(badfn)
555 556 self.matchfn = predfn
556 557 self._predrepr = predrepr
557 558
558 559 @encoding.strmethod
559 560 def __repr__(self):
560 561 s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(
561 562 self.matchfn
562 563 )
563 564 return b'<predicatenmatcher pred=%s>' % s
564 565
565 566
566 567 def path_or_parents_in_set(path, prefix_set):
567 568 """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
568 569 l = len(prefix_set)
569 570 if l == 0:
570 571 return False
571 572 if path in prefix_set:
572 573 return True
573 574 # If there's more than 5 paths in prefix_set, it's *probably* quicker to
574 575 # "walk up" the directory hierarchy instead, with the assumption that most
575 576 # directory hierarchies are relatively shallow and hash lookup is cheap.
576 577 if l > 5:
577 578 return any(
578 579 parentdir in prefix_set for parentdir in pathutil.finddirs(path)
579 580 )
580 581
581 582 # FIXME: Ideally we'd never get to this point if this is the case - we'd
582 583 # recognize ourselves as an 'always' matcher and skip this.
583 584 if b'' in prefix_set:
584 585 return True
585 586
586 587 if pycompat.ispy3:
587 588 sl = ord(b'/')
588 589 else:
589 590 sl = '/'
590 591
591 592 # We already checked that path isn't in prefix_set exactly, so
592 593 # `path[len(pf)] should never raise IndexError.
593 594 return any(path.startswith(pf) and path[len(pf)] == sl for pf in prefix_set)
594 595
595 596
596 597 class patternmatcher(basematcher):
597 598 r"""Matches a set of (kind, pat, source) against a 'root' directory.
598 599
599 600 >>> kindpats = [
600 601 ... (b're', br'.*\.c$', b''),
601 602 ... (b'path', b'foo/a', b''),
602 603 ... (b'relpath', b'b', b''),
603 604 ... (b'glob', b'*.h', b''),
604 605 ... ]
605 606 >>> m = patternmatcher(b'foo', kindpats)
606 607 >>> m(b'main.c') # matches re:.*\.c$
607 608 True
608 609 >>> m(b'b.txt')
609 610 False
610 611 >>> m(b'foo/a') # matches path:foo/a
611 612 True
612 613 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
613 614 False
614 615 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
615 616 True
616 617 >>> m(b'lib.h') # matches glob:*.h
617 618 True
618 619
619 620 >>> m.files()
620 621 ['', 'foo/a', 'b', '']
621 622 >>> m.exact(b'foo/a')
622 623 True
623 624 >>> m.exact(b'b')
624 625 True
625 626 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
626 627 False
627 628 """
628 629
629 630 def __init__(self, root, kindpats, badfn=None):
630 631 super(patternmatcher, self).__init__(badfn)
631 632
632 633 self._files = _explicitfiles(kindpats)
633 634 self._prefix = _prefix(kindpats)
634 635 self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)
635 636
636 637 @propertycache
637 638 def _dirs(self):
638 639 return set(pathutil.dirs(self._fileset))
639 640
640 641 def visitdir(self, dir):
641 642 if self._prefix and dir in self._fileset:
642 643 return b'all'
643 644 return dir in self._dirs or path_or_parents_in_set(dir, self._fileset)
644 645
645 646 def visitchildrenset(self, dir):
646 647 ret = self.visitdir(dir)
647 648 if ret is True:
648 649 return b'this'
649 650 elif not ret:
650 651 return set()
651 652 assert ret == b'all'
652 653 return b'all'
653 654
654 655 def prefix(self):
655 656 return self._prefix
656 657
657 658 @encoding.strmethod
658 659 def __repr__(self):
659 660 return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
660 661
661 662
662 663 # This is basically a reimplementation of pathutil.dirs that stores the
663 664 # children instead of just a count of them, plus a small optional optimization
664 665 # to avoid some directories we don't need.
665 666 class _dirchildren(object):
666 667 def __init__(self, paths, onlyinclude=None):
667 668 self._dirs = {}
668 669 self._onlyinclude = onlyinclude or []
669 670 addpath = self.addpath
670 671 for f in paths:
671 672 addpath(f)
672 673
673 674 def addpath(self, path):
674 675 if path == b'':
675 676 return
676 677 dirs = self._dirs
677 678 findsplitdirs = _dirchildren._findsplitdirs
678 679 for d, b in findsplitdirs(path):
679 680 if d not in self._onlyinclude:
680 681 continue
681 682 dirs.setdefault(d, set()).add(b)
682 683
683 684 @staticmethod
684 685 def _findsplitdirs(path):
685 686 # yields (dirname, basename) tuples, walking back to the root. This is
686 687 # very similar to pathutil.finddirs, except:
687 688 # - produces a (dirname, basename) tuple, not just 'dirname'
688 689 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
689 690 # slash.
690 691 oldpos = len(path)
691 692 pos = path.rfind(b'/')
692 693 while pos != -1:
693 694 yield path[:pos], path[pos + 1 : oldpos]
694 695 oldpos = pos
695 696 pos = path.rfind(b'/', 0, pos)
696 697 yield b'', path[:oldpos]
697 698
698 699 def get(self, path):
699 700 return self._dirs.get(path, set())
700 701
701 702
702 703 class includematcher(basematcher):
703 704 def __init__(self, root, kindpats, badfn=None):
704 705 super(includematcher, self).__init__(badfn)
705 706 if rustmod is not None:
706 707 # We need to pass the patterns to Rust because they can contain
707 708 # patterns from the user interface
708 709 self._kindpats = kindpats
709 710 self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
710 711 self._prefix = _prefix(kindpats)
711 712 roots, dirs, parents = _rootsdirsandparents(kindpats)
712 713 # roots are directories which are recursively included.
713 714 self._roots = set(roots)
714 715 # dirs are directories which are non-recursively included.
715 716 self._dirs = set(dirs)
716 717 # parents are directories which are non-recursively included because
717 718 # they are needed to get to items in _dirs or _roots.
718 719 self._parents = parents
719 720
720 721 def visitdir(self, dir):
721 722 if self._prefix and dir in self._roots:
722 723 return b'all'
723 724 return (
724 725 dir in self._dirs
725 726 or dir in self._parents
726 727 or path_or_parents_in_set(dir, self._roots)
727 728 )
728 729
729 730 @propertycache
730 731 def _allparentschildren(self):
731 732 # It may seem odd that we add dirs, roots, and parents, and then
732 733 # restrict to only parents. This is to catch the case of:
733 734 # dirs = ['foo/bar']
734 735 # parents = ['foo']
735 736 # if we asked for the children of 'foo', but had only added
736 737 # self._parents, we wouldn't be able to respond ['bar'].
737 738 return _dirchildren(
738 739 itertools.chain(self._dirs, self._roots, self._parents),
739 740 onlyinclude=self._parents,
740 741 )
741 742
742 743 def visitchildrenset(self, dir):
743 744 if self._prefix and dir in self._roots:
744 745 return b'all'
745 746 # Note: this does *not* include the 'dir in self._parents' case from
746 747 # visitdir, that's handled below.
747 748 if (
748 749 b'' in self._roots
749 750 or dir in self._dirs
750 751 or path_or_parents_in_set(dir, self._roots)
751 752 ):
752 753 return b'this'
753 754
754 755 if dir in self._parents:
755 756 return self._allparentschildren.get(dir) or set()
756 757 return set()
757 758
758 759 @encoding.strmethod
759 760 def __repr__(self):
760 761 return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
761 762
762 763
763 764 class exactmatcher(basematcher):
764 765 r"""Matches the input files exactly. They are interpreted as paths, not
765 766 patterns (so no kind-prefixes).
766 767
767 768 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
768 769 >>> m(b'a.txt')
769 770 True
770 771 >>> m(b'b.txt')
771 772 False
772 773
773 774 Input files that would be matched are exactly those returned by .files()
774 775 >>> m.files()
775 776 ['a.txt', 're:.*\\.c$']
776 777
777 778 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
778 779 >>> m(b'main.c')
779 780 False
780 781 >>> m(br're:.*\.c$')
781 782 True
782 783 """
783 784
784 785 def __init__(self, files, badfn=None):
785 786 super(exactmatcher, self).__init__(badfn)
786 787
787 788 if isinstance(files, list):
788 789 self._files = files
789 790 else:
790 791 self._files = list(files)
791 792
792 793 matchfn = basematcher.exact
793 794
794 795 @propertycache
795 796 def _dirs(self):
796 797 return set(pathutil.dirs(self._fileset))
797 798
798 799 def visitdir(self, dir):
799 800 return dir in self._dirs
800 801
802 @propertycache
803 def _visitchildrenset_candidates(self):
804 """A memoized set of candidates for visitchildrenset."""
805 return self._fileset | self._dirs - {b''}
806
807 @propertycache
808 def _sorted_visitchildrenset_candidates(self):
809 """A memoized sorted list of candidates for visitchildrenset."""
810 return sorted(self._visitchildrenset_candidates)
811
801 812 def visitchildrenset(self, dir):
802 813 if not self._fileset or dir not in self._dirs:
803 814 return set()
804 815
805 candidates = self._fileset | self._dirs - {b''}
806 if dir != b'':
816 if dir == b'':
817 candidates = self._visitchildrenset_candidates
818 else:
819 candidates = self._sorted_visitchildrenset_candidates
807 820 d = dir + b'/'
808 candidates = {c[len(d) :] for c in candidates if c.startswith(d)}
821 # Use bisect to find the first element potentially starting with d
822 # (i.e. >= d). This should always find at least one element (we'll
823 # assert later if this is not the case).
824 first = bisect.bisect_left(candidates, d)
825 # We need a representation of the first element that is > d that
826 # does not start with d, so since we added a `/` on the end of dir,
827 # we'll add whatever comes after slash (we could probably assume
828 # that `0` is after `/`, but let's not) to the end of dir instead.
829 dnext = dir + encoding.strtolocal(chr(ord(b'/') + 1))
830 # Use bisect to find the first element >= d_next
831 last = bisect.bisect_left(candidates, dnext, lo=first)
832 dlen = len(d)
833 candidates = {c[dlen :] for c in candidates[first:last]}
809 834 # self._dirs includes all of the directories, recursively, so if
810 835 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
811 836 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
812 837 # '/' in it, indicating a it's for a subdir-of-a-subdir; the
813 838 # immediate subdir will be in there without a slash.
814 839 ret = {c for c in candidates if b'/' not in c}
815 840 # We really do not expect ret to be empty, since that would imply that
816 841 # there's something in _dirs that didn't have a file in _fileset.
817 842 assert ret
818 843 return ret
819 844
820 845 def isexact(self):
821 846 return True
822 847
823 848 @encoding.strmethod
824 849 def __repr__(self):
825 850 return b'<exactmatcher files=%r>' % self._files
826 851
827 852
828 853 class differencematcher(basematcher):
829 854 """Composes two matchers by matching if the first matches and the second
830 855 does not.
831 856
832 857 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
833 858 """
834 859
835 860 def __init__(self, m1, m2):
836 861 super(differencematcher, self).__init__()
837 862 self._m1 = m1
838 863 self._m2 = m2
839 864 self.bad = m1.bad
840 865 self.traversedir = m1.traversedir
841 866
842 867 def matchfn(self, f):
843 868 return self._m1(f) and not self._m2(f)
844 869
845 870 @propertycache
846 871 def _files(self):
847 872 if self.isexact():
848 873 return [f for f in self._m1.files() if self(f)]
849 874 # If m1 is not an exact matcher, we can't easily figure out the set of
850 875 # files, because its files() are not always files. For example, if
851 876 # m1 is "path:dir" and m2 is "rootfileins:.", we don't
852 877 # want to remove "dir" from the set even though it would match m2,
853 878 # because the "dir" in m1 may not be a file.
854 879 return self._m1.files()
855 880
856 881 def visitdir(self, dir):
857 882 if self._m2.visitdir(dir) == b'all':
858 883 return False
859 884 elif not self._m2.visitdir(dir):
860 885 # m2 does not match dir, we can return 'all' here if possible
861 886 return self._m1.visitdir(dir)
862 887 return bool(self._m1.visitdir(dir))
863 888
864 889 def visitchildrenset(self, dir):
865 890 m2_set = self._m2.visitchildrenset(dir)
866 891 if m2_set == b'all':
867 892 return set()
868 893 m1_set = self._m1.visitchildrenset(dir)
869 894 # Possible values for m1: 'all', 'this', set(...), set()
870 895 # Possible values for m2: 'this', set(...), set()
871 896 # If m2 has nothing under here that we care about, return m1, even if
872 897 # it's 'all'. This is a change in behavior from visitdir, which would
873 898 # return True, not 'all', for some reason.
874 899 if not m2_set:
875 900 return m1_set
876 901 if m1_set in [b'all', b'this']:
877 902 # Never return 'all' here if m2_set is any kind of non-empty (either
878 903 # 'this' or set(foo)), since m2 might return set() for a
879 904 # subdirectory.
880 905 return b'this'
881 906 # Possible values for m1: set(...), set()
882 907 # Possible values for m2: 'this', set(...)
883 908 # We ignore m2's set results. They're possibly incorrect:
884 909 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
885 910 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
886 911 # return set(), which is *not* correct, we still need to visit 'dir'!
887 912 return m1_set
888 913
889 914 def isexact(self):
890 915 return self._m1.isexact()
891 916
892 917 @encoding.strmethod
893 918 def __repr__(self):
894 919 return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
895 920
896 921
897 922 def intersectmatchers(m1, m2):
898 923 """Composes two matchers by matching if both of them match.
899 924
900 925 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
901 926 """
902 927 if m1 is None or m2 is None:
903 928 return m1 or m2
904 929 if m1.always():
905 930 m = copy.copy(m2)
906 931 # TODO: Consider encapsulating these things in a class so there's only
907 932 # one thing to copy from m1.
908 933 m.bad = m1.bad
909 934 m.traversedir = m1.traversedir
910 935 return m
911 936 if m2.always():
912 937 m = copy.copy(m1)
913 938 return m
914 939 return intersectionmatcher(m1, m2)
915 940
916 941
917 942 class intersectionmatcher(basematcher):
918 943 def __init__(self, m1, m2):
919 944 super(intersectionmatcher, self).__init__()
920 945 self._m1 = m1
921 946 self._m2 = m2
922 947 self.bad = m1.bad
923 948 self.traversedir = m1.traversedir
924 949
925 950 @propertycache
926 951 def _files(self):
927 952 if self.isexact():
928 953 m1, m2 = self._m1, self._m2
929 954 if not m1.isexact():
930 955 m1, m2 = m2, m1
931 956 return [f for f in m1.files() if m2(f)]
932 957 # It neither m1 nor m2 is an exact matcher, we can't easily intersect
933 958 # the set of files, because their files() are not always files. For
934 959 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
935 960 # "path:dir2", we don't want to remove "dir2" from the set.
936 961 return self._m1.files() + self._m2.files()
937 962
938 963 def matchfn(self, f):
939 964 return self._m1(f) and self._m2(f)
940 965
941 966 def visitdir(self, dir):
942 967 visit1 = self._m1.visitdir(dir)
943 968 if visit1 == b'all':
944 969 return self._m2.visitdir(dir)
945 970 # bool() because visit1=True + visit2='all' should not be 'all'
946 971 return bool(visit1 and self._m2.visitdir(dir))
947 972
948 973 def visitchildrenset(self, dir):
949 974 m1_set = self._m1.visitchildrenset(dir)
950 975 if not m1_set:
951 976 return set()
952 977 m2_set = self._m2.visitchildrenset(dir)
953 978 if not m2_set:
954 979 return set()
955 980
956 981 if m1_set == b'all':
957 982 return m2_set
958 983 elif m2_set == b'all':
959 984 return m1_set
960 985
961 986 if m1_set == b'this' or m2_set == b'this':
962 987 return b'this'
963 988
964 989 assert isinstance(m1_set, set) and isinstance(m2_set, set)
965 990 return m1_set.intersection(m2_set)
966 991
967 992 def always(self):
968 993 return self._m1.always() and self._m2.always()
969 994
970 995 def isexact(self):
971 996 return self._m1.isexact() or self._m2.isexact()
972 997
973 998 @encoding.strmethod
974 999 def __repr__(self):
975 1000 return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
976 1001
977 1002
978 1003 class subdirmatcher(basematcher):
979 1004 """Adapt a matcher to work on a subdirectory only.
980 1005
981 1006 The paths are remapped to remove/insert the path as needed:
982 1007
983 1008 >>> from . import pycompat
984 1009 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
985 1010 >>> m2 = subdirmatcher(b'sub', m1)
986 1011 >>> m2(b'a.txt')
987 1012 False
988 1013 >>> m2(b'b.txt')
989 1014 True
990 1015 >>> m2.matchfn(b'a.txt')
991 1016 False
992 1017 >>> m2.matchfn(b'b.txt')
993 1018 True
994 1019 >>> m2.files()
995 1020 ['b.txt']
996 1021 >>> m2.exact(b'b.txt')
997 1022 True
998 1023 >>> def bad(f, msg):
999 1024 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
1000 1025 >>> m1.bad = bad
1001 1026 >>> m2.bad(b'x.txt', b'No such file')
1002 1027 sub/x.txt: No such file
1003 1028 """
1004 1029
1005 1030 def __init__(self, path, matcher):
1006 1031 super(subdirmatcher, self).__init__()
1007 1032 self._path = path
1008 1033 self._matcher = matcher
1009 1034 self._always = matcher.always()
1010 1035
1011 1036 self._files = [
1012 1037 f[len(path) + 1 :]
1013 1038 for f in matcher._files
1014 1039 if f.startswith(path + b"/")
1015 1040 ]
1016 1041
1017 1042 # If the parent repo had a path to this subrepo and the matcher is
1018 1043 # a prefix matcher, this submatcher always matches.
1019 1044 if matcher.prefix():
1020 1045 self._always = any(f == path for f in matcher._files)
1021 1046
1022 1047 def bad(self, f, msg):
1023 1048 self._matcher.bad(self._path + b"/" + f, msg)
1024 1049
1025 1050 def matchfn(self, f):
1026 1051 # Some information is lost in the superclass's constructor, so we
1027 1052 # can not accurately create the matching function for the subdirectory
1028 1053 # from the inputs. Instead, we override matchfn() and visitdir() to
1029 1054 # call the original matcher with the subdirectory path prepended.
1030 1055 return self._matcher.matchfn(self._path + b"/" + f)
1031 1056
1032 1057 def visitdir(self, dir):
1033 1058 if dir == b'':
1034 1059 dir = self._path
1035 1060 else:
1036 1061 dir = self._path + b"/" + dir
1037 1062 return self._matcher.visitdir(dir)
1038 1063
1039 1064 def visitchildrenset(self, dir):
1040 1065 if dir == b'':
1041 1066 dir = self._path
1042 1067 else:
1043 1068 dir = self._path + b"/" + dir
1044 1069 return self._matcher.visitchildrenset(dir)
1045 1070
1046 1071 def always(self):
1047 1072 return self._always
1048 1073
1049 1074 def prefix(self):
1050 1075 return self._matcher.prefix() and not self._always
1051 1076
1052 1077 @encoding.strmethod
1053 1078 def __repr__(self):
1054 1079 return b'<subdirmatcher path=%r, matcher=%r>' % (
1055 1080 self._path,
1056 1081 self._matcher,
1057 1082 )
1058 1083
1059 1084
1060 1085 class prefixdirmatcher(basematcher):
1061 1086 """Adapt a matcher to work on a parent directory.
1062 1087
1063 1088 The matcher's non-matching-attributes (bad, traversedir) are ignored.
1064 1089
1065 1090 The prefix path should usually be the relative path from the root of
1066 1091 this matcher to the root of the wrapped matcher.
1067 1092
1068 1093 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
1069 1094 >>> m2 = prefixdirmatcher(b'd/e', m1)
1070 1095 >>> m2(b'a.txt')
1071 1096 False
1072 1097 >>> m2(b'd/e/a.txt')
1073 1098 True
1074 1099 >>> m2(b'd/e/b.txt')
1075 1100 False
1076 1101 >>> m2.files()
1077 1102 ['d/e/a.txt', 'd/e/f/b.txt']
1078 1103 >>> m2.exact(b'd/e/a.txt')
1079 1104 True
1080 1105 >>> m2.visitdir(b'd')
1081 1106 True
1082 1107 >>> m2.visitdir(b'd/e')
1083 1108 True
1084 1109 >>> m2.visitdir(b'd/e/f')
1085 1110 True
1086 1111 >>> m2.visitdir(b'd/e/g')
1087 1112 False
1088 1113 >>> m2.visitdir(b'd/ef')
1089 1114 False
1090 1115 """
1091 1116
1092 1117 def __init__(self, path, matcher, badfn=None):
1093 1118 super(prefixdirmatcher, self).__init__(badfn)
1094 1119 if not path:
1095 1120 raise error.ProgrammingError(b'prefix path must not be empty')
1096 1121 self._path = path
1097 1122 self._pathprefix = path + b'/'
1098 1123 self._matcher = matcher
1099 1124
1100 1125 @propertycache
1101 1126 def _files(self):
1102 1127 return [self._pathprefix + f for f in self._matcher._files]
1103 1128
1104 1129 def matchfn(self, f):
1105 1130 if not f.startswith(self._pathprefix):
1106 1131 return False
1107 1132 return self._matcher.matchfn(f[len(self._pathprefix) :])
1108 1133
1109 1134 @propertycache
1110 1135 def _pathdirs(self):
1111 1136 return set(pathutil.finddirs(self._path))
1112 1137
1113 1138 def visitdir(self, dir):
1114 1139 if dir == self._path:
1115 1140 return self._matcher.visitdir(b'')
1116 1141 if dir.startswith(self._pathprefix):
1117 1142 return self._matcher.visitdir(dir[len(self._pathprefix) :])
1118 1143 return dir in self._pathdirs
1119 1144
1120 1145 def visitchildrenset(self, dir):
1121 1146 if dir == self._path:
1122 1147 return self._matcher.visitchildrenset(b'')
1123 1148 if dir.startswith(self._pathprefix):
1124 1149 return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])
1125 1150 if dir in self._pathdirs:
1126 1151 return b'this'
1127 1152 return set()
1128 1153
1129 1154 def isexact(self):
1130 1155 return self._matcher.isexact()
1131 1156
1132 1157 def prefix(self):
1133 1158 return self._matcher.prefix()
1134 1159
1135 1160 @encoding.strmethod
1136 1161 def __repr__(self):
1137 1162 return b'<prefixdirmatcher path=%r, matcher=%r>' % (
1138 1163 pycompat.bytestr(self._path),
1139 1164 self._matcher,
1140 1165 )
1141 1166
1142 1167
1143 1168 class unionmatcher(basematcher):
1144 1169 """A matcher that is the union of several matchers.
1145 1170
1146 1171 The non-matching-attributes (bad, traversedir) are taken from the first
1147 1172 matcher.
1148 1173 """
1149 1174
1150 1175 def __init__(self, matchers):
1151 1176 m1 = matchers[0]
1152 1177 super(unionmatcher, self).__init__()
1153 1178 self.traversedir = m1.traversedir
1154 1179 self._matchers = matchers
1155 1180
1156 1181 def matchfn(self, f):
1157 1182 for match in self._matchers:
1158 1183 if match(f):
1159 1184 return True
1160 1185 return False
1161 1186
1162 1187 def visitdir(self, dir):
1163 1188 r = False
1164 1189 for m in self._matchers:
1165 1190 v = m.visitdir(dir)
1166 1191 if v == b'all':
1167 1192 return v
1168 1193 r |= v
1169 1194 return r
1170 1195
1171 1196 def visitchildrenset(self, dir):
1172 1197 r = set()
1173 1198 this = False
1174 1199 for m in self._matchers:
1175 1200 v = m.visitchildrenset(dir)
1176 1201 if not v:
1177 1202 continue
1178 1203 if v == b'all':
1179 1204 return v
1180 1205 if this or v == b'this':
1181 1206 this = True
1182 1207 # don't break, we might have an 'all' in here.
1183 1208 continue
1184 1209 assert isinstance(v, set)
1185 1210 r = r.union(v)
1186 1211 if this:
1187 1212 return b'this'
1188 1213 return r
1189 1214
1190 1215 @encoding.strmethod
1191 1216 def __repr__(self):
1192 1217 return b'<unionmatcher matchers=%r>' % self._matchers
1193 1218
1194 1219
1195 1220 def patkind(pattern, default=None):
1196 1221 r"""If pattern is 'kind:pat' with a known kind, return kind.
1197 1222
1198 1223 >>> patkind(br're:.*\.c$')
1199 1224 're'
1200 1225 >>> patkind(b'glob:*.c')
1201 1226 'glob'
1202 1227 >>> patkind(b'relpath:test.py')
1203 1228 'relpath'
1204 1229 >>> patkind(b'main.py')
1205 1230 >>> patkind(b'main.py', default=b're')
1206 1231 're'
1207 1232 """
1208 1233 return _patsplit(pattern, default)[0]
1209 1234
1210 1235
1211 1236 def _patsplit(pattern, default):
1212 1237 """Split a string into the optional pattern kind prefix and the actual
1213 1238 pattern."""
1214 1239 if b':' in pattern:
1215 1240 kind, pat = pattern.split(b':', 1)
1216 1241 if kind in allpatternkinds:
1217 1242 return kind, pat
1218 1243 return default, pattern
1219 1244
1220 1245
1221 1246 def _globre(pat):
1222 1247 r"""Convert an extended glob string to a regexp string.
1223 1248
1224 1249 >>> from . import pycompat
1225 1250 >>> def bprint(s):
1226 1251 ... print(pycompat.sysstr(s))
1227 1252 >>> bprint(_globre(br'?'))
1228 1253 .
1229 1254 >>> bprint(_globre(br'*'))
1230 1255 [^/]*
1231 1256 >>> bprint(_globre(br'**'))
1232 1257 .*
1233 1258 >>> bprint(_globre(br'**/a'))
1234 1259 (?:.*/)?a
1235 1260 >>> bprint(_globre(br'a/**/b'))
1236 1261 a/(?:.*/)?b
1237 1262 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1238 1263 [a*?!^][\^b][^c]
1239 1264 >>> bprint(_globre(br'{a,b}'))
1240 1265 (?:a|b)
1241 1266 >>> bprint(_globre(br'.\*\?'))
1242 1267 \.\*\?
1243 1268 """
1244 1269 i, n = 0, len(pat)
1245 1270 res = b''
1246 1271 group = 0
1247 1272 escape = util.stringutil.regexbytesescapemap.get
1248 1273
1249 1274 def peek():
1250 1275 return i < n and pat[i : i + 1]
1251 1276
1252 1277 while i < n:
1253 1278 c = pat[i : i + 1]
1254 1279 i += 1
1255 1280 if c not in b'*?[{},\\':
1256 1281 res += escape(c, c)
1257 1282 elif c == b'*':
1258 1283 if peek() == b'*':
1259 1284 i += 1
1260 1285 if peek() == b'/':
1261 1286 i += 1
1262 1287 res += b'(?:.*/)?'
1263 1288 else:
1264 1289 res += b'.*'
1265 1290 else:
1266 1291 res += b'[^/]*'
1267 1292 elif c == b'?':
1268 1293 res += b'.'
1269 1294 elif c == b'[':
1270 1295 j = i
1271 1296 if j < n and pat[j : j + 1] in b'!]':
1272 1297 j += 1
1273 1298 while j < n and pat[j : j + 1] != b']':
1274 1299 j += 1
1275 1300 if j >= n:
1276 1301 res += b'\\['
1277 1302 else:
1278 1303 stuff = pat[i:j].replace(b'\\', b'\\\\')
1279 1304 i = j + 1
1280 1305 if stuff[0:1] == b'!':
1281 1306 stuff = b'^' + stuff[1:]
1282 1307 elif stuff[0:1] == b'^':
1283 1308 stuff = b'\\' + stuff
1284 1309 res = b'%s[%s]' % (res, stuff)
1285 1310 elif c == b'{':
1286 1311 group += 1
1287 1312 res += b'(?:'
1288 1313 elif c == b'}' and group:
1289 1314 res += b')'
1290 1315 group -= 1
1291 1316 elif c == b',' and group:
1292 1317 res += b'|'
1293 1318 elif c == b'\\':
1294 1319 p = peek()
1295 1320 if p:
1296 1321 i += 1
1297 1322 res += escape(p, p)
1298 1323 else:
1299 1324 res += escape(c, c)
1300 1325 else:
1301 1326 res += escape(c, c)
1302 1327 return res
1303 1328
1304 1329
1305 1330 def _regex(kind, pat, globsuffix):
1306 1331 """Convert a (normalized) pattern of any kind into a
1307 1332 regular expression.
1308 1333 globsuffix is appended to the regexp of globs."""
1309 1334 if not pat and kind in (b'glob', b'relpath'):
1310 1335 return b''
1311 1336 if kind == b're':
1312 1337 return pat
1313 1338 if kind in (b'path', b'relpath'):
1314 1339 if pat == b'.':
1315 1340 return b''
1316 1341 return util.stringutil.reescape(pat) + b'(?:/|$)'
1317 1342 if kind == b'rootfilesin':
1318 1343 if pat == b'.':
1319 1344 escaped = b''
1320 1345 else:
1321 1346 # Pattern is a directory name.
1322 1347 escaped = util.stringutil.reescape(pat) + b'/'
1323 1348 # Anything after the pattern must be a non-directory.
1324 1349 return escaped + b'[^/]+$'
1325 1350 if kind == b'relglob':
1326 1351 globre = _globre(pat)
1327 1352 if globre.startswith(b'[^/]*'):
1328 1353 # When pat has the form *XYZ (common), make the returned regex more
1329 1354 # legible by returning the regex for **XYZ instead of **/*XYZ.
1330 1355 return b'.*' + globre[len(b'[^/]*') :] + globsuffix
1331 1356 return b'(?:|.*/)' + globre + globsuffix
1332 1357 if kind == b'relre':
1333 1358 if pat.startswith(b'^'):
1334 1359 return pat
1335 1360 return b'.*' + pat
1336 1361 if kind in (b'glob', b'rootglob'):
1337 1362 return _globre(pat) + globsuffix
1338 1363 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1339 1364
1340 1365
1341 1366 def _buildmatch(kindpats, globsuffix, root):
1342 1367 """Return regexp string and a matcher function for kindpats.
1343 1368 globsuffix is appended to the regexp of globs."""
1344 1369 matchfuncs = []
1345 1370
1346 1371 subincludes, kindpats = _expandsubinclude(kindpats, root)
1347 1372 if subincludes:
1348 1373 submatchers = {}
1349 1374
1350 1375 def matchsubinclude(f):
1351 1376 for prefix, matcherargs in subincludes:
1352 1377 if f.startswith(prefix):
1353 1378 mf = submatchers.get(prefix)
1354 1379 if mf is None:
1355 1380 mf = match(*matcherargs)
1356 1381 submatchers[prefix] = mf
1357 1382
1358 1383 if mf(f[len(prefix) :]):
1359 1384 return True
1360 1385 return False
1361 1386
1362 1387 matchfuncs.append(matchsubinclude)
1363 1388
1364 1389 regex = b''
1365 1390 if kindpats:
1366 1391 if all(k == b'rootfilesin' for k, p, s in kindpats):
1367 1392 dirs = {p for k, p, s in kindpats}
1368 1393
1369 1394 def mf(f):
1370 1395 i = f.rfind(b'/')
1371 1396 if i >= 0:
1372 1397 dir = f[:i]
1373 1398 else:
1374 1399 dir = b'.'
1375 1400 return dir in dirs
1376 1401
1377 1402 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1378 1403 matchfuncs.append(mf)
1379 1404 else:
1380 1405 regex, mf = _buildregexmatch(kindpats, globsuffix)
1381 1406 matchfuncs.append(mf)
1382 1407
1383 1408 if len(matchfuncs) == 1:
1384 1409 return regex, matchfuncs[0]
1385 1410 else:
1386 1411 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1387 1412
1388 1413
1389 1414 MAX_RE_SIZE = 20000
1390 1415
1391 1416
1392 1417 def _joinregexes(regexps):
1393 1418 """gather multiple regular expressions into a single one"""
1394 1419 return b'|'.join(regexps)
1395 1420
1396 1421
1397 1422 def _buildregexmatch(kindpats, globsuffix):
1398 1423 """Build a match function from a list of kinds and kindpats,
1399 1424 return regexp string and a matcher function.
1400 1425
1401 1426 Test too large input
1402 1427 >>> _buildregexmatch([
1403 1428 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1404 1429 ... ], b'$')
1405 1430 Traceback (most recent call last):
1406 1431 ...
1407 1432 Abort: matcher pattern is too long (20009 bytes)
1408 1433 """
1409 1434 try:
1410 1435 allgroups = []
1411 1436 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1412 1437 fullregexp = _joinregexes(regexps)
1413 1438
1414 1439 startidx = 0
1415 1440 groupsize = 0
1416 1441 for idx, r in enumerate(regexps):
1417 1442 piecesize = len(r)
1418 1443 if piecesize > MAX_RE_SIZE:
1419 1444 msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
1420 1445 raise error.Abort(msg)
1421 1446 elif (groupsize + piecesize) > MAX_RE_SIZE:
1422 1447 group = regexps[startidx:idx]
1423 1448 allgroups.append(_joinregexes(group))
1424 1449 startidx = idx
1425 1450 groupsize = 0
1426 1451 groupsize += piecesize + 1
1427 1452
1428 1453 if startidx == 0:
1429 1454 matcher = _rematcher(fullregexp)
1430 1455 func = lambda s: bool(matcher(s))
1431 1456 else:
1432 1457 group = regexps[startidx:]
1433 1458 allgroups.append(_joinregexes(group))
1434 1459 allmatchers = [_rematcher(g) for g in allgroups]
1435 1460 func = lambda s: any(m(s) for m in allmatchers)
1436 1461 return fullregexp, func
1437 1462 except re.error:
1438 1463 for k, p, s in kindpats:
1439 1464 try:
1440 1465 _rematcher(_regex(k, p, globsuffix))
1441 1466 except re.error:
1442 1467 if s:
1443 1468 raise error.Abort(
1444 1469 _(b"%s: invalid pattern (%s): %s") % (s, k, p)
1445 1470 )
1446 1471 else:
1447 1472 raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
1448 1473 raise error.Abort(_(b"invalid pattern"))
1449 1474
1450 1475
1451 1476 def _patternrootsanddirs(kindpats):
1452 1477 """Returns roots and directories corresponding to each pattern.
1453 1478
1454 1479 This calculates the roots and directories exactly matching the patterns and
1455 1480 returns a tuple of (roots, dirs) for each. It does not return other
1456 1481 directories which may also need to be considered, like the parent
1457 1482 directories.
1458 1483 """
1459 1484 r = []
1460 1485 d = []
1461 1486 for kind, pat, source in kindpats:
1462 1487 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1463 1488 root = []
1464 1489 for p in pat.split(b'/'):
1465 1490 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1466 1491 break
1467 1492 root.append(p)
1468 1493 r.append(b'/'.join(root))
1469 1494 elif kind in (b'relpath', b'path'):
1470 1495 if pat == b'.':
1471 1496 pat = b''
1472 1497 r.append(pat)
1473 1498 elif kind in (b'rootfilesin',):
1474 1499 if pat == b'.':
1475 1500 pat = b''
1476 1501 d.append(pat)
1477 1502 else: # relglob, re, relre
1478 1503 r.append(b'')
1479 1504 return r, d
1480 1505
1481 1506
1482 1507 def _roots(kindpats):
1483 1508 '''Returns root directories to match recursively from the given patterns.'''
1484 1509 roots, dirs = _patternrootsanddirs(kindpats)
1485 1510 return roots
1486 1511
1487 1512
1488 1513 def _rootsdirsandparents(kindpats):
1489 1514 """Returns roots and exact directories from patterns.
1490 1515
1491 1516 `roots` are directories to match recursively, `dirs` should
1492 1517 be matched non-recursively, and `parents` are the implicitly required
1493 1518 directories to walk to items in either roots or dirs.
1494 1519
1495 1520 Returns a tuple of (roots, dirs, parents).
1496 1521
1497 1522 >>> r = _rootsdirsandparents(
1498 1523 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1499 1524 ... (b'glob', b'g*', b'')])
1500 1525 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1501 1526 (['g/h', 'g/h', ''], []) ['', 'g']
1502 1527 >>> r = _rootsdirsandparents(
1503 1528 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1504 1529 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1505 1530 ([], ['g/h', '']) ['', 'g']
1506 1531 >>> r = _rootsdirsandparents(
1507 1532 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1508 1533 ... (b'path', b'', b'')])
1509 1534 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1510 1535 (['r', 'p/p', ''], []) ['', 'p']
1511 1536 >>> r = _rootsdirsandparents(
1512 1537 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1513 1538 ... (b'relre', b'rr', b'')])
1514 1539 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1515 1540 (['', '', ''], []) ['']
1516 1541 """
1517 1542 r, d = _patternrootsanddirs(kindpats)
1518 1543
1519 1544 p = set()
1520 1545 # Add the parents as non-recursive/exact directories, since they must be
1521 1546 # scanned to get to either the roots or the other exact directories.
1522 1547 p.update(pathutil.dirs(d))
1523 1548 p.update(pathutil.dirs(r))
1524 1549
1525 1550 # FIXME: all uses of this function convert these to sets, do so before
1526 1551 # returning.
1527 1552 # FIXME: all uses of this function do not need anything in 'roots' and
1528 1553 # 'dirs' to also be in 'parents', consider removing them before returning.
1529 1554 return r, d, p
1530 1555
1531 1556
1532 1557 def _explicitfiles(kindpats):
1533 1558 """Returns the potential explicit filenames from the patterns.
1534 1559
1535 1560 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1536 1561 ['foo/bar']
1537 1562 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1538 1563 []
1539 1564 """
1540 1565 # Keep only the pattern kinds where one can specify filenames (vs only
1541 1566 # directory names).
1542 1567 filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]
1543 1568 return _roots(filable)
1544 1569
1545 1570
1546 1571 def _prefix(kindpats):
1547 1572 '''Whether all the patterns match a prefix (i.e. recursively)'''
1548 1573 for kind, pat, source in kindpats:
1549 1574 if kind not in (b'path', b'relpath'):
1550 1575 return False
1551 1576 return True
1552 1577
1553 1578
1554 1579 _commentre = None
1555 1580
1556 1581
1557 1582 def readpatternfile(filepath, warn, sourceinfo=False):
1558 1583 """parse a pattern file, returning a list of
1559 1584 patterns. These patterns should be given to compile()
1560 1585 to be validated and converted into a match function.
1561 1586
1562 1587 trailing white space is dropped.
1563 1588 the escape character is backslash.
1564 1589 comments start with #.
1565 1590 empty lines are skipped.
1566 1591
1567 1592 lines can be of the following formats:
1568 1593
1569 1594 syntax: regexp # defaults following lines to non-rooted regexps
1570 1595 syntax: glob # defaults following lines to non-rooted globs
1571 1596 re:pattern # non-rooted regular expression
1572 1597 glob:pattern # non-rooted glob
1573 1598 rootglob:pat # rooted glob (same root as ^ in regexps)
1574 1599 pattern # pattern of the current default type
1575 1600
1576 1601 if sourceinfo is set, returns a list of tuples:
1577 1602 (pattern, lineno, originalline).
1578 1603 This is useful to debug ignore patterns.
1579 1604 """
1580 1605
1581 1606 syntaxes = {
1582 1607 b're': b'relre:',
1583 1608 b'regexp': b'relre:',
1584 1609 b'glob': b'relglob:',
1585 1610 b'rootglob': b'rootglob:',
1586 1611 b'include': b'include',
1587 1612 b'subinclude': b'subinclude',
1588 1613 }
1589 1614 syntax = b'relre:'
1590 1615 patterns = []
1591 1616
1592 1617 fp = open(filepath, b'rb')
1593 1618 for lineno, line in enumerate(util.iterfile(fp), start=1):
1594 1619 if b"#" in line:
1595 1620 global _commentre
1596 1621 if not _commentre:
1597 1622 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1598 1623 # remove comments prefixed by an even number of escapes
1599 1624 m = _commentre.search(line)
1600 1625 if m:
1601 1626 line = line[: m.end(1)]
1602 1627 # fixup properly escaped comments that survived the above
1603 1628 line = line.replace(b"\\#", b"#")
1604 1629 line = line.rstrip()
1605 1630 if not line:
1606 1631 continue
1607 1632
1608 1633 if line.startswith(b'syntax:'):
1609 1634 s = line[7:].strip()
1610 1635 try:
1611 1636 syntax = syntaxes[s]
1612 1637 except KeyError:
1613 1638 if warn:
1614 1639 warn(
1615 1640 _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
1616 1641 )
1617 1642 continue
1618 1643
1619 1644 linesyntax = syntax
1620 1645 for s, rels in pycompat.iteritems(syntaxes):
1621 1646 if line.startswith(rels):
1622 1647 linesyntax = rels
1623 1648 line = line[len(rels) :]
1624 1649 break
1625 1650 elif line.startswith(s + b':'):
1626 1651 linesyntax = rels
1627 1652 line = line[len(s) + 1 :]
1628 1653 break
1629 1654 if sourceinfo:
1630 1655 patterns.append((linesyntax + line, lineno, line))
1631 1656 else:
1632 1657 patterns.append(linesyntax + line)
1633 1658 fp.close()
1634 1659 return patterns
General Comments 0
You need to be logged in to leave comments. Login now