##// END OF EJS Templates
match: skip walking up the directory hierarchy if the number of pats are small...
Kyle Lippincott -
r46614:c4c7a6b6 default
parent child Browse files
Show More
@@ -1,1617 +1,1637 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from .pycompat import open
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 pathutil,
21 21 policy,
22 22 pycompat,
23 23 util,
24 24 )
25 25 from .utils import stringutil
26 26
27 27 rustmod = policy.importrust('dirstate')
28 28
29 29 allpatternkinds = (
30 30 b're',
31 31 b'glob',
32 32 b'path',
33 33 b'relglob',
34 34 b'relpath',
35 35 b'relre',
36 36 b'rootglob',
37 37 b'listfile',
38 38 b'listfile0',
39 39 b'set',
40 40 b'include',
41 41 b'subinclude',
42 42 b'rootfilesin',
43 43 )
44 44 cwdrelativepatternkinds = (b'relpath', b'glob')
45 45
46 46 propertycache = util.propertycache
47 47
48 48
def _rematcher(regex):
    """Compile ``regex`` via ``util.re`` and return a matching callable.

    The compiled object's ``test_match`` attribute is returned when it
    exists; otherwise its ``match`` method is returned.
    """
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
58 58
59 59
60 60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
61 61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 62 matchers = []
63 63 other = []
64 64
65 65 for kind, pat, source in kindpats:
66 66 if kind == b'set':
67 67 if ctx is None:
68 68 raise error.ProgrammingError(
69 69 b"fileset expression with no context"
70 70 )
71 71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
72 72
73 73 if listsubrepos:
74 74 for subpath in ctx.substate:
75 75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
76 76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 77 matchers.append(pm)
78 78
79 79 continue
80 80 other.append((kind, pat, source))
81 81 return matchers, other
82 82
83 83
84 84 def _expandsubinclude(kindpats, root):
85 85 """Returns the list of subinclude matcher args and the kindpats without the
86 86 subincludes in it."""
87 87 relmatchers = []
88 88 other = []
89 89
90 90 for kind, pat, source in kindpats:
91 91 if kind == b'subinclude':
92 92 sourceroot = pathutil.dirname(util.normpath(source))
93 93 pat = util.pconvert(pat)
94 94 path = pathutil.join(sourceroot, pat)
95 95
96 96 newroot = pathutil.dirname(path)
97 97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98 98
99 99 prefix = pathutil.canonpath(root, root, newroot)
100 100 if prefix:
101 101 prefix += b'/'
102 102 relmatchers.append((prefix, matcherargs))
103 103 else:
104 104 other.append((kind, pat, source))
105 105
106 106 return relmatchers, other
107 107
108 108
109 109 def _kindpatsalwaysmatch(kindpats):
110 110 """Checks whether the kindspats match everything, as e.g.
111 111 'relpath:.' does.
112 112 """
113 113 for kind, pat, source in kindpats:
114 114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 115 return False
116 116 return True
117 117
118 118
def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build one matcher out of ``kindpats``.

    'set' patterns are expanded into their own matchers by _expandsets();
    whatever is left is handed to ``matchercls``. The outcome is a
    nevermatcher when nothing was produced, the single matcher when there is
    exactly one, and a unionmatcher over all of them otherwise.
    """
    filesetmatchers, plainkindpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )

    matchers = []
    if plainkindpats:
        matchers.append(matchercls(root, plainkindpats, badfn=badfn))
    matchers.extend(filesetmatchers)

    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
146 146
147 147
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))

    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    original = pat
                    pat = dsnormalize(original)

                    # Preserve the original to handle a case only rename.
                    if original != pat and original in dirstate:
                        folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded

    else:
        normalize = _donormalize

    def _filtermatcher(pats):
        # include and exclude patterns are built the same way: they default
        # to 'glob', use includematcher and never carry the caller's badfn.
        filterkindpats = normalize(pats, b'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            filterkindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )

    kindpats = None
    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)

    if kindpats is None or _kindpatsalwaysmatch(kindpats):
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        m = _buildkindpatsmatcher(
            patternmatcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=badfn,
        )

    if include:
        m = intersectmatchers(m, _filtermatcher(include))
    if exclude:
        m = differencematcher(m, _filtermatcher(exclude))
    return m
310 310
311 311
def exact(files, badfn=None):
    """Return an exactmatcher for ``files`` with the given ``badfn``."""
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
314 314
315 315
def always(badfn=None):
    """Return an alwaysmatcher with the given ``badfn``."""
    return alwaysmatcher(badfn=badfn)
318 318
319 319
def never(badfn=None):
    """Return a nevermatcher with the given ``badfn``."""
    return nevermatcher(badfn=badfn)
322 322
323 323
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
331 331
332 332
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Turn each 'kind:pat' of ``patterns`` into a (kind, pat, source) tuple.

    Patterns are normalized and rooted according to their kind, and
    'listfile'/'listfile0'/'include' entries are replaced by the patterns
    they refer to. 're' and 'relre' patterns are passed through unchanged.

    Raises error.Abort when a list file cannot be read or when processing an
    included pattern file aborts. An IOError while handling an 'include' is
    reported through ``warn`` (when given) and the entry is skipped.
    """
    normalized = []
    # Split everything up front so a malformed pattern is reported before any
    # file is read.
    splitpats = [_patsplit(p, default) for p in patterns]
    for kind, pat in splitpats:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                data = util.readfile(pat)
                if kind == b'listfile0':
                    entries = data.split(b'\0')
                else:
                    entries = data.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # Patterns coming from a list file use that file as their source.
            normalized.extend(
                (k, p, pat)
                for k, p, _src in _donormalize(
                    entries, default, root, cwd, auditor, warn
                )
            )
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    # keep the innermost source when there is one
                    normalized.append((k, p, source or pat))
            except error.Abort as inst:
                raise error.Abort(
                    b'%s: %s'
                    % (
                        pat,
                        inst.message,
                    )  # pytype: disable=unsupported-operands
                )
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        normalized.append((kind, pat, b''))
    return normalized
383 383
384 384
class basematcher(object):
    """Base class of all matchers.

    As defined here, a matcher matches no file, reports no explicit files
    and asks for every directory to be visited; subclasses override the
    pieces they need.
    """

    def __init__(self, badfn=None):
        # Only shadow the bad() method when a callback was actually supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:

        - no patterns or .always(): an empty list,
        - exact: the list of exact files,
        - not .anypats(): the list of all files and dirs,
        - otherwise: optimal roots.
        """
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all
        subdirectories should be visited. Otherwise returns True or False
        indicating whether the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of
          visitchildrenset is capable of recognizing this; some
          implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of
          # matchers is applied to the entire matcher, we have to downgrade
          # this to 'this' due to the non-prefix 'rootfilesin'-kind matcher
          # being mixed in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers
          will return {'f'}, but if the matcher knows it's a file (like
          exactmatcher does), it may return 'this'. Do not rely on the return
          being a set indicating that there are no files in this dir to
          investigate (or equivalently that if there are files to investigate
          in 'dir' that it will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not (self.always() or self.isexact() or self.prefix())
499 499
500 500
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def always(self):
        return True

    def matchfn(self, f):
        # every file name is accepted
        return True

    def visitdir(self, dir):
        # everything below any directory matches
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'
521 521
522 522
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # nothing can match, so no directory needs to be visited
        return False

    def visitchildrenset(self, dir):
        return set()
548 548
549 549
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        # the predicate itself answers every match query
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        # Prefer the caller-supplied description; fall back to the repr of
        # the predicate function when it produces nothing.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): 'predicatenmatcher' looks like a misspelling, but it
        # is runtime output and is therefore kept as is.
        return b'<predicatenmatcher pred=%s>' % described
564 564
565 565
def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`.

    `path` is a '/'-separated bytes path and `prefix_set` a set of such
    paths. The empty path b'' in `prefix_set` is treated as a parent of
    everything. The result is always a real bool.
    """
    count = len(prefix_set)
    if count == 0:
        return False
    if path in prefix_set:
        return True
    # If there's more than 5 paths in prefix_set, it's *probably* quicker to
    # "walk up" the directory hierarchy instead, with the assumption that most
    # directory hierarchies are relatively shallow and hash lookup is cheap.
    if count > 5:
        return any(
            parentdir in prefix_set for parentdir in pathutil.finddirs(path)
        )

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    # A prefix only counts as a parent when it is followed by a separator.
    # startswith() with an offset gives the same answer on Python 2 and 3
    # (no int-vs-str indexing difference) and cannot raise IndexError.
    return any(
        path.startswith(pf) and path.startswith(b'/', len(pf))
        for pf in prefix_set
    )
594
595
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        # built from the explicit files via pathutil.dirs
        return set(pathutil.dirs(self._fileset))

    def prefix(self):
        return self._prefix

    def visitdir(self, dir):
        if self._prefix and dir in self._fileset:
            return b'all'
        if dir in self._dirs:
            return True
        # Otherwise visit only when dir itself, or one of its parents, is one
        # of the explicit files.
        return path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        # Plain translation of visitdir()'s answer into the
        # visitchildrenset() vocabulary.
        visit = self.visitdir(dir)
        if visit is True:
            return b'this'
        if not visit:
            return set()
        assert visit == b'all'
        return b'all'

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
637 663
638 664
639 665 # This is basically a reimplementation of pathutil.dirs that stores the
640 666 # children instead of just a count of them, plus a small optional optimization
641 667 # to avoid some directories we don't need.
642 668 class _dirchildren(object):
643 669 def __init__(self, paths, onlyinclude=None):
644 670 self._dirs = {}
645 671 self._onlyinclude = onlyinclude or []
646 672 addpath = self.addpath
647 673 for f in paths:
648 674 addpath(f)
649 675
650 676 def addpath(self, path):
651 677 if path == b'':
652 678 return
653 679 dirs = self._dirs
654 680 findsplitdirs = _dirchildren._findsplitdirs
655 681 for d, b in findsplitdirs(path):
656 682 if d not in self._onlyinclude:
657 683 continue
658 684 dirs.setdefault(d, set()).add(b)
659 685
660 686 @staticmethod
661 687 def _findsplitdirs(path):
662 688 # yields (dirname, basename) tuples, walking back to the root. This is
663 689 # very similar to pathutil.finddirs, except:
664 690 # - produces a (dirname, basename) tuple, not just 'dirname'
665 691 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
666 692 # slash.
667 693 oldpos = len(path)
668 694 pos = path.rfind(b'/')
669 695 while pos != -1:
670 696 yield path[:pos], path[pos + 1 : oldpos]
671 697 oldpos = pos
672 698 pos = path.rfind(b'/', 0, pos)
673 699 yield b'', path[:oldpos]
674 700
675 701 def get(self, path):
676 702 return self._dirs.get(path, set())
677 703
678 704
class includematcher(basematcher):
    """Matcher built from (kind, pat, source) include patterns.

    Besides the match function it keeps three directory collections (roots,
    dirs and parents) that drive visitdir() and visitchildrenset().
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)
        if rustmod is not None:
            # We need to pass the patterns to Rust because they can contain
            # patterns from the user interface
            self._kindpats = kindpats
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return b'all'
        if dir in self._dirs or dir in self._parents:
            return True
        # Finally, visit when dir is a root or lies below one.
        return path_or_parents_in_set(dir, roots)

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, roots)
        ):
            return b'this'

        if dir in self._parents:
            return self._allparentschildren.get(dir) or set()
        return set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
744 764
745 765
class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # a list is stored as given; any other iterable is copied into one
        self._files = files if isinstance(files, list) else list(files)

    # matching a file is the same as asking whether it is in .files()
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def isexact(self):
        return True

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | (self._dirs - {b''})
        if dir != b'':
            # keep only what lives under dir, expressed relative to it
            prefix = dir + b'/'
            skip = len(prefix)
            candidates = {c[skip:] for c in candidates if c.startswith(prefix)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {c for c in candidates if b'/' not in c}
        # We really do not expect this to be empty, since that would imply
        # that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
809 829
810 830
class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # bad and traversedir come from the first matcher only
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 only once; the answer is needed for both checks below.
        m2visit = self._m2.visitdir(dir)
        if m2visit == b'all':
            # everything under dir is excluded
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 may exclude something below dir, so never answer 'all'
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
878 898
879 899
def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    When either argument is None, ``m1 or m2`` is returned. When one side is
    an always() matcher, a shallow copy of the other side is returned (with
    m1's bad and traversedir if it is m1 that always matches).
    """
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        result = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        result.bad = m1.bad
        result.traversedir = m1.traversedir
        return result

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
898 918
899 919
class intersectionmatcher(basematcher):
    """Matches the files that both ``m1`` and ``m2`` match.

    bad and traversedir are taken from ``m1``.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        first, second = self._m1, self._m2
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return first.files() + second.files()
        # Filter the files of an exact side through the other side.
        if first.isexact():
            exactside, otherside = first, second
        else:
            exactside, otherside = second, first
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == b'all':
            return self._m2.visitdir(dir)
        if not visit1:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side
        if m1_set == b'all':
            return m2_set
        if m2_set == b'all':
            return m1_set

        if b'this' in (m1_set, m2_set):
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
959 979
960 980
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below ``path`` and
        # express them relative to it.
        remapped = []
        for name in matcher._files:
            if name.startswith(path + b"/"):
                remapped.append(name[len(path) + 1 :])
        self._files = remapped

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def bad(self, f, msg):
        """Forward to the wrapped matcher's bad() with ``f`` re-rooted."""
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        """Ask the wrapped matcher about ``dir`` placed under our path."""
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(full)

    def visitchildrenset(self, dir):
        """Ask the wrapped matcher about ``dir`` placed under our path."""
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(full)

    def always(self):
        """Return the value computed in the constructor."""
        return self._always

    def prefix(self):
        """True when the wrapped matcher is a prefix matcher and this
        matcher does not always match."""
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
1041 1061
1042 1062
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        # Cached "path/" used both to recognise and to strip the prefix.
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        """The wrapped matcher's files, each with the prefix prepended."""
        prefixed = []
        for name in self._matcher._files:
            prefixed.append(self._pathprefix + name)
        return prefixed

    def matchfn(self, f):
        """Match ``f`` only if it lies under the prefix and the wrapped
        matcher accepts the remainder."""
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        """Set built from ``pathutil.finddirs`` applied to the prefix path."""
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        """Translate ``dir`` into the wrapped matcher's namespace when it is
        at or below the prefix; otherwise visit only the directories in
        ``_pathdirs``."""
        prefix = self._pathprefix
        if dir == self._path:
            return self._matcher.visitdir(b'')
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix) :])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        """Same translation as visitdir(), for visitchildrenset()."""
        prefix = self._pathprefix
        if dir == self._path:
            return self._matcher.visitchildrenset(b'')
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix) :])
        return b'this' if dir in self._pathdirs else set()

    def isexact(self):
        """Delegate to the wrapped matcher."""
        return self._matcher.isexact()

    def prefix(self):
        """Delegate to the wrapped matcher."""
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )
1124 1144
1125 1145
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    ``traversedir`` is taken from the first matcher.

    NOTE(review): this constructor copies only ``traversedir``; ``bad`` is
    left as whatever the base class constructor provides.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """True as soon as any of the wrapped matchers accepts ``f``."""
        return any(candidate(f) for candidate in self._matchers)

    def visitdir(self, dir):
        """OR together the visitdir() answers; 'all' wins immediately."""
        combined = False
        for sub in self._matchers:
            answer = sub.visitdir(dir)
            if answer == b'all':
                return answer
            combined = combined | answer
        return combined

    def visitchildrenset(self, dir):
        """Merge the visitchildrenset() answers of the wrapped matchers.

        'all' from any matcher wins immediately; otherwise 'this' from any
        matcher wins over explicit sets, which are unioned together.
        """
        children = set()
        sawthis = False
        for sub in self._matchers:
            answer = sub.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if sawthis or answer == b'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children = children.union(answer)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1176 1196
1177 1197
def patkind(pattern, default=None):
    r"""If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    kind, _pat = _patsplit(pattern, default)
    return kind
1192 1212
1193 1213
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns ``(kind, pat)`` when the text before the first ``:`` is one of
    ``allpatternkinds``; otherwise ``(default, pattern)`` with the pattern
    left untouched.
    """
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1202 1222
1203 1223
def _globre(pat):
    r"""Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    pos, end = 0, len(pat)
    out = b''
    # Number of currently open '{' groups.
    depth = 0
    quote = util.stringutil.regexbytesescapemap.get

    def nextchar():
        # The byte at the cursor, or False once the pattern is exhausted.
        return pos < end and pat[pos : pos + 1]

    while pos < end:
        ch = pat[pos : pos + 1]
        pos += 1
        if ch not in b'*?[{},\\':
            # Ordinary character: emit it, regexp-escaped if necessary.
            out += quote(ch, ch)
        elif ch == b'*':
            if nextchar() == b'*':
                pos += 1
                if nextchar() == b'/':
                    # '**/' matches any number of leading directories.
                    pos += 1
                    out += b'(?:.*/)?'
                else:
                    out += b'.*'
            else:
                # A single '*' never crosses a directory separator.
                out += b'[^/]*'
        elif ch == b'?':
            out += b'.'
        elif ch == b'[':
            # Look for the closing ']'; a leading '!' or ']' is part of the
            # class body rather than its terminator.
            close = pos
            if close < end and pat[close : close + 1] in b'!]':
                close += 1
            while close < end and pat[close : close + 1] != b']':
                close += 1
            if close >= end:
                # Unterminated class: treat '[' as a literal.
                out += b'\\['
            else:
                body = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                if body[0:1] == b'!':
                    # Glob negation '!' becomes regexp negation '^'.
                    body = b'^' + body[1:]
                elif body[0:1] == b'^':
                    # A literal leading '^' must not negate the class.
                    body = b'\\' + body
                out = b'%s[%s]' % (out, body)
        elif ch == b'{':
            depth += 1
            out += b'(?:'
        elif ch == b'}' and depth:
            out += b')'
            depth -= 1
        elif ch == b',' and depth:
            out += b'|'
        elif ch == b'\\':
            escaped = nextchar()
            if escaped:
                pos += 1
                out += quote(escaped, escaped)
            else:
                # Trailing backslash: emit it as a literal.
                out += quote(ch, ch)
        else:
            # '}' or ',' outside of any group are plain characters.
            out += quote(ch, ch)
    return out
1286 1306
1287 1307
def _regex(kind, pat, globsuffix):
    """Convert a (normalized) pattern of any kind into a
    regular expression.

    globsuffix is appended to the regexp of globs (kinds ``glob``,
    ``rootglob`` and ``relglob``).

    Raises error.ProgrammingError for a kind that has no regexp form.
    """
    if kind in (b'glob', b'relpath') and not pat:
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        # The pattern matches the path itself or anything below it.
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        # Pattern is a directory name (b'.' yields an empty prefix).
        dirprefix = (
            b'' if pat == b'.' else util.stringutil.reescape(pat) + b'/'
        )
        # Anything after the pattern must be a non-directory.
        return dirprefix + b'[^/]+$'
    if kind == b'relglob':
        translated = _globre(pat)
        star = b'[^/]*'
        if translated.startswith(star):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + translated[len(star) :] + globsuffix
        return b'(?:|.*/)' + translated + globsuffix
    if kind == b'relre':
        # An explicit '^' anchor is kept; anything else may start anywhere.
        return pat if pat.startswith(b'^') else b'.*' + pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1322 1342
1323 1343
def _buildmatch(kindpats, globsuffix, root):
    """Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs.

    The returned function combines up to two sources: the subincludes
    extracted by ``_expandsubinclude`` and the remaining patterns.
    """
    checks = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for subincludes are built lazily, one per prefix.
        cache = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                sub = cache.get(prefix)
                if sub is None:
                    sub = match(*matcherargs)
                    cache[prefix] = sub

                if sub(f[len(prefix) :]):
                    return True
            return False

        checks.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only rootfilesin patterns: a set lookup on the file's
            # directory replaces a regexp.
            dirs = {p for k, p, s in kindpats}

            def indirs(f):
                head, slash, _tail = f.rpartition(b'/')
                return (head if slash else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            checks.append(indirs)
        else:
            regex, regexcheck = _buildregexmatch(kindpats, globsuffix)
            checks.append(regexcheck)

    if len(checks) == 1:
        return regex, checks[0]

    def anycheck(f):
        return any(check(f) for check in checks)

    return regex, anycheck
1370 1390
1371 1391
# Size limit, in bytes, used by _buildregexmatch(): a single pattern whose
# regexp is longer than this aborts, and joined groups of regexps are split so
# that each group stays within this size.
MAX_RE_SIZE = 20000
1373 1393
1374 1394
1375 1395 def _joinregexes(regexps):
1376 1396 """gather multiple regular expressions into a single one"""
1377 1397 return b'|'.join(regexps)
1378 1398
1379 1399
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The returned regexp string always joins every pattern. When that would
    exceed MAX_RE_SIZE, the matcher function is instead backed by several
    compiled groups and matches if any group does.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        pieces = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(pieces)

        chunks = []
        chunkstart = 0
        chunksize = 0
        for pos, piece in enumerate(pieces):
            size = len(piece)
            if size > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % size
                raise error.Abort(msg)
            if chunksize + size > MAX_RE_SIZE:
                # Close the current chunk just before this piece.
                chunks.append(_joinregexes(pieces[chunkstart:pos]))
                chunkstart = pos
                chunksize = 0
            # +1 accounts for the b'|' separator added when joining.
            chunksize += size + 1

        if chunkstart == 0:
            # Nothing had to be split: compile the whole expression at once.
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))

        else:
            chunks.append(_joinregexes(pieces[chunkstart:]))
            allmatchers = [_rematcher(chunk) for chunk in chunks]

            def func(s):
                return any(m(s) for m in allmatchers)

        return fullregexp, func
    except re.error:
        # Recompile the patterns one at a time to name the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                else:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_(b"invalid pattern"))
1432 1452
1433 1453
1434 1454 def _patternrootsanddirs(kindpats):
1435 1455 """Returns roots and directories corresponding to each pattern.
1436 1456
1437 1457 This calculates the roots and directories exactly matching the patterns and
1438 1458 returns a tuple of (roots, dirs) for each. It does not return other
1439 1459 directories which may also need to be considered, like the parent
1440 1460 directories.
1441 1461 """
1442 1462 r = []
1443 1463 d = []
1444 1464 for kind, pat, source in kindpats:
1445 1465 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1446 1466 root = []
1447 1467 for p in pat.split(b'/'):
1448 1468 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1449 1469 break
1450 1470 root.append(p)
1451 1471 r.append(b'/'.join(root))
1452 1472 elif kind in (b'relpath', b'path'):
1453 1473 if pat == b'.':
1454 1474 pat = b''
1455 1475 r.append(pat)
1456 1476 elif kind in (b'rootfilesin',):
1457 1477 if pat == b'.':
1458 1478 pat = b''
1459 1479 d.append(pat)
1460 1480 else: # relglob, re, relre
1461 1481 r.append(b'')
1462 1482 return r, d
1463 1483
1464 1484
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of what ``_patternrootsanddirs`` returns; the
    exact directories are discarded.
    '''
    return _patternrootsanddirs(kindpats)[0]
1469 1489
1470 1490
def _rootsdirsandparents(kindpats):
    """Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set()
    for group in (exactdirs, roots):
        parents.update(pathutil.dirs(group))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1513 1533
1514 1534
def _explicitfiles(kindpats):
    """Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    directoryonly = (b'rootfilesin',)
    filable = []
    for kindpat in kindpats:
        if kindpat[0] not in directoryonly:
            filable.append(kindpat)
    return _roots(filable)
1527 1547
1528 1548
1529 1549 def _prefix(kindpats):
1530 1550 '''Whether all the patterns match a prefix (i.e. recursively)'''
1531 1551 for kind, pat, source in kindpats:
1532 1552 if kind not in (b'path', b'relpath'):
1533 1553 return False
1534 1554 return True
1535 1555
1536 1556
# Cache for the comment-stripping regexp used by readpatternfile(); it is
# compiled there the first time a line containing b"#" is encountered.
_commentre = None
1538 1558
1539 1559
def readpatternfile(filepath, warn, sourceinfo=False):
    """parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    (The third element is the line after comment removal, trailing
    whitespace stripping and removal of any recognised syntax prefix.)

    `warn`, when truthy, is called with a message for each ``syntax:`` line
    naming an unknown syntax; such lines are otherwise ignored.

    The file is closed before returning, including when reading or parsing
    raises.
    """
    global _commentre

    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager guarantees the handle is released even if iteration,
    # regexp compilation or the warn callback raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                # A "syntax:" line only switches the default for later lines.
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A per-line prefix overrides the current default syntax; the
            # prefix is stripped from the line and replaced by its canonical
            # form.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now