##// END OF EJS Templates
match: drop support for passing '.' for root dir to visit*() methods...
Martin von Zweigbergk -
r43992:ecd11c4d default
parent child Browse files
Show More
@@ -1,1638 +1,1621 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from .pycompat import open
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 pathutil,
21 21 pathutil,
22 22 policy,
23 23 pycompat,
24 24 util,
25 25 )
26 26 from .utils import stringutil
27 27
# Rust 'filepatterns' module as resolved by policy.importrust().
# NOTE(review): presumably None when the Rust extension is unavailable --
# confirm against policy.importrust().
rustmod = policy.importrust('filepatterns')

# Pattern kind prefixes listed by this module (most of them are described in
# the docstring of match()).
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose pattern _donormalize() canonicalizes relative to the cwd
# (through pathutil.canonpath) instead of merely normalizing it.
cwdrelativepatternkinds = (b'relpath', b'glob')

# Short alias; used below as a decorator on lazily computed matcher
# attributes.
propertycache = util.propertycache
48 48
49 49
def _rematcher(regex):
    '''Compile ``regex`` through util.re and return a matching callable.

    The compiled object's ``test_match`` attribute is preferred (slightly
    faster, provided by facebook's re2 bindings); its plain ``match`` is
    returned when that attribute does not exist.
    '''
    compiled = util.re.compile(regex)
    try:
        matchfn = compiled.test_match
    except AttributeError:
        # engine without the re2 fast path
        matchfn = compiled.match
    return matchfn
59 59
60 60
def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
    '''Split ``kindpats`` into fileset matchers and the remaining patterns.

    Each b'set' entry is converted to a matcher with ``ctx.matchfileset()``.
    When ``listsubrepos`` is true, one extra matcher per entry of
    ``ctx.substate`` is appended, wrapped in a prefixdirmatcher for that
    subrepo path. Entries of any other kind are passed through unchanged and
    in their original order.

    Returns a ``(matchers, otherkindpats)`` tuple. Raises
    error.ProgrammingError when a b'set' entry is seen while ``ctx`` is None.
    '''
    filesetmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != b'set':
            remaining.append((kind, pat, source))
            continue

        if ctx is None:
            raise error.ProgrammingError(
                b"fileset expression with no context"
            )
        filesetmatchers.append(ctx.matchfileset(pat, badfn=badfn))

        if not listsubrepos:
            continue
        for subpath in ctx.substate:
            submatcher = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
            filesetmatchers.append(
                prefixdirmatcher(subpath, submatcher, badfn=badfn)
            )

    return filesetmatchers, remaining
83 83
84 84
def _expandsubinclude(kindpats, root):
    '''Separate the b'subinclude' entries of ``kindpats`` from the rest.

    For every subinclude the pattern file path is resolved against the
    directory of the entry's ``source``; the directory holding that pattern
    file becomes the root of a would-be sub-matcher.

    Returns ``(relmatchers, other)``: ``relmatchers`` is a list of
    ``(prefix, matcherargs)`` pairs, where ``prefix`` is the new root
    expressed relative to ``root`` (with a trailing slash unless empty), and
    ``other`` holds the non-subinclude entries untouched.
    '''
    submatcherargs = []
    passthrough = []

    for kind, pat, source in kindpats:
        if kind != b'subinclude':
            passthrough.append((kind, pat, source))
            continue

        sourcedir = pathutil.dirname(util.normpath(source))
        patternfile = pathutil.join(sourcedir, util.pconvert(pat))
        subroot = pathutil.dirname(patternfile)
        args = (subroot, b'', [], [b'include:%s' % patternfile])

        relprefix = pathutil.canonpath(root, root, subroot)
        if relprefix:
            relprefix += b'/'
        submatcherargs.append((relprefix, args))

    return submatcherargs, passthrough
108 108
109 109
def _kindpatsalwaysmatch(kindpats):
    """Tell whether the given kindpats match everything, as e.g.
    'relpath:.' does.

    That is the case when every entry has an empty pattern and is of kind
    b'relpath' or b'glob'; an empty list therefore yields True.
    """
    return all(
        pat == b'' and kind in (b'relpath', b'glob')
        for kind, pat, source in kindpats
    )
118 118
119 119
def _buildkindpatsmatcher(
    matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
):
    '''Build one matcher covering all of ``kindpats``.

    Fileset (b'set') patterns are expanded into their own matchers by
    _expandsets(); whatever is left is handed to ``matchercls``. The result
    is a nevermatcher when nothing was produced, the single matcher when
    there is exactly one, and a unionmatcher over all of them otherwise.
    '''
    filesetmatchers, remaining = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
    )

    candidates = []
    if remaining:
        candidates.append(matchercls(root, remaining, badfn=badfn))
    candidates.extend(filesetmatchers)

    count = len(candidates)
    if count == 0:
        return nevermatcher(badfn=badfn)
    if count == 1:
        return candidates[0]
    return unionmatcher(candidates)
137 137
138 138
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    # Default normalizer; replaced by a case-folding wrapper when icasefs is
    # requested.
    normalize = _donormalize
    if icasefs:
        # ctx is dereferenced here, so it has to be supplied whenever icasefs
        # is true.
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # Same contract as _donormalize(), with every non-regex pattern
            # additionally passed through dirstate.normalize().
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    # The include and exclude matchers are built with badfn=None; only the
    # primary matcher above receives the caller's badfn. Their patterns
    # always default to the b'glob' kind, regardless of ``default``.
    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m
293 293
294 294
def exact(files, badfn=None):
    '''Return an exactmatcher for ``files``, forwarding ``badfn`` to it.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
297 297
298 298
def always(badfn=None):
    '''Return an alwaysmatcher constructed with ``badfn``.'''
    matcher = alwaysmatcher(badfn)
    return matcher
301 301
302 302
def never(badfn=None):
    '''Return a nevermatcher constructed with ``badfn``.'''
    matcher = nevermatcher(badfn)
    return matcher
305 305
306 306
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() callback is ``badfn``.

    The matcher passed in is left unmodified.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
314 314
315 315
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of ``(kind, pat, source)`` tuples. ``source`` is b'' for
    patterns given directly, the list file path for patterns read from a
    listfile/listfile0, and the originating pattern file for patterns pulled
    in through include.

    Raises error.Abort when a list file cannot be read or when reading an
    included pattern file aborts. An include file that fails with IOError is
    skipped, with a message passed to ``warn`` if one is provided.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # Entries of the list file are themselves patterns; they are
            # all attributed to the list file.
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3, so read the
                # message through .args (which is what inst[0] did on
                # Python 2).
                raise error.Abort(b'%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats
363 363
364 364
class basematcher(object):
    '''Base class of the matchers in this module.

    It supplies the default answers (no files, matchfn() is False, every
    directory may be visited) that subclasses override.
    '''

    def __init__(self, badfn=None):
        # Without an explicit callback the class-level bad() stays in effect.
        if badfn is None:
            return
        self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # When set, explicitdir is called whenever an explicitly listed
    # directory is visited.
    explicitdir = None

    # When set, traversedir is called whenever a directory discovered by
    # recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        listed = self._fileset
        return f in listed

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Tell whether ``dir`` is worth visiting, i.e. whether it or one of
        its subdirectories may hold matches according to the matcher's
        primary, included, and excluded patterns.

        The string 'all' means that the directory and everything below it
        should be visited; otherwise True or False says whether the directory
        itself should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Like visitdir(), but may additionally name which children of
        ``dir`` need to be visited. The answer is based on the matcher's
        primary, included, and excluded patterns.

        The results of the two methods correspond as follows:

          visitdir | visitchildrenset
         ----------+-------------------
           False   | set()
           'all'   | 'all'
           True    | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers cannot tell files from directories. Given
          ['path:dir/f'] they do not know what 'f' is, so
          visitchildrenset('dir') usually returns {'f'}; a matcher that knows
          'f' is a file (like exactmatcher does) may return 'this' instead.
          Hence a set result must not be read as "there are no files to
          investigate in this dir", nor may callers assume that 'this' is
          always returned when 'dir' does hold files to investigate.
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
483 483
484 484
class alwaysmatcher(basematcher):
    '''Matcher accepting every file and every directory.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def matchfn(self, f):
        return True

    def always(self):
        return True

    # Both directory queries answer 'all': nothing below any directory is
    # ever excluded.
    def visitdir(self, dir):
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'
505 505
506 506
class nevermatcher(basematcher):
    '''Matcher rejecting every file and every directory.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    # Claiming to be both an exact matcher and a prefix matcher looks odd
    # for something that matches nothing, but it lets callers take either
    # fast path. files() returns [], so there are neither exact matches nor
    # prefixes, and fast paths iterating over them are efficient (and
    # correct).
    def isexact(self):
        return True

    def prefix(self):
        return True
532 532
533 533
class predicatematcher(basematcher):
    """Wrap a plain boolean function so it can be used as a matcher.

    ``predfn`` becomes the matcher's matchfn; ``predrepr`` is only used to
    render the repr.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            # nothing usable from predrepr: describe the function itself
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): the 'predicatenmatcher' spelling is runtime output
        # and is deliberately left as is.
        return b'<predicatenmatcher pred=%s>' % described
548 548
549 549
def normalizerootdir(dir, funcname):
    '''Return ``dir`` with the root spelling b'.' replaced by b''.

    Any other value is returned unchanged. When b'.' is given, a deprecation
    warning mentioning ``funcname`` is issued through util.nouideprecwarn()
    before b'' is returned.
    '''
    if dir != b'.':
        return dir
    util.nouideprecwarn(
        b"match.%s() no longer accepts '.', use '' instead." % funcname,
        b'5.1',
    )
    return b''
558
559
class patternmatcher(basematcher):
    """Matcher built from (kind, pat, source) tuples for a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        dir = normalizerootdir(dir, b'visitdir')
        listed = self._fileset
        if dir in listed:
            # an explicitly listed dir is fully covered for prefix matchers
            return b'all' if self._prefix else True
        if dir in self._dirs:
            return True
        # otherwise visit only when some ancestor was listed explicitly
        return any(parent in listed for parent in util.finddirs(dir))

    def visitchildrenset(self, dir):
        # Pure translation of the visitdir() answer: True -> 'this',
        # falsy -> set(), 'all' -> 'all'.
        verdict = self.visitdir(dir)
        if verdict is True:
            return b'this'
        if not verdict:
            return set()
        assert verdict == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
631 620
632 621
# This is basically a reimplementation of pathutil.dirs that stores the
# children instead of just a count of them, plus a small optional optimization
# to avoid some directories we don't need.
class _dirchildren(object):
    '''Map every ancestor directory of a set of paths to the names found
    directly beneath it.

    ``paths`` is an iterable of '/'-separated paths; the empty path is
    ignored. ``onlyinclude`` optionally restricts which directories are
    recorded: None (the default) records all of them, any other value
    records only the directories it contains (so an empty collection records
    nothing).
    '''

    def __init__(self, paths, onlyinclude=None):
        self._dirs = {}
        # Keep None distinct from an empty collection: None means "no
        # filter", whereas an empty filter admits no directory at all.
        self._onlyinclude = onlyinclude
        addpath = self.addpath
        for f in paths:
            addpath(f)

    def addpath(self, path):
        '''Record ``path`` under each of its ancestor directories.'''
        if path == b'':
            return
        dirs = self._dirs
        onlyinclude = self._onlyinclude
        for d, b in _dirchildren._findsplitdirs(path):
            if onlyinclude is not None and d not in onlyinclude:
                continue
            dirs.setdefault(d, set()).add(b)

    @staticmethod
    def _findsplitdirs(path):
        # yields (dirname, basename) tuples, walking back to the root. This is
        # very similar to util.finddirs, except:
        # - produces a (dirname, basename) tuple, not just 'dirname'
        # Unlike manifest._splittopdir, this does not suffix `dirname` with a
        # slash.
        oldpos = len(path)
        pos = path.rfind(b'/')
        while pos != -1:
            yield path[:pos], path[pos + 1 : oldpos]
            oldpos = pos
            pos = path.rfind(b'/', 0, pos)
        yield b'', path[:oldpos]

    def get(self, path):
        '''Return the set of children recorded for ``path`` (a fresh empty
        set when nothing was recorded).'''
        return self._dirs.get(path, set())
671 660
672 661
class includematcher(basematcher):
    '''Matcher for include/exclude style patterns.

    Besides the compiled match function it keeps three directory
    collections (roots, dirs, parents) that drive the directory-visiting
    decisions.
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        recursive, nonrecursive, ancestors = _rootsdirsandparents(kindpats)
        # roots: directories included recursively.
        self._roots = set(recursive)
        # dirs: directories included non-recursively.
        self._dirs = set(nonrecursive)
        # parents: directories included non-recursively only because they
        # lead to something in _dirs or _roots.
        self._parents = ancestors

    def visitdir(self, dir):
        dir = normalizerootdir(dir, b'visitdir')
        roots = self._roots
        if dir in roots:
            return b'all' if self._prefix else True
        if dir in self._dirs or dir in self._parents:
            return True
        return any(parent in roots for parent in util.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # Feeding dirs, roots and parents while recording only parents may
        # look odd. It is needed for a case such as:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # Had only self._parents been fed, asking for the children of 'foo'
        # could not be answered with ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        roots = self._roots
        inroots = dir in roots
        if self._prefix and inroots:
            return b'all'
        # Note: the 'dir in self._parents' case from visitdir is *not* part
        # of this test; it is dealt with afterwards.
        if (
            b'' in roots
            or inroots
            or dir in self._dirs
            or any(parent in roots for parent in util.finddirs(dir))
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
732 720
733 721
class exactmatcher(basematcher):
    r'''Matcher for an explicit list of files. Entries are taken as literal
    paths, never as patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # a list is stored as given; any other iterable is copied into one
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        dir = normalizerootdir(dir, b'visitdir')
        return dir in self._dirs

    def visitchildrenset(self, dir):
        dir = normalizerootdir(dir, b'visitchildrenset')

        files = self._fileset
        alldirs = self._dirs
        if not files or dir not in alldirs:
            return set()

        entries = files | (alldirs - {b''})
        if dir != b'':
            prefix = dir + b'/'
            skip = len(prefix)
            entries = set(e[skip:] for e in entries if e.startswith(prefix))
        # alldirs holds every directory, recursively: for foo/bar/baz.txt it
        # contains '', 'foo' and 'foo/bar'. An entry that still has a '/' in
        # it therefore belongs to a subdir-of-a-subdir and can be dropped;
        # the immediate subdir is present on its own, without a slash.
        children = {e for e in entries if b'/' not in e}
        # An empty result is not expected: it would mean that something in
        # _dirs has no corresponding file in _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
800 785
801 786
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 only once; its answer drives all three outcomes.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # everything below dir is subtracted
            return False
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 matches part of dir, so 'all' from m1 is downgraded to True
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2: 'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        # return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
871 856
872 857
def intersectmatchers(m1, m2):
    '''Return a matcher that matches when both ``m1`` and ``m2`` match.

    Shortcuts are taken where possible: if either argument is None the other
    one is returned as is, and if one of them always matches a shallow copy
    of the other is returned instead of a real intersection.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        result = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir'):
            setattr(result, attr, getattr(m1, attr))
        return result

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
893 878
894 879
class intersectionmatcher(basematcher):
    '''Matcher matching what both ``m1`` and ``m2`` match.

    The bad, explicitdir and traversedir attributes are taken from ``m1``.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, the sets of files
            # cannot easily be intersected, because their files() are not
            # always files. For example, when intersecting a matcher
            # "-I glob:foo.txt" with a matcher of "path:dir2", "dir2" must
            # not be removed from the set.
            return self._m1.files() + self._m2.files()
        # Filter the files of the exact side (m1 preferred) through the
        # other matcher.
        if self._m1.isexact():
            exactside, otherside = self._m1, self._m2
        else:
            exactside, otherside = self._m2, self._m1
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        if not first:
            return False
        # bool() because first=True combined with 'all' from m2 must not
        # become 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        first = self._m1.visitchildrenset(dir)
        if not first:
            return set()
        second = self._m2.visitchildrenset(dir)
        if not second:
            return set()

        # 'all' on one side defers entirely to the other side
        if first == b'all':
            return second
        if second == b'all':
            return first

        if first == b'this' or second == b'this':
            return b'this'

        assert isinstance(first, set) and isinstance(second, set)
        return first.intersection(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
955 940
956 941
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below 'path', with
        # the 'path/' prefix stripped.
        self._files = [
            f[len(path) + 1 :]
            for f in matcher._files
            if f.startswith(path + b"/")
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        # Report the failure to the wrapped matcher using the full path.
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        # The root of this matcher is spelled b''; it maps to the
        # subdirectory itself in the wrapped matcher.
        if dir == b'':
            dir = self._path
        else:
            dir = self._path + b"/" + dir
        return self._matcher.visitdir(dir)

    def visitchildrenset(self, dir):
        # Same path translation as visitdir().
        if dir == b'':
            dir = self._path
        else:
            dir = self._path + b"/" + dir
        return self._matcher.visitchildrenset(dir)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
1039 1022
1040 1023
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        # Ancestor directories of the prefix path, as yielded by
        # util.finddirs().
        return set(util.finddirs(self._path))

    def visitdir(self, dir):
        prefix = self._pathprefix
        if dir == self._path:
            # The prefix itself is the root (b'') of the wrapped matcher.
            inner = b''
        elif dir.startswith(prefix):
            inner = dir[len(prefix) :]
        else:
            # Outside the prefix: only directories on the way to it count.
            return dir in self._pathdirs
        return self._matcher.visitdir(inner)

    def visitchildrenset(self, dir):
        prefix = self._pathprefix
        if dir == self._path:
            inner = b''
        elif dir.startswith(prefix):
            inner = dir[len(prefix) :]
        elif dir in self._pathdirs:
            return b'this'
        else:
            return set()
        return self._matcher.visitchildrenset(inner)

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )
1123 1106
1124 1107
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, explicitdir, traversedir) are taken from
    the first matcher.
    """

    # NOTE(review): __init__ copies explicitdir and traversedir from the first
    # matcher but not bad, although the docstring lists it -- confirm which
    # one is intended.

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        for candidate in self._matchers:
            if candidate(f):
                return True
        return False

    def visitdir(self, dir):
        visit = False
        for sub in self._matchers:
            answer = sub.visitdir(dir)
            if answer == b'all':
                # One matcher wanting everything settles the question.
                return answer
            visit |= answer
        return visit

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for sub in self._matchers:
            answer = sub.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if sawthis or answer == b'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children.update(answer)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1176 1159
1177 1160
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return ``default``.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
1192 1175
1193 1176
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns (kind, pat) when the text before the first ':' is one of
    allpatternkinds, and (default, pattern) otherwise."""
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1202 1185
1203 1186
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    pos, end = 0, len(pat)
    pieces = []  # regexp fragments, concatenated at the end
    depth = 0  # number of currently open '{' groups
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread character, or False at the end of the pattern.
        return pos < end and pat[pos : pos + 1]

    while pos < end:
        ch = pat[pos : pos + 1]
        pos += 1
        if ch not in b'*?[{},\\':
            pieces.append(escape(ch, ch))
        elif ch == b'*':
            if peek() != b'*':
                # A single '*' never crosses a directory separator.
                pieces.append(b'[^/]*')
                continue
            pos += 1
            if peek() == b'/':
                # '**/' also matches zero directories.
                pos += 1
                pieces.append(b'(?:.*/)?')
            else:
                pieces.append(b'.*')
        elif ch == b'?':
            pieces.append(b'.')
        elif ch == b'[':
            close = pos
            # A leading '!' or ']' belongs to the class, it does not end it.
            if close < end and pat[close : close + 1] in b'!]':
                close += 1
            while close < end and pat[close : close + 1] != b']':
                close += 1
            if close >= end:
                # Unterminated class: treat the '[' literally.
                pieces.append(b'\\[')
                continue
            members = pat[pos:close].replace(b'\\', b'\\\\')
            pos = close + 1
            first = members[0:1]
            if first == b'!':
                members = b'^' + members[1:]
            elif first == b'^':
                members = b'\\' + members
            pieces.append(b'[%s]' % members)
        elif ch == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif ch == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif ch == b',' and depth:
            pieces.append(b'|')
        elif ch == b'\\':
            following = peek()
            if following:
                pos += 1
                pieces.append(escape(following, following))
            else:
                pieces.append(escape(ch, ch))
        else:
            # '}' or ',' outside of any group: plain characters.
            pieces.append(escape(ch, ch))
    return b''.join(pieces)
1286 1269
1287 1270
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a
    regular expression.
    globsuffix is appended to the regexp of globs.

    Raises error.ProgrammingError for a kind that has no regexp form.'''

    if rustmod is not None:
        try:
            return rustmod.build_single_regex(kind, pat, globsuffix)
        except rustmod.PatternError:
            raise error.ProgrammingError(
                b'not a regex pattern: %s:%s' % (kind, pat)
            )

    reescape = util.stringutil.reescape

    if kind in (b'glob', b'relpath') and not pat:
        return b''
    elif kind == b're':
        return pat
    elif kind in (b'path', b'relpath'):
        return b'' if pat == b'.' else reescape(pat) + b'(?:/|$)'
    elif kind == b'rootfilesin':
        # Pattern is a directory name ('.' being the root).
        dirprefix = b'' if pat == b'.' else reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + b'[^/]+$'
    elif kind == b'relglob':
        body = _globre(pat)
        star = b'[^/]*'
        if body.startswith(star):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + body[len(star) :] + globsuffix
        return b'(?:|.*/)' + body + globsuffix
    elif kind == b'relre':
        return pat if pat.startswith(b'^') else b'.*' + pat
    elif kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1331 1314
1332 1315
def _buildmatch(kindpats, globsuffix, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.'''
    checks = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for subincludes are built lazily, the first time a file
        # below their prefix is tested.
        cache = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                sub = cache.get(prefix)
                if sub is None:
                    sub = cache[prefix] = match(*matcherargs)
                if sub(f[len(prefix) :]):
                    return True
            return False

        checks.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only directory-membership patterns: a set lookup on the file's
            # parent directory replaces the regexp.
            dirs = {p for k, p, s in kindpats}

            def indirs(f):
                slash = f.rfind(b'/')
                parent = f[:slash] if slash >= 0 else b'.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            checks.append(indirs)
        else:
            regex, rematch = _buildregexmatch(kindpats, globsuffix)
            checks.append(rematch)

    if len(checks) != 1:
        return regex, lambda f: any(check(f) for check in checks)
    return regex, checks[0]
1379 1362
1380 1363
# Size limit, in bytes, used by _buildregexmatch(): a single pattern regexp
# longer than this aborts, and joined regexps are split into several groups
# so that no group grows beyond it.
MAX_RE_SIZE = 20000


def _joinregexes(regexps):
    """gather multiple regular expressions into a single one"""
    alternation = b'|'
    return alternation.join(regexps)
1387 1370
1388 1371
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        groups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # Cut the list of regexps into consecutive groups whose joined size
        # (pieces plus one separator each) stays within MAX_RE_SIZE.
        groupstart = 0
        groupbytes = 0
        for idx, piece in enumerate(regexps):
            piecebytes = len(piece)
            if piecebytes > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecebytes
                raise error.Abort(msg)
            if groupbytes + piecebytes > MAX_RE_SIZE:
                groups.append(_joinregexes(regexps[groupstart:idx]))
                groupstart = idx
                groupbytes = 0
            groupbytes += piecebytes + 1

        if groupstart != 0:
            # At least one split happened: close the last group and try
            # every group in turn.
            groups.append(_joinregexes(regexps[groupstart:]))
            groupmatchers = [_rematcher(g) for g in groups]

            def func(s):
                return any(m(s) for m in groupmatchers)

        else:
            single = _rematcher(fullregexp)

            def func(s):
                return bool(single(s))

        return fullregexp, func
    except re.error:
        # Compile the patterns one by one to find out which one is invalid.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _(b"invalid pattern (%s): %s") % (k, p)
                raise error.Abort(msg)
        raise error.Abort(_(b"invalid pattern"))
1441 1424
1442 1425
def _patternrootsanddirs(kindpats):
    '''Returns roots and directories corresponding to each pattern.

    This calculates the roots and directories exactly matching the patterns and
    returns a tuple of (roots, dirs) for each. It does not return other
    directories which may also need to be considered, like the parent
    directories.
    '''
    globchars = (b'[', b'{', b'*', b'?')
    roots = []
    dirs = []
    for kind, pat, source in kindpats:
        if kind in (b'glob', b'rootglob'):
            # find the non-glob prefix
            literal = []
            for segment in pat.split(b'/'):
                if any(ch in segment for ch in globchars):
                    break
                literal.append(segment)
            roots.append(b'/'.join(literal))
        elif kind in (b'relpath', b'path'):
            # '.' is spelled as the empty root
            roots.append(b'' if pat == b'.' else pat)
        elif kind == b'rootfilesin':
            dirs.append(b'' if pat == b'.' else pat)
        else:
            # relglob, re, relre
            roots.append(b'')
    return roots, dirs
1472 1455
1473 1456
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    rootsanddirs = _patternrootsanddirs(kindpats)
    return rootsanddirs[0]
1478 1461
1479 1462
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set()
    for paths in (dirs, roots):
        parents.update(pathutil.dirs(paths))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1522 1505
1523 1506
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, so it cannot contribute an
    # explicit filename; every other kind is kept.
    candidates = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(candidates)
1536 1519
1537 1520
def _prefix(kindpats):
    '''Whether all the patterns match a prefix (i.e. recursively)'''
    prefixkinds = (b'path', b'relpath')
    return all(kind in prefixkinds for kind, pat, source in kindpats)
1544 1527
1545 1528
# Lazily compiled regexp used by readpatternfile() to strip comments.
_commentre = None


def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.

    warn, when not false, is called with a message for every unknown
    "syntax:" line.
    '''
    global _commentre

    if rustmod is not None:
        result, warnings = rustmod.read_pattern_file(
            filepath, bool(warn), sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The with block closes the file even when reading or parsing a line
    # raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                # Switch the default kind for the following lines.
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A line may override the default kind with its own prefix,
            # spelled either as the expanded kind or as the syntax name
            # followed by ':'.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now