##// END OF EJS Templates
match: normalize `cwd` early...
Martin von Zweigbergk -
r44402:5685ce2e default
parent child Browse files
Show More
@@ -1,1615 +1,1616 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from .pycompat import open
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 pathutil,
21 21 policy,
22 22 pycompat,
23 23 util,
24 24 )
25 25 from .utils import stringutil
26 26
# Rust 'filepatterns' module as returned by policy.importrust().
# NOTE(review): presumably None when the Rust extension is unavailable --
# confirm against policy.importrust.
rustmod = policy.importrust('filepatterns')

# The pattern kind prefixes (the part before ':' in 'kind:pattern').
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose patterns are canonicalized relative to the cwd when normalized.
cwdrelativepatternkinds = (b'relpath', b'glob')

# Short local alias for the caching-property decorator used by the matchers.
propertycache = util.propertycache
47 47
48 48
def _rematcher(regex):
    '''Compile ``regex`` with util.re and return a callable that matches it.

    When the compiled pattern exposes ``test_match`` (slightly faster,
    provided by facebook's re2 bindings) that method is returned; otherwise
    the compiled pattern's ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is not None:
        return fastmatch
    return compiled.match
58 58
59 59
60 60 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
61 61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 62 matchers = []
63 63 other = []
64 64
65 65 for kind, pat, source in kindpats:
66 66 if kind == b'set':
67 67 if ctx is None:
68 68 raise error.ProgrammingError(
69 69 b"fileset expression with no context"
70 70 )
71 71 matchers.append(ctx.matchfileset(pat, badfn=badfn))
72 72
73 73 if listsubrepos:
74 74 for subpath in ctx.substate:
75 75 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
76 76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 77 matchers.append(pm)
78 78
79 79 continue
80 80 other.append((kind, pat, source))
81 81 return matchers, other
82 82
83 83
84 84 def _expandsubinclude(kindpats, root):
85 85 '''Returns the list of subinclude matcher args and the kindpats without the
86 86 subincludes in it.'''
87 87 relmatchers = []
88 88 other = []
89 89
90 90 for kind, pat, source in kindpats:
91 91 if kind == b'subinclude':
92 92 sourceroot = pathutil.dirname(util.normpath(source))
93 93 pat = util.pconvert(pat)
94 94 path = pathutil.join(sourceroot, pat)
95 95
96 96 newroot = pathutil.dirname(path)
97 97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98 98
99 99 prefix = pathutil.canonpath(root, root, newroot)
100 100 if prefix:
101 101 prefix += b'/'
102 102 relmatchers.append((prefix, matcherargs))
103 103 else:
104 104 other.append((kind, pat, source))
105 105
106 106 return relmatchers, other
107 107
108 108
109 109 def _kindpatsalwaysmatch(kindpats):
110 110 """"Checks whether the kindspats match everything, as e.g.
111 111 'relpath:.' does.
112 112 """
113 113 for kind, pat, source in kindpats:
114 114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 115 return False
116 116 return True
117 117
118 118
def _buildkindpatsmatcher(
    matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
):
    '''Build a single matcher for kindpats using matchercls.

    'set' patterns are expanded to fileset matchers first; whatever is left
    is handed to matchercls(root, kindpats, badfn=badfn). Depending on how
    many matchers result, a nevermatcher (none), the lone matcher (one) or a
    unionmatcher (several) is returned.
    '''
    filesetmatchers, plainpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
    )

    candidates = []
    if plainpats:
        candidates.append(matchercls(root, plainpats, badfn=badfn))
    candidates.extend(filesetmatchers)

    if not candidates:
        return nevermatcher(badfn=badfn)
    if len(candidates) > 1:
        return unionmatcher(candidates)
    return candidates[0]
136 136
137 137
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
       matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
       of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    # Resolve cwd against root once, so everything below sees a normalized
    # path even when cwd was given relative to root.
    cwd = util.normpath(os.path.join(root, cwd))
    normalize = _donormalize
    if icasefs:
        # This branch dereferences ctx, so icasefs=True needs a ctx.
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        # Replacement for _donormalize that additionally runs every
        # non-regex pattern through dirstate.normalize.
        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    # include/exclude patterns always default to the 'glob' kind, and their
    # matchers are built without the caller's badfn.
    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m
293 294
294 295
def exact(files, badfn=None):
    '''Return an exactmatcher for ``files``, using ``badfn`` as its bad()
    callback when given.'''
    return exactmatcher(files, badfn=badfn)
297 298
298 299
def always(badfn=None):
    '''Return an alwaysmatcher, using ``badfn`` as its bad() callback when
    given.'''
    return alwaysmatcher(badfn)
301 302
302 303
def never(badfn=None):
    '''Return a nevermatcher, using ``badfn`` as its bad() callback when
    given.'''
    return nevermatcher(badfn)
305 306
306 307
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() callback is ``badfn``.

    The matcher passed in is left unmodified.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
314 315
315 316
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    patterns - iterable of patterns, each optionally prefixed with 'kind:'
    default - kind assumed for patterns without an explicit prefix
    root, cwd - used to canonicalize the cwd-relative kinds
    auditor - optional path auditor forwarded to pathutil.canonpath
    warn - optional callback used to report unreadable include files

    Returns a list of (kind, pat, source) tuples; source is b'' for patterns
    given directly, and the listfile/include path for patterns read from a
    file. Raises error.Abort when a listfile cannot be read or an included
    pattern file aborts.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # Patterns coming from a list file record that file as source.
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3, so read the
                # message through .args (works on Python 2 as well).
                raise error.Abort(b'%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                # An unreadable include file is skipped, not fatal.
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats
363 364
364 365
class basematcher(object):
    '''Base class of the matchers.

    With the defaults defined here a matcher has no files, matches nothing,
    and reports every directory as worth visiting.
    '''

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Tell whether ``dir`` should be visited, i.e. whether it or one of
        its subdirectories may contain matches. This is based on the match's
        primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all
        subdirectories should be visited. Otherwise returns True or False
        indicating whether the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Like visitdir(), but may additionally say which children of
        ``dir`` need to be visited. This is based on the match's primary,
        included, and excluded patterns.

        The results of the two methods relate as follows:

          visitdir | visitchildrenset
         ----------+-------------------
           False   | set()
           'all'   | 'all'
           True    | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of
          visitchildrenset is capable of recognizing this; some
          implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of
          # matchers is applied to the entire matcher, we have to downgrade
          # this to 'this' due to the non-prefix 'rootfilesin'-kind matcher
          # being mixed in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is
          a file or a directory, so visitchildrenset('dir') for most matchers
          will return {'f'}, but if the matcher knows it's a file (like
          exactmatcher does), it may return 'this'. Do not rely on the return
          being a set indicating that there are no files in this dir to
          investigate (or equivalently that if there are files to investigate
          in 'dir' that it will always return 'this').
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
479 480
480 481
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        # Every file name matches.
        return True

    def visitdir(self, dir):
        # Every directory, and everything below it, is to be visited.
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
501 502
502 503
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # Nothing can match, so no directory needs to be visited.
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
528 529
529 530
class predicatematcher(basematcher):
    """Wrap a plain boolean function so it can be used as a matcher.

    predfn - callable installed as this matcher's matchfn
    predrepr - optional description of predfn, used by repr()
    badfn - optional bad() callback
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Fall back to the byte repr of the function itself when no usable
        # description was supplied.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # The spelling of the tag below is runtime output; kept as is.
        return b'<predicatenmatcher pred=%s>' % described
544 545
545 546
class patternmatcher(basematcher):
    r"""Matcher built from (kind, pat, source) tuples against a 'root'
    directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        # Directories leading to the explicitly listed files.
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        listed = dir in self._fileset
        if listed and self._prefix:
            return b'all'
        if listed or dir in self._dirs:
            return True
        # Also visit anything located below an explicitly listed path.
        return any(
            ancestor in self._fileset for ancestor in pathutil.finddirs(dir)
        )

    def visitchildrenset(self, dir):
        # Translate visitdir()'s answer: True -> 'this', False -> set(),
        # 'all' -> 'all'.
        verdict = self.visitdir(dir)
        if verdict is True:
            return b'this'
        if not verdict:
            return set()
        assert verdict == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
617 618
618 619
619 620 # This is basically a reimplementation of pathutil.dirs that stores the
620 621 # children instead of just a count of them, plus a small optional optimization
621 622 # to avoid some directories we don't need.
622 623 class _dirchildren(object):
623 624 def __init__(self, paths, onlyinclude=None):
624 625 self._dirs = {}
625 626 self._onlyinclude = onlyinclude or []
626 627 addpath = self.addpath
627 628 for f in paths:
628 629 addpath(f)
629 630
630 631 def addpath(self, path):
631 632 if path == b'':
632 633 return
633 634 dirs = self._dirs
634 635 findsplitdirs = _dirchildren._findsplitdirs
635 636 for d, b in findsplitdirs(path):
636 637 if d not in self._onlyinclude:
637 638 continue
638 639 dirs.setdefault(d, set()).add(b)
639 640
640 641 @staticmethod
641 642 def _findsplitdirs(path):
642 643 # yields (dirname, basename) tuples, walking back to the root. This is
643 644 # very similar to pathutil.finddirs, except:
644 645 # - produces a (dirname, basename) tuple, not just 'dirname'
645 646 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
646 647 # slash.
647 648 oldpos = len(path)
648 649 pos = path.rfind(b'/')
649 650 while pos != -1:
650 651 yield path[:pos], path[pos + 1 : oldpos]
651 652 oldpos = pos
652 653 pos = path.rfind(b'/', 0, pos)
653 654 yield b'', path[:oldpos]
654 655
655 656 def get(self, path):
656 657 return self._dirs.get(path, set())
657 658
658 659
class includematcher(basematcher):
    '''Matcher built from (kind, pat, source) tuples, tracking which
    directories are included recursively (roots), non-recursively (dirs), or
    only as a way to reach the former two (parents).'''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        isroot = dir in self._roots
        if isroot and self._prefix:
            return b'all'
        if isroot or dir in self._dirs or dir in self._parents:
            return True
        # Anything below a recursively included root is visited too.
        return any(
            ancestor in self._roots for ancestor in pathutil.finddirs(dir)
        )

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        return _dirchildren(
            itertools.chain(self._dirs, self._roots, self._parents),
            onlyinclude=self._parents,
        )

    def visitchildrenset(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in roots
            or dir in roots
            or dir in self._dirs
            or any(ancestor in roots for ancestor in pathutil.finddirs(dir))
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
721 722
722 723
class exactmatcher(basematcher):
    r'''Matcher for a fixed list of files. The entries are taken literally as
    paths, never as patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | (self._dirs - {b''})
        if dir != b'':
            # Keep what lives below dir, expressed relative to dir.
            below = dir + b'/'
            cut = len(below)
            candidates = {c[cut:] for c in candidates if c.startswith(below)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating that it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {c for c in candidates if b'/' not in c}
        # We really do not expect children to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
786 787
787 788
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 only once: with nested composite matchers a repeated call
        # would double the work at every nesting level.
        m2visit = self._m2.visitdir(dir)
        if m2visit == b'all':
            # m2 excludes everything below dir.
            return False
        m1visit = self._m1.visitdir(dir)
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return m1visit
        return bool(m1visit)

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
855 856
856 857
def intersectmatchers(m1, m2):
    '''Return a matcher that matches only what both m1 and m2 match.

    When either argument is None the other one is returned as is. When one
    of them always matches, a copy of the other is returned (carrying m1's
    bad and traversedir); otherwise an intersectionmatcher is built.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        narrowed = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        narrowed.bad = m1.bad
        narrowed.traversedir = m1.traversedir
        return narrowed
    if m2.always():
        return copy.copy(m1)
    return intersectionmatcher(m1, m2)
875 876
876 877
class intersectionmatcher(basematcher):
    '''Matches what both m1 and m2 match; bad and traversedir are taken
    from m1.'''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # Filter the files of an exact side through the other matcher.
        exactside, otherside = self._m1, self._m2
        if not exactside.isexact():
            exactside, otherside = otherside, exactside
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        if not first:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side.
        if m1_set == b'all':
            return m2_set
        if m2_set == b'all':
            return m1_set

        if b'this' in (m1_set, m2_set):
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set & m2_set

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
936 937
937 938
class subdirmatcher(basematcher):
    """Expose a matcher as seen from inside one of its subdirectories.

    Paths handed to this matcher are relative to ``path``; they get the
    ``path/`` prefix added before being passed to the wrapped matcher, and
    the wrapped matcher's files get it stripped:

    >>> from . import pycompat
    >>> m1 = match(b'/root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below ``path`` and
        # express them relative to it.
        dirprefix = path + b"/"
        skip = len(path) + 1
        self._files = [
            name[skip:] for name in matcher._files if name.startswith(dirprefix)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def bad(self, f, msg):
        # Report the file under its name in the wrapped matcher's namespace.
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        # The empty directory is the subdirectory itself.
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(target)

    def visitchildrenset(self, dir):
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(target)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
1018 1019
1019 1020
class prefixdirmatcher(basematcher):
    """Expose a matcher rooted in a subdirectory as seen from a parent.

    Only paths below ``path`` can match; for those, the ``path/`` prefix is
    removed before the wrapped matcher is consulted. Directories leading to
    ``path`` are reported as worth visiting.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        # Directories derived from the prefix path by pathutil.finddirs().
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        prefix = self._pathprefix
        if dir == self._path:
            # The prefix directory is the wrapped matcher's root.
            return self._matcher.visitdir(b'')
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix) :])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        prefix = self._pathprefix
        if dir == self._path:
            return self._matcher.visitchildrenset(b'')
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix) :])
        return b'this' if dir in self._pathdirs else set()

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )
1101 1102
1102 1103
class unionmatcher(basematcher):
    """A matcher that accepts whatever any of several matchers accepts.

    ``traversedir`` is taken from the first matcher. This constructor does
    not copy ``bad`` from any of the matchers.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == b'all':
                # One matcher wanting everything settles the question.
                return answer
            visit = visit | answer
        return visit

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if sawthis:
                # Already decided on 'this'; keep looping only because a
                # later matcher might still answer 'all'.
                continue
            if answer == b'this':
                sawthis = True
                continue
            assert isinstance(answer, set)
            children.update(answer)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1153 1154
1154 1155
def patkind(pattern, default=None):
    r'''Return the kind of a 'kind:pat' pattern whose kind is known.

    ``default`` is returned when the pattern has no known kind prefix.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
1169 1170
1170 1171
def _patsplit(pattern, default):
    """Separate a pattern into its kind prefix and the pattern proper.

    Returns ``(kind, pat)`` when the text before the first ``:`` is one of
    ``allpatternkinds``; otherwise returns ``(default, pattern)`` with the
    pattern untouched."""
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1179 1180
1180 1181
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned one byte at a time. ``*``, ``**``, ``**/``,
    ``?``, ``[...]`` character classes, ``{a,b}`` alternation and
    backslash escapes are translated; every other byte is passed through
    the regexp escape map.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the index of the next unread byte, n the pattern length.
    i, n = 0, len(pat)
    res = b''
    # Number of '{' groups currently open; '}' and ',' are only special
    # while this is non-zero.
    group = 0
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread byte, or False once the pattern is exhausted.
        return i < n and pat[i : i + 1]

    while i < n:
        # Slice rather than index so that c is a bytes object.
        c = pat[i : i + 1]
        i += 1
        if c not in b'*?[{},\\':
            # Ordinary byte: emit it, escaped if the map has an entry.
            res += escape(c, c)
        elif c == b'*':
            if peek() == b'*':
                i += 1
                if peek() == b'/':
                    # '**/' matches any number of leading directories,
                    # including none.
                    i += 1
                    res += b'(?:.*/)?'
                else:
                    # '**' matches anything, '/' included.
                    res += b'.*'
            else:
                # A single '*' does not cross a '/'.
                res += b'[^/]*'
        elif c == b'?':
            res += b'.'
        elif c == b'[':
            # Look for the closing ']'. A '!' or ']' right after the '['
            # is skipped first, so it cannot terminate the class.
            j = i
            if j < n and pat[j : j + 1] in b'!]':
                j += 1
            while j < n and pat[j : j + 1] != b']':
                j += 1
            if j >= n:
                # No closing bracket: treat the '[' as a literal.
                res += b'\\['
            else:
                # Double the backslashes inside the class.
                stuff = pat[i:j].replace(b'\\', b'\\\\')
                i = j + 1
                if stuff[0:1] == b'!':
                    # Glob negation '!' becomes regexp negation '^'.
                    stuff = b'^' + stuff[1:]
                elif stuff[0:1] == b'^':
                    # A leading '^' is literal in the glob; escape it so
                    # the regexp does not read it as negation.
                    stuff = b'\\' + stuff
                res = b'%s[%s]' % (res, stuff)
        elif c == b'{':
            group += 1
            res += b'(?:'
        elif c == b'}' and group:
            res += b')'
            group -= 1
        elif c == b',' and group:
            res += b'|'
        elif c == b'\\':
            # A backslash makes the following byte literal; a trailing
            # backslash stands for itself.
            p = peek()
            if p:
                i += 1
                res += escape(p, p)
            else:
                res += escape(c, c)
        else:
            # '}' or ',' outside of any group: plain byte.
            res += escape(c, c)
    return res
1263 1264
1264 1265
def _regex(kind, pat, globsuffix):
    '''Translate one (normalized) pattern of the given kind into a
    regular expression.

    globsuffix is appended to the regexp of globs. When the Rust module is
    available the translation is delegated to it. Kinds that cannot be
    expressed as a regexp raise error.ProgrammingError.'''

    if rustmod is not None:
        try:
            return rustmod.build_single_regex(kind, pat, globsuffix)
        except rustmod.PatternError:
            raise error.ProgrammingError(
                b'not a regex pattern: %s:%s' % (kind, pat)
            )

    if kind in (b'glob', b'relpath') and not pat:
        return b''

    if kind == b're':
        return pat

    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        # The path itself, or anything below it.
        return util.stringutil.reescape(pat) + b'(?:/|$)'

    if kind == b'rootfilesin':
        # Pattern is a directory name; b'.' contributes no prefix.
        if pat == b'.':
            dirprefix = b''
        else:
            dirprefix = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + b'[^/]+$'

    if kind == b'relglob':
        segment = b'[^/]*'
        body = _globre(pat)
        if body.startswith(segment):
            # When pat has the form *XYZ (common), make the returned regex
            # more legible by returning the regex for **XYZ instead of
            # **/*XYZ.
            return b'.*' + body[len(segment) :] + globsuffix
        return b'(?:|.*/)' + body + globsuffix

    if kind == b'relre':
        # An anchored regexp is used as is; otherwise allow any prefix.
        return pat if pat.startswith(b'^') else b'.*' + pat

    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix

    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1308 1309
1309 1310
def _buildmatch(kindpats, globsuffix, root):
    '''Return a regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs. Subinclude patterns are
    split off first and matched through lazily created sub-matchers; the
    remaining patterns are matched either by a directory lookup (when they
    are all 'rootfilesin') or by a compiled regexp.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # prefix -> matcher, filled in the first time a prefix is hit
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = submatchers[prefix] = match(*matcherargs)
                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}

            def indirs(f):
                # Directory part of f, or b'.' for a file at the root.
                head, slash, tail = f.rpartition(b'/')
                return (head if slash else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(indirs)
        else:
            regex, rematch = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(rematch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1356 1357
1357 1358
# Size limit, in bytes, applied by _buildregexmatch(): a single pattern whose
# regexp is longer than this aborts, and a list of regexps is compiled in
# several groups once their accumulated length would exceed it.
MAX_RE_SIZE = 20000
1359 1360
1360 1361
1361 1362 def _joinregexes(regexps):
1362 1363 """gather multiple regular expressions into a single one"""
1363 1364 return b'|'.join(regexps)
1364 1365
1365 1366
def _buildregexmatch(kindpats, globsuffix):
    """Turn a list of (kind, pat, source) patterns into a regexp string
    and a matcher function.

    The returned regexp is always the alternation of all patterns. The
    matcher function uses that single regexp when it fits in MAX_RE_SIZE and
    otherwise tries several smaller compiled groups in turn. An invalid
    pattern aborts with a message naming the offending pattern.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # Cut the list into runs whose joined length stays within
        # MAX_RE_SIZE; startidx marks where the current run begins.
        allgroups = []
        startidx = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if groupsize + piecesize > MAX_RE_SIZE:
                allgroups.append(_joinregexes(regexps[startidx:idx]))
                startidx, groupsize = idx, 0
            # + 1 accounts for the joining '|'
            groupsize += piecesize + 1

        if startidx == 0:
            # Nothing was split off: one regexp covers everything.
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))

        else:
            allgroups.append(_joinregexes(regexps[startidx:]))
            allmatchers = [_rematcher(g) for g in allgroups]

            def func(s):
                return any(m(s) for m in allmatchers)

        return fullregexp, func
    except re.error:
        # Compile the patterns one by one to find which one is broken.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _(b"invalid pattern (%s): %s") % (k, p)
                raise error.Abort(msg)
        raise error.Abort(_(b"invalid pattern"))
1418 1419
1419 1420
1420 1421 def _patternrootsanddirs(kindpats):
1421 1422 '''Returns roots and directories corresponding to each pattern.
1422 1423
1423 1424 This calculates the roots and directories exactly matching the patterns and
1424 1425 returns a tuple of (roots, dirs) for each. It does not return other
1425 1426 directories which may also need to be considered, like the parent
1426 1427 directories.
1427 1428 '''
1428 1429 r = []
1429 1430 d = []
1430 1431 for kind, pat, source in kindpats:
1431 1432 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1432 1433 root = []
1433 1434 for p in pat.split(b'/'):
1434 1435 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1435 1436 break
1436 1437 root.append(p)
1437 1438 r.append(b'/'.join(root))
1438 1439 elif kind in (b'relpath', b'path'):
1439 1440 if pat == b'.':
1440 1441 pat = b''
1441 1442 r.append(pat)
1442 1443 elif kind in (b'rootfilesin',):
1443 1444 if pat == b'.':
1444 1445 pat = b''
1445 1446 d.append(pat)
1446 1447 else: # relglob, re, relre
1447 1448 r.append(b'')
1448 1449 return r, d
1449 1450
1450 1451
def _roots(kindpats):
    '''Return the root directories that the given patterns match
    recursively (the first element of _patternrootsanddirs()).'''
    return _patternrootsanddirs(kindpats)[0]
1455 1456
1456 1457
def _rootsdirsandparents(kindpats):
    '''Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents); the first two are lists and
    the last one is a set.

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set(pathutil.dirs(exactdirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1499 1500
1500 1501
def _explicitfiles(kindpats):
    '''Return the names in the patterns that could be explicit files.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name a directory, never a file, so drop those
    # patterns before asking for the roots.
    canbefile = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(canbefile)
1513 1514
1514 1515
1515 1516 def _prefix(kindpats):
1516 1517 '''Whether all the patterns match a prefix (i.e. recursively)'''
1517 1518 for kind, pat, source in kindpats:
1518 1519 if kind not in (b'path', b'relpath'):
1519 1520 return False
1520 1521 return True
1521 1522
1522 1523
# Compiled lazily by readpatternfile() the first time a line contains '#'.
_commentre = None


def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.

    warn, when true, is called with a message for every "syntax:" line
    naming an unknown syntax; such lines are otherwise ignored.

    The file is closed before returning, also when reading or parsing
    raises.
    '''
    global _commentre

    if rustmod is not None:
        result, warnings = rustmod.read_pattern_file(
            filepath, bool(warn), sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    # Maps a syntax name to the kind prefix put in front of the pattern.
    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager guarantees the file is closed even if a line
    # fails to parse or warn() raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                # Switch the default syntax for the lines that follow.
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # An explicit prefix on the line overrides the current default
            # syntax, for this line only.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now