##// END OF EJS Templates
match: don't util.normpath() cwd...
Matt Harbison -
r44417:8a81fa44 default
parent child Browse files
Show More
@@ -1,1619 +1,1619 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from .pycompat import open
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 pathutil,
21 21 policy,
22 22 pycompat,
23 23 util,
24 24 )
25 25 from .utils import stringutil
26 26
# Optional Rust implementation of the file-pattern helpers, obtained through
# the policy module.
rustmod = policy.importrust('filepatterns')

# Every 'kind:' prefix that is recognised in front of a pattern.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose patterns are canonicalised against the current working
# directory (rather than the repository root) by _donormalize().
cwdrelativepatternkinds = (b'relpath', b'glob')

# Local alias for the caching property decorator used by the matcher classes.
propertycache = util.propertycache
47 47
48 48
def _rematcher(regex):
    """Compile ``regex`` with the best available engine.

    Returns the bound matching function of the compiled pattern rather than
    the pattern object itself.
    """
    compiled = util.re.compile(regex)
    try:
        # facebook's re2 bindings expose test_match, which is slightly faster
        matchfn = compiled.test_match
    except AttributeError:
        # plain 're' pattern objects only offer match()
        matchfn = compiled.match
    return matchfn
58 58
59 59
60 60 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
61 61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 62 matchers = []
63 63 other = []
64 64
65 65 for kind, pat, source in kindpats:
66 66 if kind == b'set':
67 67 if ctx is None:
68 68 raise error.ProgrammingError(
69 69 b"fileset expression with no context"
70 70 )
71 71 matchers.append(ctx.matchfileset(pat, badfn=badfn))
72 72
73 73 if listsubrepos:
74 74 for subpath in ctx.substate:
75 75 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
76 76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 77 matchers.append(pm)
78 78
79 79 continue
80 80 other.append((kind, pat, source))
81 81 return matchers, other
82 82
83 83
84 84 def _expandsubinclude(kindpats, root):
85 85 '''Returns the list of subinclude matcher args and the kindpats without the
86 86 subincludes in it.'''
87 87 relmatchers = []
88 88 other = []
89 89
90 90 for kind, pat, source in kindpats:
91 91 if kind == b'subinclude':
92 92 sourceroot = pathutil.dirname(util.normpath(source))
93 93 pat = util.pconvert(pat)
94 94 path = pathutil.join(sourceroot, pat)
95 95
96 96 newroot = pathutil.dirname(path)
97 97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98 98
99 99 prefix = pathutil.canonpath(root, root, newroot)
100 100 if prefix:
101 101 prefix += b'/'
102 102 relmatchers.append((prefix, matcherargs))
103 103 else:
104 104 other.append((kind, pat, source))
105 105
106 106 return relmatchers, other
107 107
108 108
109 109 def _kindpatsalwaysmatch(kindpats):
110 110 """"Checks whether the kindspats match everything, as e.g.
111 111 'relpath:.' does.
112 112 """
113 113 for kind, pat, source in kindpats:
114 114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 115 return False
116 116 return True
117 117
118 118
def _buildkindpatsmatcher(
    matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
):
    """Build a single matcher for ``kindpats``.

    Fileset (``set:``) patterns are expanded into their own matchers; the
    remaining patterns, if any, are handed to ``matchercls``.  The result is
    a nevermatcher when nothing is left, the lone matcher when there is
    exactly one, and a unionmatcher otherwise.
    """
    filesetmatchers, plainpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
    )

    # The pattern matcher (when present) always comes first.
    combined = []
    if plainpats:
        combined.append(matchercls(root, plainpats, badfn=badfn))
    combined.extend(filesetmatchers)

    if not combined:
        return nevermatcher(badfn=badfn)
    if len(combined) == 1:
        return combined[0]
    return unionmatcher(combined)
136 136
137 137
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
       matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
       of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    # cwd is only converted to the local path separator and joined onto
    # root; it is deliberately *not* run through util.normpath().
    cwd = os.path.join(root, util.localpath(cwd))
    normalize = _donormalize
    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # Same contract as _donormalize(), but every non-regex pattern
            # is additionally folded to the case recorded by the dirstate.
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        # include/exclude patterns always default to the 'glob' kind and the
        # matchers built from them do not receive the caller's badfn.
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m
297 297
298 298
def exact(files, badfn=None):
    """Return an exactmatcher for ``files``, with ``badfn`` as its optional
    bad() callback."""
    return exactmatcher(files, badfn=badfn)
301 301
302 302
def always(badfn=None):
    """Return an alwaysmatcher, with ``badfn`` as its optional bad()
    callback."""
    return alwaysmatcher(badfn)
305 305
306 306
def never(badfn=None):
    """Return a nevermatcher, with ``badfn`` as its optional bad()
    callback."""
    return nevermatcher(badfn)
309 309
310 310
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() callback is ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
318 318
319 319
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples.  ``source`` is the
    listfile/include file a pattern was read from, or b'' for patterns given
    directly.  ``default`` is the kind assumed for patterns without a prefix;
    ``auditor`` is forwarded to pathutil.canonpath() and ``warn``, if given,
    is used to report unreadable include files.

    Raises error.Abort if a listfile cannot be read or if processing an
    include file aborts.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # Every pattern read from the list is attributed to the listfile.
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    # Keep the innermost source for nested includes.
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3, so read the
                # message through .args rather than inst[0].
                raise error.Abort(b'%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats
367 367
368 368
class basematcher(object):
    """Base class of the matchers in this module.

    On its own it matches nothing (matchfn() returns False) and lists no
    files; subclasses override the methods below.
    """

    def __init__(self, badfn=None):
        # Only shadow the bad() method when a callback was actually given.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        """Calling the matcher is the same as calling its matchfn()."""
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        # Set view of _files, used for membership tests.
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        """Return whether ``f`` matches; the base implementation never
        does."""
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()
483 483
484 484
class alwaysmatcher(basematcher):
    """Matcher that accepts every file and asks for every directory to be
    visited recursively."""

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def __repr__(self):
        return '<alwaysmatcher>'

    def matchfn(self, f):
        return True

    def always(self):
        return True

    def visitchildrenset(self, dir):
        return b'all'

    def visitdir(self, dir):
        return b'all'
505 505
506 506
class nevermatcher(basematcher):
    """Matcher that rejects every file and never asks for a directory to be
    visited."""

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    def __repr__(self):
        return '<nevermatcher>'

    def visitchildrenset(self, dir):
        return set()

    def visitdir(self, dir):
        return False

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def prefix(self):
        return True

    def isexact(self):
        return True
532 532
533 533
class predicatematcher(basematcher):
    """Wrap a plain boolean function so it can be used as a matcher.

    ``predfn`` becomes the matcher's matchfn; ``predrepr`` is an optional
    description used when building the repr.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        # Prefer the supplied description; fall back to the repr of the
        # predicate function itself when there is none.
        description = stringutil.buildrepr(self._predrepr)
        if not description:
            description = pycompat.byterepr(self.matchfn)
        return b'<predicatenmatcher pred=%s>' % description
548 548
549 549
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        # Directories derived from the explicit files via pathutil.dirs().
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        """Return 'all', True or False for ``dir`` (see
        basematcher.visitdir)."""
        explicit = self._fileset
        if self._prefix and dir in explicit:
            return b'all'
        if dir in explicit:
            return True
        if dir in self._dirs:
            return True
        # Finally, visit anything located below an explicitly listed path.
        return any(
            ancestor in explicit for ancestor in pathutil.finddirs(dir)
        )

    def visitchildrenset(self, dir):
        """Translate visitdir()'s answer into visitchildrenset() terms."""
        verdict = self.visitdir(dir)
        if verdict is True:
            return b'this'
        if not verdict:
            return set()
        assert verdict == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
621 621
622 622
623 623 # This is basically a reimplementation of pathutil.dirs that stores the
624 624 # children instead of just a count of them, plus a small optional optimization
625 625 # to avoid some directories we don't need.
626 626 class _dirchildren(object):
627 627 def __init__(self, paths, onlyinclude=None):
628 628 self._dirs = {}
629 629 self._onlyinclude = onlyinclude or []
630 630 addpath = self.addpath
631 631 for f in paths:
632 632 addpath(f)
633 633
634 634 def addpath(self, path):
635 635 if path == b'':
636 636 return
637 637 dirs = self._dirs
638 638 findsplitdirs = _dirchildren._findsplitdirs
639 639 for d, b in findsplitdirs(path):
640 640 if d not in self._onlyinclude:
641 641 continue
642 642 dirs.setdefault(d, set()).add(b)
643 643
644 644 @staticmethod
645 645 def _findsplitdirs(path):
646 646 # yields (dirname, basename) tuples, walking back to the root. This is
647 647 # very similar to pathutil.finddirs, except:
648 648 # - produces a (dirname, basename) tuple, not just 'dirname'
649 649 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
650 650 # slash.
651 651 oldpos = len(path)
652 652 pos = path.rfind(b'/')
653 653 while pos != -1:
654 654 yield path[:pos], path[pos + 1 : oldpos]
655 655 oldpos = pos
656 656 pos = path.rfind(b'/', 0, pos)
657 657 yield b'', path[:oldpos]
658 658
659 659 def get(self, path):
660 660 return self._dirs.get(path, set())
661 661
662 662
class includematcher(basematcher):
    """Matcher built from include/exclude style (kind, pat, source)
    patterns."""

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        """Return 'all', True or False for ``dir`` (see
        basematcher.visitdir)."""
        roots = self._roots
        if self._prefix and dir in roots:
            return b'all'
        if dir in roots or dir in self._dirs or dir in self._parents:
            return True
        # Anything below a recursively included root has to be visited too.
        return any(ancestor in roots for ancestor in pathutil.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Return 'all', 'this' or a set of children for ``dir`` (see
        basematcher.visitchildrenset)."""
        roots = self._roots
        if self._prefix and dir in roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        needsthis = (
            b'' in roots
            or dir in roots
            or dir in self._dirs
            or any(ancestor in roots for ancestor in pathutil.finddirs(dir))
        )
        if needsthis:
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
725 725
726 726
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    # Matching a file is the same as looking it up in the file set.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        """Return the names directly below ``dir`` that lead to a listed
        file, or an empty set when nothing under ``dir`` is listed."""
        if not self._fileset or dir not in self._dirs:
            return set()

        entries = self._fileset | (self._dirs - {b''})
        if dir != b'':
            prefix = dir + b'/'
            skip = len(prefix)
            entries = set(
                entry[skip:] for entry in entries if entry.startswith(prefix)
            )
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {entry for entry in entries if b'/' not in entry}
        # We really do not expect children to be empty, since that would imply
        # that there's something in _dirs that didn't have a file in _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
790 790
791 791
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # Callbacks are taken from the first matcher only.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Return 'all', True or False for ``dir`` (see
        basematcher.visitdir)."""
        # Ask m2 only once; its answer drives all three outcomes below.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # Everything below dir is excluded.
            return False
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 may exclude something below dir, so never report 'all'.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return 'all', 'this' or a set of children for ``dir`` (see
        basematcher.visitchildrenset)."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
859 859
860 860
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    When either argument is None the other one is returned as is, and when
    one side matches everything a copy of the other side is returned instead
    of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m1 contributes nothing but its callbacks, so carry those over.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        result = copy.copy(m2)
        result.bad = m1.bad
        result.traversedir = m1.traversedir
        return result

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
879 879
880 880
class intersectionmatcher(basematcher):
    """Matcher that accepts a file only when both wrapped matchers do.

    bad and traversedir are taken from the first matcher.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # Filter the exact side's files through the other matcher.
        exactside, otherside = self._m1, self._m2
        if not exactside.isexact():
            exactside, otherside = otherside, exactside
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        """Return 'all', True or False for ``dir`` (see
        basematcher.visitdir)."""
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return 'all', 'this' or a set of children for ``dir`` (see
        basematcher.visitchildrenset)."""
        first = self._m1.visitchildrenset(dir)
        if not first:
            return set()
        second = self._m2.visitchildrenset(dir)
        if not second:
            return set()

        # 'all' on one side means the other side decides on its own.
        if first == b'all':
            return second
        if second == b'all':
            return first

        if first == b'this' or second == b'this':
            return b'this'

        assert isinstance(first, set) and isinstance(second, set)
        return first.intersection(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
940 940
941 941
class subdirmatcher(basematcher):
    """Restrict an existing matcher to a single subdirectory.

    Paths handed to this matcher are relative to the subdirectory; the
    subdirectory path is inserted or stripped as needed when talking to the
    wrapped matcher:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the parent's files that live below ``path`` and express
        # them relative to it.
        below = path + b"/"
        cut = len(below)
        self._files = [
            name[cut:] for name in matcher._files if name.startswith(below)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def bad(self, f, msg):
        self._matcher.bad(b"/".join((self._path, f)), msg)

    def matchfn(self, f):
        # The superclass constructor loses information, so the matching
        # function for the subdirectory cannot be rebuilt from the inputs.
        # Delegate to the wrapped matcher with the subdirectory prepended
        # instead (visitdir() does the same).
        return self._matcher.matchfn(b"/".join((self._path, f)))

    def visitdir(self, dir):
        # The empty directory denotes the subdirectory itself.
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(full)

    def visitchildrenset(self, dir):
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(full)

    def always(self):
        return self._always

    def prefix(self):
        wrapped = self._matcher.prefix()
        return wrapped and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
1022 1022
1023 1023
class prefixdirmatcher(basematcher):
    """Expose a matcher rooted in a subdirectory from a parent directory.

    The wrapped matcher's non-matching attributes (bad, traversedir) are not
    carried over.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        lead = self._pathprefix
        return [lead + name for name in self._matcher._files]

    def matchfn(self, f):
        lead = self._pathprefix
        if f.startswith(lead):
            return self._matcher.matchfn(f[len(lead) :])
        return False

    @propertycache
    def _pathdirs(self):
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        lead = self._pathprefix
        if dir == self._path:
            # The prefix directory is the wrapped matcher's root.
            return self._matcher.visitdir(b'')
        if dir.startswith(lead):
            return self._matcher.visitdir(dir[len(lead) :])
        # Outside the prefix, only its ancestors are worth visiting.
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        lead = self._pathprefix
        if dir == self._path:
            return self._matcher.visitchildrenset(b'')
        if dir.startswith(lead):
            return self._matcher.visitchildrenset(dir[len(lead) :])
        return b'this' if dir in self._pathdirs else set()

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )
1105 1105
1106 1106
class unionmatcher(basematcher):
    """Matcher accepting every file accepted by at least one sub-matcher.

    ``traversedir`` is taken from the first matcher.

    NOTE(review): ``bad`` is not copied from the first matcher by this
    constructor; confirm whether that is intended.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        combined = False
        for m in self._matchers:
            verdict = m.visitdir(dir)
            if verdict == b'all':
                # One matcher wanting everything settles the question.
                return verdict
            combined = combined | verdict
        return combined

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            verdict = m.visitchildrenset(dir)
            if not verdict:
                continue
            if verdict == b'all':
                return verdict
            if sawthis or verdict == b'this':
                sawthis = True
                # Keep scanning: a later matcher may still answer 'all'.
                continue
            assert isinstance(verdict, set)
            children.update(verdict)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1157 1157
1158 1158
def patkind(pattern, default=None):
    r'''Return the kind of a 'kind:pat' pattern when the kind is known.

    When the pattern carries no known kind prefix, ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
1173 1173
1174 1174
def _patsplit(pattern, default):
    """Separate a pattern into its optional kind prefix and the pattern proper.

    Returns ``(kind, pat)`` when the text before the first ':' is one of
    ``allpatternkinds``; otherwise returns ``(default, pattern)`` with the
    pattern left untouched.
    """
    kind, colon, rest = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, rest
    return default, pattern
1183 1183
1184 1184
def _globre(pat):
    r'''Translate an extended glob string into regexp source text.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    escape = util.stringutil.regexbytesescapemap.get
    end = len(pat)
    pieces = []  # regexp fragments, concatenated on return
    depth = 0  # number of '{' groups currently open
    pos = 0

    while pos < end:
        ch = pat[pos : pos + 1]
        pos += 1
        if ch not in b'*?[{},\\':
            # Ordinary byte: emit it, escaped when the map requires it.
            pieces.append(escape(ch, ch))
        elif ch == b'*':
            # pat[pos:pos + 1] is a one-byte lookahead (empty at the end).
            if pat[pos : pos + 1] != b'*':
                pieces.append(b'[^/]*')
            else:
                pos += 1
                if pat[pos : pos + 1] == b'/':
                    pos += 1
                    pieces.append(b'(?:.*/)?')
                else:
                    pieces.append(b'.*')
        elif ch == b'?':
            pieces.append(b'.')
        elif ch == b'[':
            # Find the closing bracket; a leading '!' or ']' belongs to the
            # class and cannot terminate it.
            close = pos
            if close < end and pat[close : close + 1] in b'!]':
                close += 1
            while close < end and pat[close : close + 1] != b']':
                close += 1
            if close >= end:
                # Unterminated class: treat the '[' as a literal.
                pieces.append(b'\\[')
            else:
                members = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                lead = members[0:1]
                if lead == b'!':
                    members = b'^' + members[1:]
                elif lead == b'^':
                    members = b'\\' + members
                pieces.append(b'[' + members + b']')
        elif ch == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif ch == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif ch == b',' and depth:
            pieces.append(b'|')
        elif ch == b'\\':
            quoted = pat[pos : pos + 1]
            if quoted:
                pos += 1
                pieces.append(escape(quoted, quoted))
            else:
                # Trailing backslash with nothing left to quote.
                pieces.append(escape(ch, ch))
        else:
            # '}' or ',' outside any group: plain byte.
            pieces.append(escape(ch, ch))
    return b''.join(pieces)
1267 1267
1268 1268
def _regex(kind, pat, globsuffix):
    '''Turn a (normalized) pattern of the given kind into regexp source.

    globsuffix is appended to the regexp produced for glob kinds.
    Raises error.ProgrammingError for kinds that cannot become a regexp.
    '''

    if rustmod is not None:
        # Use the Rust implementation when it is available.
        try:
            return rustmod.build_single_regex(kind, pat, globsuffix)
        except rustmod.PatternError:
            raise error.ProgrammingError(
                b'not a regex pattern: %s:%s' % (kind, pat)
            )

    if kind in (b'glob', b'relpath') and not pat:
        return b''

    if kind == b're':
        return pat
    elif kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    elif kind == b'rootfilesin':
        # The pattern names a directory ('.' is handled as no directory
        # prefix)...
        dirpart = b'' if pat == b'.' else util.stringutil.reescape(pat) + b'/'
        # ...and whatever follows it must be a non-directory.
        return dirpart + b'[^/]+$'
    elif kind == b'relglob':
        translated = _globre(pat)
        star = b'[^/]*'
        if translated.startswith(star):
            # When pat has the form *XYZ (common), make the returned regex
            # more legible by returning the regex for **XYZ instead of
            # **/*XYZ.
            return b'.*' + translated[len(star) :] + globsuffix
        return b'(?:|.*/)' + translated + globsuffix
    elif kind == b'relre':
        return pat if pat.startswith(b'^') else b'.*' + pat
    elif kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix

    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1312 1312
1313 1313
def _buildmatch(kindpats, globsuffix, root):
    '''Build a ``(regexp string, matcher function)`` pair for kindpats.

    globsuffix is appended to the regexp of globs.
    '''
    candidates = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        built = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                # Each sub-matcher is created on first use and then reused.
                sub = built.get(prefix)
                if sub is None:
                    sub = built[prefix] = match(*matcherargs)
                if sub(f[len(prefix) :]):
                    return True
            return False

        candidates.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only rootfilesin patterns: a set lookup on the file's
            # directory replaces the regexp.
            dirs = {p for k, p, s in kindpats}

            def indirs(f):
                head, slash, tail = f.rpartition(b'/')
                return (head if slash else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            candidates.append(indirs)
        else:
            regex, regexfn = _buildregexmatch(kindpats, globsuffix)
            candidates.append(regexfn)

    if len(candidates) == 1:
        return regex, candidates[0]

    def matchany(f):
        return any(fn(f) for fn in candidates)

    return regex, matchany
1360 1360
1361 1361
# Size limit, in bytes, applied by _buildregexmatch(): a single pattern whose
# regexp is longer than this aborts, and the joined patterns are split into
# several regexps so that each group stays within the limit.
MAX_RE_SIZE = 20000
1363 1363
1364 1364
1365 1365 def _joinregexes(regexps):
1366 1366 """gather multiple regular expressions into a single one"""
1367 1367 return b'|'.join(regexps)
1368 1368
1369 1369
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # Cut the pattern list into consecutive groups whose joined length
        # stays within MAX_RE_SIZE.
        groups = []
        groupstart = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if groupsize + piecesize > MAX_RE_SIZE:
                groups.append(_joinregexes(regexps[groupstart:idx]))
                groupstart = idx
                groupsize = 0
            # +1 accounts for the '|' separator.
            groupsize += piecesize + 1

        if groupstart == 0:
            # Everything fits into a single regexp.
            single = _rematcher(fullregexp)

            def func(s):
                return bool(single(s))

        else:
            groups.append(_joinregexes(regexps[groupstart:]))
            compiled = [_rematcher(g) for g in groups]

            def func(s):
                return any(m(s) for m in compiled)

        return fullregexp, func
    except re.error:
        # Compile the patterns one at a time to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _(b"invalid pattern (%s): %s") % (k, p)
                raise error.Abort(msg)
        raise error.Abort(_(b"invalid pattern"))
1422 1422
1423 1423
1424 1424 def _patternrootsanddirs(kindpats):
1425 1425 '''Returns roots and directories corresponding to each pattern.
1426 1426
1427 1427 This calculates the roots and directories exactly matching the patterns and
1428 1428 returns a tuple of (roots, dirs) for each. It does not return other
1429 1429 directories which may also need to be considered, like the parent
1430 1430 directories.
1431 1431 '''
1432 1432 r = []
1433 1433 d = []
1434 1434 for kind, pat, source in kindpats:
1435 1435 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1436 1436 root = []
1437 1437 for p in pat.split(b'/'):
1438 1438 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1439 1439 break
1440 1440 root.append(p)
1441 1441 r.append(b'/'.join(root))
1442 1442 elif kind in (b'relpath', b'path'):
1443 1443 if pat == b'.':
1444 1444 pat = b''
1445 1445 r.append(pat)
1446 1446 elif kind in (b'rootfilesin',):
1447 1447 if pat == b'.':
1448 1448 pat = b''
1449 1449 d.append(pat)
1450 1450 else: # relglob, re, relre
1451 1451 r.append(b'')
1452 1452 return r, d
1453 1453
1454 1454
def _roots(kindpats):
    '''Return the root directories to match recursively for the patterns.'''
    return _patternrootsanddirs(kindpats)[0]
1459 1459
1460 1460
def _rootsdirsandparents(kindpats):
    '''Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents are non-recursive/exact directories: they must be scanned
    # to reach either the roots or the other exact directories.
    parents = set(pathutil.dirs(dirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1503 1503
1504 1504
def _explicitfiles(kindpats):
    '''Return the filenames the patterns may name explicitly.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' patterns can only name directories, never files, so they
    # are left out before computing the roots.
    canbefile = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(canbefile)
1517 1517
1518 1518
1519 1519 def _prefix(kindpats):
1520 1520 '''Whether all the patterns match a prefix (i.e. recursively)'''
1521 1521 for kind, pat, source in kindpats:
1522 1522 if kind not in (b'path', b'relpath'):
1523 1523 return False
1524 1524 return True
1525 1525
1526 1526
# Compiled regexp used by readpatternfile() to strip '#' comments; it stays
# None until the first line containing '#' is read, then is compiled once.
_commentre = None
1528 1528
1529 1529
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.

    filepath is the path of the file to read. warn, when true, is called
    with a message for each unknown "syntax:" value.
    '''
    # Declared once for the whole function: the comment regexp is compiled
    # lazily below and cached at module level.
    global _commentre

    if rustmod is not None:
        result, warnings = rustmod.read_pattern_file(
            filepath, bool(warn), sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager closes the file even when reading or parsing
    # raises, so the handle is never leaked.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                # A "syntax:" line only changes the default kind for the
                # lines that follow; it never yields a pattern itself.
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # An explicit kind prefix on the line overrides the current
            # default syntax for that line only.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now