##// END OF EJS Templates
match: remove explicitdir attribute...
Martin von Zweigbergk -
r44114:5e1b0470 default
parent child Browse files
Show More
@@ -1,1625 +1,1614 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from .pycompat import open
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 pathutil,
21 21 policy,
22 22 pycompat,
23 23 util,
24 24 )
25 25 from .utils import stringutil
26 26
# Rust implementation of the file-pattern helpers, looked up through the
# module policy.
# NOTE(review): presumably None when no Rust extension is available --
# confirm against policy.importrust.
rustmod = policy.importrust('filepatterns')

# Pattern kind prefixes, i.e. the 'kind' part of a 'kind:pattern' string.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose pattern is canonicalized relative to the current working
# directory (see _donormalize).
cwdrelativepatternkinds = (b'relpath', b'glob')

# Short alias so the classes below can write @propertycache.
propertycache = util.propertycache
47 47
48 48
def _rematcher(regex):
    '''Compile 'regex' with util.re and return a callable that matches
    against it.

    The compiled object's test_match attribute is preferred when it exists;
    otherwise its regular match method is returned.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
58 58
59 59
60 60 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
61 61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 62 matchers = []
63 63 other = []
64 64
65 65 for kind, pat, source in kindpats:
66 66 if kind == b'set':
67 67 if ctx is None:
68 68 raise error.ProgrammingError(
69 69 b"fileset expression with no context"
70 70 )
71 71 matchers.append(ctx.matchfileset(pat, badfn=badfn))
72 72
73 73 if listsubrepos:
74 74 for subpath in ctx.substate:
75 75 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
76 76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 77 matchers.append(pm)
78 78
79 79 continue
80 80 other.append((kind, pat, source))
81 81 return matchers, other
82 82
83 83
def _expandsubinclude(kindpats, root):
    '''Separate the 'subinclude' patterns of kindpats from all the others.

    Returns a pair (relmatchers, other). Each element of 'relmatchers' is a
    (prefix, matcherargs) tuple for one subinclude file: 'matcherargs' are
    the arguments for building a matcher rooted at the directory containing
    that file, and 'prefix' is that directory relative to 'root' (with a
    trailing slash when non-empty). 'other' holds the remaining kindpats.'''
    relmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != b'subinclude':
            remaining.append((kind, pat, source))
            continue

        # The subinclude path is resolved relative to the file naming it.
        sourceroot = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourceroot, util.pconvert(pat))

        newroot = pathutil.dirname(path)
        matcherargs = (newroot, b'', [], [b'include:%s' % path])

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += b'/'
        relmatchers.append((prefix, matcherargs))

    return relmatchers, remaining
107 107
108 108
109 109 def _kindpatsalwaysmatch(kindpats):
110 110 """"Checks whether the kindspats match everything, as e.g.
111 111 'relpath:.' does.
112 112 """
113 113 for kind, pat, source in kindpats:
114 114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 115 return False
116 116 return True
117 117
118 118
def _buildkindpatsmatcher(
    matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
):
    '''Build a single matcher for kindpats.

    'set' patterns are expanded into their own matchers; everything else is
    handed to matchercls(root, kindpats, badfn=badfn). The result is a
    nevermatcher when nothing is left, the sole matcher when there is only
    one, and a unionmatcher of all of them otherwise.'''
    filesetmatchers, plainpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
    )

    # The matcher for the plain patterns comes first, fileset ones after.
    candidates = []
    if plainpats:
        candidates.append(matchercls(root, plainpats, badfn=badfn))
    candidates.extend(filesetmatchers)

    if not candidates:
        return nevermatcher(badfn=badfn)
    if len(candidates) == 1:
        return candidates[0]
    return unionmatcher(candidates)
136 136
137 137
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    normalize = _donormalize
    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        # On case-insensitive filesystems, fold every normalizable pattern
        # to the case recorded by the dirstate.
        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    original = pat
                    pat = dsnormalize(original)

                    # Preserve the original to handle a case only rename.
                    if original != pat and original in dirstate:
                        folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded

    m = None
    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if not _kindpatsalwaysmatch(kindpats):
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    if m is None:
        # Either the patterns match everything, or there were none at all.
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)

    # include/exclude patterns always default to 'glob' and are built
    # without the caller's bad() callback.
    if include:
        includepats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            includepats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        excludepats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            excludepats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m
292 292
293 293
def exact(files, badfn=None):
    '''Return an exactmatcher for 'files', with 'badfn' as its optional
    bad() callback.'''
    return exactmatcher(files, badfn=badfn)
296 296
297 297
def always(badfn=None):
    '''Return an alwaysmatcher, with 'badfn' as its optional bad()
    callback.'''
    return alwaysmatcher(badfn)
300 300
301 301
def never(badfn=None):
    '''Return a nevermatcher, with 'badfn' as its optional bad()
    callback.'''
    return nevermatcher(badfn)
304 304
305 305
def badmatch(match, badfn):
    """Return a shallow copy of 'match' whose bad() callback is 'badfn'.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
313 313
314 314
315 315 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
316 316 '''Convert 'kind:pat' from the patterns list to tuples with kind and
317 317 normalized and rooted patterns and with listfiles expanded.'''
318 318 kindpats = []
319 319 for kind, pat in [_patsplit(p, default) for p in patterns]:
320 320 if kind in cwdrelativepatternkinds:
321 321 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
322 322 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
323 323 pat = util.normpath(pat)
324 324 elif kind in (b'listfile', b'listfile0'):
325 325 try:
326 326 files = util.readfile(pat)
327 327 if kind == b'listfile0':
328 328 files = files.split(b'\0')
329 329 else:
330 330 files = files.splitlines()
331 331 files = [f for f in files if f]
332 332 except EnvironmentError:
333 333 raise error.Abort(_(b"unable to read file list (%s)") % pat)
334 334 for k, p, source in _donormalize(
335 335 files, default, root, cwd, auditor, warn
336 336 ):
337 337 kindpats.append((k, p, pat))
338 338 continue
339 339 elif kind == b'include':
340 340 try:
341 341 fullpath = os.path.join(root, util.localpath(pat))
342 342 includepats = readpatternfile(fullpath, warn)
343 343 for k, p, source in _donormalize(
344 344 includepats, default, root, cwd, auditor, warn
345 345 ):
346 346 kindpats.append((k, p, source or pat))
347 347 except error.Abort as inst:
348 348 raise error.Abort(
349 349 b'%s: %s'
350 350 % (pat, inst[0]) # pytype: disable=unsupported-operands
351 351 )
352 352 except IOError as inst:
353 353 if warn:
354 354 warn(
355 355 _(b"skipping unreadable pattern file '%s': %s\n")
356 356 % (pat, stringutil.forcebytestr(inst.strerror))
357 357 )
358 358 continue
359 359 # else: re or relre - which cannot be normalized
360 360 kindpats.append((kind, pat, b''))
361 361 return kindpats
362 362
363 363
class basematcher(object):
    '''Common base of all matchers.

    On its own it matches nothing and lists no files; subclasses override
    the methods below. Calling a matcher with a file name is the same as
    calling its matchfn().'''

    def __init__(self, badfn=None):
        # Only shadow the bad() method when a callback was actually given.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
482 478
483 479
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        # Every file name matches.
        return True

    def visitdir(self, dir):
        # Every directory, and everything below it, is worth visiting.
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
504 500
505 501
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # No directory can contain a match.
        return False

    def visitchildrenset(self, dir):
        # Empty set: nothing below 'dir' needs to be visited.
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
531 527
532 528
class predicatematcher(basematcher):
    """Matcher wrapping a plain boolean function of the file name.

    'predfn' becomes the matcher's matchfn; 'predrepr', when given, is what
    __repr__ shows instead of the repr of the function itself.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        description = stringutil.buildrepr(self._predrepr)
        if not description:
            # No usable predrepr: fall back to the function's own repr.
            description = pycompat.byterepr(self.matchfn)
        # NOTE(review): 'predicatenmatcher' looks like a typo, but the repr
        # text is runtime output and is left unchanged here.
        return b'<predicatenmatcher pred=%s>' % description
547 543
548 544
class patternmatcher(basematcher):
    """Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        # A directory listed explicitly by a prefix matcher is matched
        # recursively; otherwise it is merely worth visiting.
        if dir in self._fileset:
            return b'all' if self._prefix else True
        # An ancestor of a listed file ...
        if dir in self._dirs:
            return True
        # ... or something below a listed path.
        return any(
            parentdir in self._fileset for parentdir in pathutil.finddirs(dir)
        )

    def visitchildrenset(self, dir):
        # Translate visitdir's answer: True -> 'this', False -> set(),
        # 'all' -> 'all'.
        visit = self.visitdir(dir)
        if visit is True:
            return b'this'
        if not visit:
            return set()
        assert visit == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
620 616
621 617
622 618 # This is basically a reimplementation of pathutil.dirs that stores the
623 619 # children instead of just a count of them, plus a small optional optimization
624 620 # to avoid some directories we don't need.
625 621 class _dirchildren(object):
626 622 def __init__(self, paths, onlyinclude=None):
627 623 self._dirs = {}
628 624 self._onlyinclude = onlyinclude or []
629 625 addpath = self.addpath
630 626 for f in paths:
631 627 addpath(f)
632 628
633 629 def addpath(self, path):
634 630 if path == b'':
635 631 return
636 632 dirs = self._dirs
637 633 findsplitdirs = _dirchildren._findsplitdirs
638 634 for d, b in findsplitdirs(path):
639 635 if d not in self._onlyinclude:
640 636 continue
641 637 dirs.setdefault(d, set()).add(b)
642 638
643 639 @staticmethod
644 640 def _findsplitdirs(path):
645 641 # yields (dirname, basename) tuples, walking back to the root. This is
646 642 # very similar to pathutil.finddirs, except:
647 643 # - produces a (dirname, basename) tuple, not just 'dirname'
648 644 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
649 645 # slash.
650 646 oldpos = len(path)
651 647 pos = path.rfind(b'/')
652 648 while pos != -1:
653 649 yield path[:pos], path[pos + 1 : oldpos]
654 650 oldpos = pos
655 651 pos = path.rfind(b'/', 0, pos)
656 652 yield b'', path[:oldpos]
657 653
658 654 def get(self, path):
659 655 return self._dirs.get(path, set())
660 656
661 657
class includematcher(basematcher):
    '''Matcher built from (kind, pat, source) tuples in which a pattern also
    matches everything below it (patterns are compiled with a '(?:/|$)'
    suffix rather than '$').'''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def _underroot(self, dir):
        # True when 'dir' lies below one of the recursively included roots.
        return any(
            parentdir in self._roots for parentdir in pathutil.finddirs(dir)
        )

    def visitdir(self, dir):
        inroots = dir in self._roots
        if self._prefix and inroots:
            return b'all'
        return (
            inroots
            or dir in self._dirs
            or dir in self._parents
            or self._underroot(dir)
        )

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        inroots = dir in self._roots
        if self._prefix and inroots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in self._roots
            or inroots
            or dir in self._dirs
            or self._underroot(dir)
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        # 'dir' is only a stepping stone: list the children leading on to
        # the included roots and dirs.
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
724 720
725 721
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # Lists are stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    # Matching a file is the same as looking it up in the file set.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        known = self._fileset | (self._dirs - {b''})
        if dir != b'':
            # Keep only what lives below 'dir', expressed relative to it.
            prefix = dir + b'/'
            skip = len(prefix)
            known = set(
                path[skip:] for path in known if path.startswith(prefix)
            )
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {name for name in known if b'/' not in name}
        # We really do not expect children to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
789 785
790 786
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # Callbacks are taken from the first matcher only.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 once; its answer decides how m1's answer is reported.
        m2visit = self._m2.visitdir(dir)
        if m2visit == b'all':
            # Everything below 'dir' is excluded.
            return False
        m1visit = self._m1.visitdir(dir)
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return m1visit
        # m2 may exclude something below 'dir', so never report 'all'.
        return bool(m1visit)

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
860 854
861 855
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    When either argument is None the other one is returned as is. When one
    of them always matches, a copy of the other is returned (carrying m1's
    callbacks) instead of a full intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 alone decides what matches, but m1's callbacks must survive.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined = copy.copy(m2)
        combined.bad = m1.bad
        combined.traversedir = m1.traversedir
        return combined

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
882 874
883 875
class intersectionmatcher(basematcher):
    '''Matches what both m1 and m2 match. The bad and traversedir callbacks
    are taken from m1.'''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # Filter the exact side's files through the other matcher.
        if self._m1.isexact():
            exactside, otherside = self._m1, self._m2
        else:
            exactside, otherside = self._m2, self._m1
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if not first:
            return False
        second = self._m2.visitdir(dir)
        if first == b'all':
            return second
        # bool() because first=True + second='all' should not be 'all'
        return bool(second)

    def visitchildrenset(self, dir):
        first = self._m1.visitchildrenset(dir)
        if not first:
            return set()
        second = self._m2.visitchildrenset(dir)
        if not second:
            return set()

        # 'all' on one side leaves the decision entirely to the other.
        if first == b'all':
            return second
        if second == b'all':
            return first

        if b'this' in (first, second):
            return b'this'

        assert isinstance(first, set) and isinstance(second, set)
        return first.intersection(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
944 935
945 936
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    Paths given to this matcher are relative to the subdirectory; the
    subdirectory path is prepended before the wrapped matcher is consulted,
    and stripped from the wrapped matcher's file list:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the files below the subdirectory, expressed relative
        # to it.
        subprefix = path + b"/"
        self._files = [
            name[len(subprefix) :]
            for name in matcher._files
            if name.startswith(subprefix)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def bad(self, f, msg):
        # Report with the path as seen by the wrapped matcher.
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        # The empty dir is the subdirectory root itself.
        outer = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(outer)

    def visitchildrenset(self, dir):
        outer = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(outer)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
1026 1017
1027 1018
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        # Trailing slash so that b'd/ef' is not mistaken for a child of b'd/e'.
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        if dir == self._path:
            # The prefix itself is the wrapped matcher's root.
            inner = b''
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix) :]
        else:
            # Outside the prefix: only directories recorded in _pathdirs
            # are worth visiting.
            return dir in self._pathdirs
        return self._matcher.visitdir(inner)

    def visitchildrenset(self, dir):
        if dir == self._path:
            inner = b''
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix) :]
        elif dir in self._pathdirs:
            return b'this'
        else:
            return set()
        return self._matcher.visitchildrenset(inner)

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )
1110 1100
1111 1101
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        # NOTE(review): the class docstring also mentions ``bad``, but only
        # ``traversedir`` is copied from the first matcher here - confirm
        # which of the two is intended.
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # A file matches as soon as any member matcher accepts it.
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        combined = False
        for m in self._matchers:
            verdict = m.visitdir(dir)
            if verdict == b'all':
                # One member wants everything below dir; nothing can widen
                # that answer.
                return verdict
            combined |= verdict
        return combined

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if sawthis or answer == b'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children.update(answer)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1163 1152
1164 1153
def patkind(pattern, default=None):
    r'''Return the kind of a 'kind:pat' pattern, or default.

    The kind is only recognized when it is one of the known pattern kinds;
    otherwise (or when there is no 'kind:' prefix) default is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, _pat = _patsplit(pattern, default)
    return kind
1179 1168
1180 1169
def _patsplit(pattern, default):
    """Split a pattern into its (kind, pat) parts.

    When pattern starts with a known 'kind:' prefix, that kind and the rest
    of the pattern are returned; otherwise (default, pattern) is returned
    with the pattern left untouched."""
    kind, colon, pat = pattern.partition(b':')
    # An empty separator means there was no colon in the pattern at all.
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1189 1178
1190 1179
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    Supported syntax: ``?``, ``*``, ``**`` (optionally followed by ``/``),
    ``[...]`` character classes (``!`` negates), ``{a,b}`` alternation and
    backslash escapes. Everything else is escaped literally.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    pos, end = 0, len(pat)
    out = []  # regexp fragments, joined once at the end
    depth = 0  # number of currently open '{' groups
    escape = util.stringutil.regexbytesescapemap.get

    while pos < end:
        c = pat[pos : pos + 1]
        pos += 1
        # From here on pat[pos : pos + 1] is the lookahead character; it is
        # b'' (never equal to a real character) at the end of the pattern.
        if c not in b'*?[{},\\':
            out.append(escape(c, c))
        elif c == b'*':
            if pat[pos : pos + 1] != b'*':
                # A single star never crosses a directory separator.
                out.append(b'[^/]*')
            else:
                pos += 1
                if pat[pos : pos + 1] == b'/':
                    # '**/' also matches zero directories.
                    pos += 1
                    out.append(b'(?:.*/)?')
                else:
                    out.append(b'.*')
        elif c == b'?':
            out.append(b'.')
        elif c == b'[':
            # A leading '!' or ']' belongs to the class body, so the search
            # for the closing bracket starts after it.
            scan = pos
            if pat[scan : scan + 1] in (b'!', b']'):
                scan += 1
            close = pat.find(b']', scan)
            if close < 0:
                # Unterminated class: treat '[' as a literal.
                out.append(b'\\[')
            else:
                stuff = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                if stuff.startswith(b'!'):
                    stuff = b'^' + stuff[1:]
                elif stuff.startswith(b'^'):
                    stuff = b'\\' + stuff
                out.append(b'[' + stuff + b']')
        elif c == b'{':
            depth += 1
            out.append(b'(?:')
        elif c == b'}' and depth:
            out.append(b')')
            depth -= 1
        elif c == b',' and depth:
            out.append(b'|')
        elif c == b'\\':
            nxt = pat[pos : pos + 1]
            if nxt:
                pos += 1
                out.append(escape(nxt, nxt))
            else:
                # Trailing backslash: escape the backslash itself.
                out.append(escape(c, c))
        else:
            # '}' or ',' outside of any group is a plain character.
            out.append(escape(c, c))
    return b''.join(out)
1273 1262
1274 1263
def _regex(kind, pat, globsuffix):
    '''Translate one (normalized) pattern of the given kind into a regexp.

    globsuffix is appended to the regexp produced for glob-like kinds
    (glob, rootglob, relglob). When the Rust module is available the
    conversion is delegated to it. Raises error.ProgrammingError for a
    kind that cannot be turned into a regexp.'''

    if rustmod is not None:
        try:
            return rustmod.build_single_regex(kind, pat, globsuffix)
        except rustmod.PatternError:
            raise error.ProgrammingError(
                b'not a regex pattern: %s:%s' % (kind, pat)
            )

    reescape = util.stringutil.reescape

    # An empty glob/relpath pattern matches everything.
    if kind in (b'glob', b'relpath') and not pat:
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        # Match the path itself or anything below it.
        return b'' if pat == b'.' else reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        # Pattern is a directory name (b'.' meaning the root).
        dirpart = b'' if pat == b'.' else reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return dirpart + b'[^/]+$'
    if kind == b'relglob':
        star = b'[^/]*'
        translated = _globre(pat)
        if translated.startswith(star):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + translated[len(star) :] + globsuffix
        return b'(?:|.*/)' + translated + globsuffix
    if kind == b'relre':
        # An anchored regexp is used as is; otherwise allow any prefix.
        return pat if pat.startswith(b'^') else b'.*' + pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1318 1307
1319 1308
def _buildmatch(kindpats, globsuffix, root):
    '''Return a (regexp string, matcher function) pair for kindpats.

    subinclude patterns are split off first and handled by a dedicated
    function that builds the sub-matchers lazily. The remaining patterns
    are either all 'rootfilesin' (handled with a set lookup) or compiled
    into regexps. globsuffix is appended to the regexp of globs.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        cache = {}  # prefix -> matcher, filled on first use

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = cache.get(prefix)
                if submatch is None:
                    submatch = cache[prefix] = match(*matcherargs)
                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                # The directory of a file at the root is spelled b'.'.
                head, slash, _tail = f.rpartition(b'/')
                return (head if slash else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]

    def matchany(f):
        return any(fn(f) for fn in matchfuncs)

    return regex, matchany
1366 1355
1367 1356
# Upper bound, in bytes, on the size of a regexp handed to the regexp engine:
# _buildregexmatch() aborts on a single pattern whose regexp is longer than
# this and splits the combined regexp into groups that stay below it.
MAX_RE_SIZE = 20000
1369 1358
1370 1359
1371 1360 def _joinregexes(regexps):
1372 1361 """gather multiple regular expressions into a single one"""
1373 1362 return b'|'.join(regexps)
1374 1363
1375 1364
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of (kind, pat, source) patterns.

    Returns the full regexp string together with a matcher function. When
    the combined regexp would exceed MAX_RE_SIZE, the patterns are compiled
    in several smaller groups and the matcher tries each group in turn.
    Aborts on a single pattern that is too long and on invalid patterns.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        pieces = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        combined = _joinregexes(pieces)

        chunks = []
        chunkstart = 0
        chunksize = 0
        for pos, piece in enumerate(pieces):
            size = len(piece)
            if size > MAX_RE_SIZE:
                raise error.Abort(
                    _(b"matcher pattern is too long (%d bytes)") % size
                )
            if chunksize + size > MAX_RE_SIZE:
                # Close the current group just before this piece.
                chunks.append(_joinregexes(pieces[chunkstart:pos]))
                chunkstart = pos
                chunksize = 0
            # +1 accounts for the b'|' separator added when joining.
            chunksize += size + 1

        if chunkstart == 0:
            # Everything fits in a single regexp.
            single = _rematcher(combined)

            def func(s):
                return bool(single(s))

        else:
            chunks.append(_joinregexes(pieces[chunkstart:]))
            compiled = [_rematcher(chunk) for chunk in chunks]

            def func(s):
                return any(m(s) for m in compiled)

        return combined, func
    except re.error:
        # Recompile the patterns one by one to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_(b"invalid pattern"))
1428 1417
1429 1418
1430 1419 def _patternrootsanddirs(kindpats):
1431 1420 '''Returns roots and directories corresponding to each pattern.
1432 1421
1433 1422 This calculates the roots and directories exactly matching the patterns and
1434 1423 returns a tuple of (roots, dirs) for each. It does not return other
1435 1424 directories which may also need to be considered, like the parent
1436 1425 directories.
1437 1426 '''
1438 1427 r = []
1439 1428 d = []
1440 1429 for kind, pat, source in kindpats:
1441 1430 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1442 1431 root = []
1443 1432 for p in pat.split(b'/'):
1444 1433 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1445 1434 break
1446 1435 root.append(p)
1447 1436 r.append(b'/'.join(root))
1448 1437 elif kind in (b'relpath', b'path'):
1449 1438 if pat == b'.':
1450 1439 pat = b''
1451 1440 r.append(pat)
1452 1441 elif kind in (b'rootfilesin',):
1453 1442 if pat == b'.':
1454 1443 pat = b''
1455 1444 d.append(pat)
1456 1445 else: # relglob, re, relre
1457 1446 r.append(b'')
1458 1447 return r, d
1459 1448
1460 1449
def _roots(kindpats):
    '''Return the root directories to match recursively for kindpats.'''
    # Only the first half of the (roots, dirs) pair is of interest here.
    return _patternrootsanddirs(kindpats)[0]
1465 1454
1466 1455
def _rootsdirsandparents(kindpats):
    '''Return roots, exact directories and their parents for kindpats.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents); roots and dirs are lists,
    parents is a set.

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set(pathutil.dirs(exactdirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1509 1498
1510 1499
def _explicitfiles(kindpats):
    '''Return the potential explicit filenames named by the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so such
    # patterns are dropped before the roots are computed.
    return _roots([kp for kp in kindpats if kp[0] != b'rootfilesin'])
1523 1512
1524 1513
1525 1514 def _prefix(kindpats):
1526 1515 '''Whether all the patterns match a prefix (i.e. recursively)'''
1527 1516 for kind, pat, source in kindpats:
1528 1517 if kind not in (b'path', b'relpath'):
1529 1518 return False
1530 1519 return True
1531 1520
1532 1521
# Compiled regexp used by readpatternfile() to strip comments; it is built
# on first use and cached here for later calls.
_commentre = None
1534 1523
1535 1524
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    warn, when truthy, is called with a message for every 'syntax:' line
    naming an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.

    The file is closed before returning, including when reading or parsing
    raises an exception.
    '''
    global _commentre

    if rustmod is not None:
        result, warnings = rustmod.read_pattern_file(
            filepath, bool(warn), sourceinfo,
        )

        # NOTE(review): presumably `warnings` is empty when warn is falsy
        # (bool(warn) is passed to Rust) - confirm, since warn is called
        # unconditionally below.
        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    # Maps the name used in 'syntax: NAME' lines (and as 'NAME:' line
    # prefix) to the prefix put in front of the returned patterns.
    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager guarantees the file is closed even when an
    # exception escapes the loop.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A per-line 'kind:' prefix overrides the current default
            # syntax for that line only.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now