##// END OF EJS Templates
match: stabilize _rootsdirsandparents doctest...
marmoute -
r42559:96fc696a default
parent child Browse files
Show More
@@ -1,1525 +1,1529
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
27 27 try:
28 28 from . import rustext
29 29 rustext.__name__ # force actual import (see hgdemandimport)
30 30 except ImportError:
31 31 rustext = None
32 32
# Names of the pattern kinds, i.e. the 'kind' part of a 'kind:pattern' string.
# NOTE(review): presumably consulted by _patsplit(), which is not visible
# here -- confirm.
allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'rootglob',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
# Kinds whose patterns _donormalize() canonicalizes relative to cwd.
cwdrelativepatternkinds = ('relpath', 'glob')

# Short alias used as a decorator by the matcher classes in this module.
propertycache = util.propertycache
40 40
def _rematcher(regex):
    '''Compile ``regex`` with util.re and return a bound matching function.

    The compiled object's ``test_match`` attribute is preferred when it
    exists; otherwise its regular ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
50 50
51 51 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
52 52 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
53 53 matchers = []
54 54 other = []
55 55
56 56 for kind, pat, source in kindpats:
57 57 if kind == 'set':
58 58 if ctx is None:
59 59 raise error.ProgrammingError("fileset expression with no "
60 60 "context")
61 61 matchers.append(ctx.matchfileset(pat, badfn=badfn))
62 62
63 63 if listsubrepos:
64 64 for subpath in ctx.substate:
65 65 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
66 66 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
67 67 matchers.append(pm)
68 68
69 69 continue
70 70 other.append((kind, pat, source))
71 71 return matchers, other
72 72
73 73 def _expandsubinclude(kindpats, root):
74 74 '''Returns the list of subinclude matcher args and the kindpats without the
75 75 subincludes in it.'''
76 76 relmatchers = []
77 77 other = []
78 78
79 79 for kind, pat, source in kindpats:
80 80 if kind == 'subinclude':
81 81 sourceroot = pathutil.dirname(util.normpath(source))
82 82 pat = util.pconvert(pat)
83 83 path = pathutil.join(sourceroot, pat)
84 84
85 85 newroot = pathutil.dirname(path)
86 86 matcherargs = (newroot, '', [], ['include:%s' % path])
87 87
88 88 prefix = pathutil.canonpath(root, root, newroot)
89 89 if prefix:
90 90 prefix += '/'
91 91 relmatchers.append((prefix, matcherargs))
92 92 else:
93 93 other.append((kind, pat, source))
94 94
95 95 return relmatchers, other
96 96
97 97 def _kindpatsalwaysmatch(kindpats):
98 98 """"Checks whether the kindspats match everything, as e.g.
99 99 'relpath:.' does.
100 100 """
101 101 for kind, pat, source in kindpats:
102 102 if pat != '' or kind not in ['relpath', 'glob']:
103 103 return False
104 104 return True
105 105
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher for kindpats using matchercls.

    'set' patterns are expanded into fileset matchers first; the remaining
    patterns (if any) are handed to ``matchercls(root, kindpats, badfn)``.
    Returns a nevermatcher when nothing is left, the lone matcher when there
    is exactly one, and a unionmatcher over all of them otherwise.
    '''
    filesetmatchers, kindpats = _expandsets(kindpats, ctx=ctx,
                                            listsubrepos=listsubrepos,
                                            badfn=badfn)
    # The pattern matcher, when present, always comes first.
    matchers = []
    if kindpats:
        matchers.append(matchercls(root, kindpats, badfn=badfn))
    matchers.extend(filesetmatchers)

    count = len(matchers)
    if count == 0:
        return nevermatcher(badfn=badfn)
    if count == 1:
        return matchers[0]
    return unionmatcher(matchers)
121 121
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
       matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
       of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # Like _donormalize(), but additionally fold each non-regex
            # pattern to the case recorded by the dirstate.
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind not in ('re', 'relre'): # regex can't be normalized
                    original = pat
                    pat = dsnormalize(original)

                    # Preserve the original to handle a case only rename.
                    if original != pat and original in dirstate:
                        folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded
    else:
        normalize = _donormalize

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, kindpats,
                                      ctx=ctx, listsubrepos=listsubrepos,
                                      badfn=badfn)

    # Include/exclude patterns always default to 'glob' and never get the
    # caller's badfn.
    if include:
        includepats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, includepats,
                                   ctx=ctx, listsubrepos=listsubrepos,
                                   badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        excludepats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, excludepats,
                                   ctx=ctx, listsubrepos=listsubrepos,
                                   badfn=None)
        m = differencematcher(m, em)
    return m
247 247
def exact(files, badfn=None):
    '''Return an exactmatcher for the given files.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
250 250
def always(badfn=None):
    '''Return an alwaysmatcher using the given bad() callback.'''
    matcher = alwaysmatcher(badfn)
    return matcher
253 253
def never(badfn=None):
    '''Return a nevermatcher using the given bad() callback.'''
    matcher = nevermatcher(badfn)
    return matcher
256 256
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad method is ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
264 264
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pattern, source) tuples. ``source`` is the
    list file or include file a pattern came from, or '' for a pattern that
    was given directly.

    Raises error.Abort when a 'listfile'/'listfile0' file cannot be read, or
    when processing an 'include' file aborts. An 'include' file that fails
    with IOError is reported through ``warn`` (when given) and skipped.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # Entries read from a list file are tagged with that file.
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3, so read the
                # message through .args instead of inst[0].
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
305 305
class basematcher(object):
    '''Common base of the matchers in this module.

    The defaults defined here match nothing (matchfn() returns False),
    list no files, and ask callers to visit every directory.
    '''

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is None:
            return
        self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)
    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
424 424
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn=badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def always(self):
        return True

    def matchfn(self, f):
        # Every file name matches.
        return True

    def visitdir(self, dir):
        # Every directory, and everything below it, is of interest.
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'
445 445
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn=badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # No directory can contain a match.
        return False

    def visitchildrenset(self, dir):
        return set()
471 471
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        # The predicate itself serves as the match function.
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Prefer the caller-supplied description; fall back to the repr of
        # the predicate when that description renders as empty.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        return '<predicatenmatcher pred=%s>' % described
485 485
def normalizerootdir(dir, funcname):
    '''Map the deprecated '.' spelling of the root directory to ''.

    Any other value is returned unchanged. When '.' is seen, a deprecation
    warning naming ``funcname`` is issued through util.nouideprecwarn.
    '''
    if dir != '.':
        return dir
    util.nouideprecwarn("match.%s() no longer accepts "
                        "'.', use '' instead." % funcname, '5.1')
    return ''
492 492
493 493
class patternmatcher(basematcher):
    """Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        built = _buildmatch(kindpats, '$', root)
        self._pats, self.matchfn = built

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset))

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        explicit = self._fileset
        if dir in explicit:
            # An explicitly listed directory of a prefix matcher covers its
            # whole subtree.
            return 'all' if self._prefix else True
        if dir in self._dirs:
            return True
        # Otherwise visit only when some ancestor was listed explicitly.
        return any(parentdir in explicit
                   for parentdir in util.finddirs(dir))

    def visitchildrenset(self, dir):
        # Translate the visitdir() verdict into visitchildrenset() terms.
        verdict = self.visitdir(dir)
        if not verdict:
            return set()
        if verdict is True:
            return 'this'
        assert verdict == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
562 562
563 563 # This is basically a reimplementation of util.dirs that stores the children
564 564 # instead of just a count of them, plus a small optional optimization to avoid
565 565 # some directories we don't need.
566 566 class _dirchildren(object):
567 567 def __init__(self, paths, onlyinclude=None):
568 568 self._dirs = {}
569 569 self._onlyinclude = onlyinclude or []
570 570 addpath = self.addpath
571 571 for f in paths:
572 572 addpath(f)
573 573
574 574 def addpath(self, path):
575 575 if path == '':
576 576 return
577 577 dirs = self._dirs
578 578 findsplitdirs = _dirchildren._findsplitdirs
579 579 for d, b in findsplitdirs(path):
580 580 if d not in self._onlyinclude:
581 581 continue
582 582 dirs.setdefault(d, set()).add(b)
583 583
584 584 @staticmethod
585 585 def _findsplitdirs(path):
586 586 # yields (dirname, basename) tuples, walking back to the root. This is
587 587 # very similar to util.finddirs, except:
588 588 # - produces a (dirname, basename) tuple, not just 'dirname'
589 589 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
590 590 # slash.
591 591 oldpos = len(path)
592 592 pos = path.rfind('/')
593 593 while pos != -1:
594 594 yield path[:pos], path[pos + 1:oldpos]
595 595 oldpos = pos
596 596 pos = path.rfind('/', 0, pos)
597 597 yield '', path[:oldpos]
598 598
599 599 def get(self, path):
600 600 return self._dirs.get(path, set())
601 601
class includematcher(basematcher):
    '''Matcher built from (kind, pat, source) include patterns.

    Unlike patternmatcher, the regex is built with a '(?:/|$)' suffix, and
    directory visiting is driven by the roots/dirs/parents computed by
    _rootsdirsandparents().
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        if self._prefix and dir in self._roots:
            return 'all'
        return (dir in self._roots or
                dir in self._dirs or
                dir in self._parents or
                any(parentdir in self._roots
                    for parentdir in util.finddirs(dir)))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        return _dirchildren(
                itertools.chain(self._dirs, self._roots, self._parents),
                onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        # Accept the deprecated '.' spelling of the root directory here too,
        # as visitdir() does, so both methods agree on the same input.
        dir = normalizerootdir(dir, 'visitchildrenset')
        if self._prefix and dir in self._roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('' in self._roots or
            dir in self._roots or
            dir in self._dirs or
            any(parentdir in self._roots
                for parentdir in util.finddirs(dir))):
            return 'this'

        if dir in self._parents:
            return self._allparentschildren.get(dir) or set()
        return set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
659 659
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset))

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        return dir in self._dirs

    def visitchildrenset(self, dir):
        dir = normalizerootdir(dir, 'visitchildrenset')

        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset.union(self._dirs.difference({''}))
        if dir != '':
            prefix = dir + '/'
            skip = len(prefix)
            candidates = {c[skip:] for c in candidates
                          if c.startswith(prefix)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {c for c in candidates if '/' not in c}
        # We really do not expect the result to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
727 727
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 only once; the answer is needed by both checks below.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            # Everything under dir is excluded by m2.
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
796 796
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the other one is returned as is. When one
    side always matches, a shallow copy of the other side is returned
    (carrying m1's callbacks) instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        result = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        result.bad = m1.bad
        result.explicitdir = m1.explicitdir
        result.traversedir = m1.traversedir
        return result

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
817 817
class intersectionmatcher(basematcher):
    '''Matches the files that both m1 and m2 match.

    The bad, explicitdir and traversedir attributes are taken from m1.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # Filter the exact side's files through the other matcher.
        if self._m1.isexact():
            exactside, otherside = self._m1, self._m2
        else:
            exactside, otherside = self._m2, self._m1
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if not first:
            return False
        second = self._m2.visitdir(dir)
        if first == 'all':
            return second
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(second)

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side.
        if m1_set == 'all':
            return m2_set
        if m2_set == 'all':
            return m1_set

        if 'this' in (m1_set, m2_set):
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set & m2_set

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
878 878
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        """Wrap ``matcher`` so that it is queried relative to ``path``."""
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep the explicit files located below ``path``, with the leading
        # "path/" stripped.
        subprefix = path + "/"
        self._files = [f[len(subprefix):] for f in matcher._files
                       if f.startswith(subprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def bad(self, f, msg):
        """Report ``f`` to the wrapped matcher under its full path."""
        fullpath = self._path + "/" + f
        self._matcher.bad(fullpath, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        fullpath = self._path + "/" + f
        return self._matcher.matchfn(fullpath)

    def visitdir(self, dir):
        """Ask the wrapped matcher about ``dir`` placed below the subdir."""
        subdir = normalizerootdir(dir, 'visitdir')
        target = self._path if subdir == '' else self._path + "/" + subdir
        return self._matcher.visitdir(target)

    def visitchildrenset(self, dir):
        """Same remapping as visitdir(), for visitchildrenset()."""
        subdir = normalizerootdir(dir, 'visitchildrenset')
        target = self._path if subdir == '' else self._path + "/" + subdir
        return self._matcher.visitchildrenset(target)

    def always(self):
        return self._always

    def prefix(self):
        """A prefix matcher stays one here unless it now always matches."""
        inner = self._matcher.prefix()
        if not inner:
            return inner
        return not self._always

    @encoding.strmethod
    def __repr__(self):
        details = (self._path, self._matcher)
        return '<subdirmatcher path=%r, matcher=%r>' % details
956 956
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        """Expose ``matcher`` under the non-empty directory ``path``.

        Raises error.ProgrammingError when ``path`` is empty.
        """
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._path = path
        self._pathprefix = path + '/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        # Files of the wrapped matcher, re-rooted below the prefix.
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        """Match ``f`` only if it lives below the prefix directory."""
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        return False

    @propertycache
    def _pathdirs(self):
        return set(util.finddirs(self._path))

    def visitdir(self, dir):
        """Delegate for the prefix and below; above it, only walk towards it."""
        prefix = self._pathprefix
        if dir == self._path:
            inner = ''
        elif dir.startswith(prefix):
            inner = dir[len(prefix):]
        else:
            return dir in self._pathdirs
        return self._matcher.visitdir(inner)

    def visitchildrenset(self, dir):
        """Same dispatch as visitdir(), for visitchildrenset()."""
        prefix = self._pathprefix
        if dir == self._path:
            inner = ''
        elif dir.startswith(prefix):
            inner = dir[len(prefix):]
        elif dir in self._pathdirs:
            return 'this'
        else:
            return set()
        return self._matcher.visitchildrenset(inner)

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        details = (pycompat.bytestr(self._path), self._matcher)
        return '<prefixdirmatcher path=%r, matcher=%r>' % details
1037 1037
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The explicitdir and traversedir attributes are taken from the first
    matcher.

    NOTE(review): the constructor does not copy ``bad`` from the first
    matcher, so ``bad`` is whatever the base class provides -- confirm this
    is intended.
    """

    def __init__(self, matchers):
        """``matchers`` is a non-empty sequence of matchers to combine."""
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """True as soon as any of the wrapped matchers accepts ``f``."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """'all' if any matcher says so, else the OR of all the answers."""
        visit = False
        for sub in self._matchers:
            answer = sub.visitdir(dir)
            if answer == 'all':
                return answer
            visit |= answer
        return visit

    def visitchildrenset(self, dir):
        """Merge the visitchildrenset() answers of all wrapped matchers.

        'all' wins over everything, then 'this', otherwise the union of the
        returned sets.
        """
        children = set()
        sawthis = False
        for sub in self._matchers:
            answer = sub.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if sawthis or answer == 'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children = children.union(answer)
        return 'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return '<unionmatcher matchers=%r>' % self._matchers
1089 1089
def patkind(pattern, default=None):
    r'''Return the kind of a 'kind:pat' pattern, or default.

    The default is returned when the pattern has no 'kind:' prefix or when
    the prefix is not a known pattern kind.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
1104 1104
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns (kind, pat) when the text before the first ':' is a known
    pattern kind, and (default, pattern) otherwise."""
    kind, colon, pat = pattern.partition(':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1113 1113
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned one character at a time:

    - ``*`` becomes ``[^/]*``, ``**`` becomes ``.*`` and ``**/`` becomes
      ``(?:.*/)?``
    - ``?`` becomes ``.``
    - ``[...]`` is kept as a character class; a leading ``!`` negates it and
      a leading ``^`` is escaped. An unterminated ``[`` is matched literally.
    - ``{a,b}`` becomes the non-capturing alternation ``(?:a|b)``; ``}`` and
      ``,`` outside of any ``{`` group are matched literally.
    - a backslash makes the following character literal.
    - every other character is escaped for use in a regexp.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the index of the next unread character, n the pattern length.
    i, n = 0, len(pat)
    res = ''
    # Number of '{' groups currently open.
    group = 0
    escape = util.stringutil.regexbytesescapemap.get
    def peek():
        # Next unread character, or False once the end has been reached.
        return i < n and pat[i:i + 1]
    while i < n:
        # Slicing (rather than indexing) always yields a length-1 string.
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # Not glob syntax: emit the character, escaped if needed.
            res += escape(c, c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/': any number of leading directories, or none.
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**': anything, '/' included.
                    res += '.*'
            else:
                # Single '*': anything within one path component.
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # Look for the closing ']'. A '!' or ']' right after the opening
            # bracket belongs to the class and cannot close it.
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # No closing bracket: treat '[' as a literal character.
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # Glob negation is spelled '^' in a regexp class.
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # A leading '^' is a literal in a glob: escape it.
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            res += '|'
        elif c == '\\':
            p = peek()
            if p:
                # Backslash escapes the next character.
                i += 1
                res += escape(p, p)
            else:
                # Trailing backslash: emit it as a literal.
                res += escape(c, c)
        else:
            # '}' or ',' seen outside of any '{' group: plain literal.
            res += escape(c, c)
    return res
1194 1194
def _regex(kind, pat, globsuffix):
    '''Translate a (normalized) pattern of the given kind into a regular
    expression string.

    globsuffix is appended to the regexp of globs.

    Raises error.ProgrammingError for kinds that have no regexp form.'''

    if rustext is not None:
        # Delegate to the Rust implementation when it is available.
        try:
            return rustext.filepatterns.build_single_regex(
                kind, pat, globsuffix)
        except rustext.filepatterns.PatternError:
            raise error.ProgrammingError(
                'not a regex pattern: %s:%s' % (kind, pat)
            )

    if kind in ('glob', 'relpath') and not pat:
        return ''
    if kind == 're':
        return pat
    if kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        # Match the path itself or anything below it.
        return util.stringutil.reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root).
        dirprefix = '' if pat == '.' else util.stringutil.reescape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + '[^/]+$'
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind == 'relre':
        # Unanchored unless the expression anchors itself.
        return pat if pat.startswith('^') else '.*' + pat
    if kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1237 1237
def _buildmatch(kindpats, globsuffix, root):
    '''Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs. Up to two match functions
    are built (one for the subincludes, one for the remaining patterns); a
    file matches when any of them accepts it.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for the sub-includes are created on first use and then
        # reused, keyed by their prefix.
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = submatchers[prefix] = match(*matcherargs)
                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            # Fast path: a plain set lookup on the parent directory.
            dirs = {p for k, p, s in kindpats}
            def matchrootfilesin(f):
                parent, slash, unusedname = f.rpartition('/')
                if not slash:
                    parent = '.'
                return parent in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(matchrootfilesin)
        else:
            regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1280 1280
# Size limit, in bytes, used by _buildregexmatch(): a single pattern whose
# regexp is longer than this aborts, and the remaining regexps are joined in
# groups sized against this limit.
MAX_RE_SIZE = 20000
1282 1282
1283 1283 def _joinregexes(regexps):
1284 1284 """gather multiple regular expressions into a single one"""
1285 1285 return '|'.join(regexps)
1286 1286
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The returned regexp string is always the full alternation of all
    patterns. When it would be too large, the matcher function is backed by
    several smaller compiled groups instead of a single regexp.

    Raises error.Abort when a single pattern is longer than MAX_RE_SIZE or
    when a pattern is not a valid regular expression.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # regexps[startidx:idx] is the group currently being accumulated and
        # groupsize its running size.
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                # A single pattern that is too large cannot be split.
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                # Adding this piece would overflow the group: close the
                # current group and start a new one at this piece.
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # The extra 1 presumably accounts for the '|' separator added
            # when the group is joined.
            groupsize += piecesize + 1

        if startidx == 0:
            # No group was ever closed: everything fits in one regexp.
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            # Close the last group and try every group in turn.
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Compile the patterns one by one to report the first offending one,
        # along with its source when known.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        # Every pattern compiles on its own: only the combination is invalid.
        raise error.Abort(_("invalid pattern"))
1338 1338
1339 1339 def _patternrootsanddirs(kindpats):
1340 1340 '''Returns roots and directories corresponding to each pattern.
1341 1341
1342 1342 This calculates the roots and directories exactly matching the patterns and
1343 1343 returns a tuple of (roots, dirs) for each. It does not return other
1344 1344 directories which may also need to be considered, like the parent
1345 1345 directories.
1346 1346 '''
1347 1347 r = []
1348 1348 d = []
1349 1349 for kind, pat, source in kindpats:
1350 1350 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1351 1351 root = []
1352 1352 for p in pat.split('/'):
1353 1353 if '[' in p or '{' in p or '*' in p or '?' in p:
1354 1354 break
1355 1355 root.append(p)
1356 1356 r.append('/'.join(root))
1357 1357 elif kind in ('relpath', 'path'):
1358 1358 if pat == '.':
1359 1359 pat = ''
1360 1360 r.append(pat)
1361 1361 elif kind in ('rootfilesin',):
1362 1362 if pat == '.':
1363 1363 pat = ''
1364 1364 d.append(pat)
1365 1365 else: # relglob, re, relre
1366 1366 r.append('')
1367 1367 return r, d
1368 1368
def _roots(kindpats):
    '''Return the directories that the given patterns match recursively.

    Only the roots half of _patternrootsanddirs() is kept.'''
    return _patternrootsanddirs(kindpats)[0]
1373 1373
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents), where roots and dirs are lists
    and parents is a set.

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set(util.dirs(exactdirs))
    parents.update(util.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1412 1416
def _explicitfiles(kindpats):
    '''Return the names that the patterns may explicitly designate as files.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so those
    # patterns are left out before the roots are computed.
    canbefiles = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(canbefiles)
1425 1429
1426 1430 def _prefix(kindpats):
1427 1431 '''Whether all the patterns match a prefix (i.e. recursively)'''
1428 1432 for kind, pat, source in kindpats:
1429 1433 if kind not in ('path', 'relpath'):
1430 1434 return False
1431 1435 return True
1432 1436
# Compiled regular expression that readpatternfile() uses to strip comments;
# it is built lazily, the first time a line containing '#' is read.
_commentre = None
1434 1438
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    filepath is the path of the pattern file to read.

    warn is either a false value or a callable taking a message; it is
    called for every 'syntax:' line that names an unknown syntax.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    '''
    global _commentre

    if rustext is not None:
        result, warnings = rustext.filepatterns.read_pattern_file(
            filepath,
            bool(warn),
            sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    # Maps the names accepted after 'syntax:' (and as per-line prefixes) to
    # the pattern kind prefix given to the resulting patterns.
    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # Use a context manager so that the file is closed even when reading or
    # parsing a line raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # Change the default syntax for the lines that follow; an
                # unknown name keeps the current default.
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A line may override the default syntax with its own prefix,
            # spelled either as the kind prefix or as 'name:'.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now