##// END OF EJS Templates
match: de-flake test-doctest.py by not depending on util.dirs() order...
Martin von Zweigbergk -
r42938:c4b8f863 default
parent child Browse files
Show More
@@ -1,1526 +1,1526
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
27 27 try:
28 28 from . import rustext
29 29 rustext.__name__ # force actual import (see hgdemandimport)
30 30 except ImportError:
31 31 rustext = None
32 32
# Pattern kind prefixes ('<kind>:<pattern>') known to this module.
allpatternkinds = (
    're', 'glob', 'path', 'relglob', 'relpath', 'relre', 'rootglob',
    'listfile', 'listfile0', 'set', 'include', 'subinclude', 'rootfilesin',
)
# Kinds whose patterns _donormalize() canonicalizes relative to cwd.
cwdrelativepatternkinds = ('relpath', 'glob')
38 38
39 39 propertycache = util.propertycache
40 40
def _rematcher(regex):
    '''Compile regex through util.re and return a matching function.

    The compiled object's test_match attribute is preferred when it exists
    (slightly faster, provided by facebook's re2 bindings); otherwise its
    plain match method is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        matchfn = compiled.test_match
    except AttributeError:
        # regular engine: no test_match available
        matchfn = compiled.match
    return matchfn
50 50
def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
    '''Split kindpats into fileset matchers and the remaining kindpats.

    Each 'set' entry is replaced by the matcher ctx.matchfileset() builds for
    it. With listsubrepos, the same fileset is also evaluated in every
    subrepo listed in ctx.substate and wrapped in a prefixdirmatcher.

    Returns (matchers, other), where other holds the non-'set' entries in
    their original order. Raises error.ProgrammingError when a 'set' pattern
    is seen but ctx is None.
    '''
    setmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != 'set':
            remaining.append((kind, pat, source))
            continue

        if ctx is None:
            raise error.ProgrammingError("fileset expression with no "
                                         "context")
        setmatchers.append(ctx.matchfileset(pat, badfn=badfn))

        if not listsubrepos:
            continue
        for subpath in ctx.substate:
            submatcher = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
            setmatchers.append(
                prefixdirmatcher(subpath, submatcher, badfn=badfn))

    return setmatchers, remaining
72 72
def _expandsubinclude(kindpats, root):
    '''Separate 'subinclude' entries from the other kindpats.

    Returns (relmatchers, other). relmatchers is a list of
    (prefix, matcherargs) pairs, one per subinclude: matcherargs is the
    argument tuple (newroot, '', [], ['include:<path>']) and prefix is the
    directory of the included file relative to root, with a trailing '/'
    unless it is empty. other holds the untouched non-subinclude entries.
    '''
    relmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != 'subinclude':
            remaining.append((kind, pat, source))
            continue

        # The included file is looked up next to the file that named it.
        sourceroot = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourceroot, util.pconvert(pat))
        newroot = pathutil.dirname(path)

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += '/'

        matcherargs = (newroot, '', [], ['include:%s' % path])
        relmatchers.append((prefix, matcherargs))

    return relmatchers, remaining
96 96
def _kindpatsalwaysmatch(kindpats):
    """Check whether the kindpats match everything, as e.g. 'relpath:.' does.

    That is the case only when every entry has an empty pattern and is of
    kind 'relpath' or 'glob'; an empty kindpats list therefore yields True.
    """
    return all(pat == '' and kind in ['relpath', 'glob']
               for kind, pat, source in kindpats)
105 105
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build one matcher covering all of kindpats.

    'set' patterns are expanded into fileset matchers by _expandsets(); any
    remaining patterns are handed to matchercls(root, kindpats, badfn=badfn).
    The result is a nevermatcher when nothing was produced, the sole matcher
    when there is exactly one, and a unionmatcher otherwise.
    '''
    filesetmatchers, kindpats = _expandsets(kindpats, ctx=ctx,
                                            listsubrepos=listsubrepos,
                                            badfn=badfn)
    matchers = []
    if kindpats:
        matchers.append(matchercls(root, kindpats, badfn=badfn))
    # The pattern matcher (if any) stays first, fileset matchers follow.
    matchers.extend(filesetmatchers)

    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
121 121
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    normalize = _donormalize
    if icasefs:
        # Case-insensitive filesystem: fold patterns through the dirstate.
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                normpat = dsnormalize(pat)
                # Preserve the original to handle a case only rename.
                if pat != normpat and pat in dirstate:
                    folded.append((kind, pat, source))
                folded.append((kind, normpat, source))
            return folded

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, kindpats,
                                      ctx=ctx, listsubrepos=listsubrepos,
                                      badfn=badfn)

    # include/exclude patterns always default to 'glob' and their matchers
    # are built without a bad() callback.
    if include:
        incpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, incpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        excpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, excpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
247 247
def exact(files, badfn=None):
    '''Return an exactmatcher for files, forwarding badfn.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
250 250
def always(badfn=None):
    '''Return an alwaysmatcher, forwarding badfn.'''
    matcher = alwaysmatcher(badfn)
    return matcher
253 253
def never(badfn=None):
    '''Return a nevermatcher, forwarding badfn.'''
    matcher = nevermatcher(badfn)
    return matcher
256 256
def badmatch(match, badfn):
    """Return a shallow copy of match whose bad attribute is badfn.

    The matcher passed in is left unmodified.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
264 264
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. source is '' for patterns
    given directly, the list file name for patterns read through
    'listfile'/'listfile0', and the include file name (or the nested source,
    when non-empty) for patterns read through 'include'.

    Raises error.Abort when a list file cannot be read or when reading an
    include file aborts. An include file that fails with IOError is skipped,
    after calling warn (if given).
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # Patterns from a list file report the list file as their source.
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # inst.args[0] rather than inst[0]: exception objects are
                # not subscriptable on Python 3; .args works on 2 and 3.
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
305 305
class basematcher(object):
    '''Base class of the matchers in this module.

    On its own it matches nothing and has an empty files() list; subclasses
    override matchfn(), _files and the predicate methods as needed.
    '''

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
424 424
class alwaysmatcher(basematcher):
    '''Matches everything.

    Every file matches, and both visitdir() and visitchildrenset() answer
    'all' for any directory.
    '''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'
445 445
class nevermatcher(basematcher):
    '''Matches nothing.

    matchfn() is inherited from basematcher (always False); no directory is
    ever worth visiting.
    '''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()
471 471
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    predfn is installed as matchfn; predrepr, when given, is what __repr__
    shows instead of the function's own repr.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): 'predicatenmatcher' looks like a typo, but this text
        # is runtime output that may be relied upon; kept unchanged.
        return '<predicatenmatcher pred=%s>' % described
485 485
def normalizerootdir(dir, funcname):
    '''Map the deprecated root spelling '.' to ''.

    Any other value is returned unchanged. When '.' is passed, a
    deprecation warning naming match.<funcname>() is issued first.
    '''
    if dir != '.':
        return dir
    util.nouideprecwarn("match.%s() no longer accepts "
                        "'.', use '' instead." % funcname, '5.1')
    return ''
492 492
493 493
class patternmatcher(basematcher):
    """Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # directories derived from the explicit files via util.dirs()
        return set(util.dirs(self._fileset))

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        if dir in self._fileset:
            # An explicitly listed path: everything below it matches when
            # this is a prefix matcher.
            return 'all' if self._prefix else True
        if dir in self._dirs:
            return True
        # Otherwise visit only when some ancestor is explicitly listed.
        return any(parentdir in self._fileset
                   for parentdir in util.finddirs(dir))

    def visitchildrenset(self, dir):
        # Translate visitdir()'s answer: True -> 'this', falsy -> set(),
        # 'all' -> 'all'.
        visit = self.visitdir(dir)
        if visit is True:
            return 'this'
        if not visit:
            return set()
        assert visit == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
562 562
# This is basically a reimplementation of util.dirs that stores the children
# instead of just a count of them, plus a small optional optimization to avoid
# some directories we don't need.
class _dirchildren(object):
    '''Map each directory to the set of its immediate child names.

    paths is an iterable of '/'-separated paths. onlyinclude, when not None,
    restricts the directories that are recorded to those it contains (an
    empty collection therefore records nothing). When onlyinclude is None,
    every directory is recorded.
    '''

    def __init__(self, paths, onlyinclude=None):
        self._dirs = {}
        # Keep None distinct from an empty collection: None means "no
        # restriction". Previously None became [] and filtered out every
        # directory, so omitting the optional argument recorded nothing.
        self._onlyinclude = onlyinclude
        addpath = self.addpath
        for f in paths:
            addpath(f)

    def addpath(self, path):
        '''Record path's basename under each of its ancestor directories.'''
        if path == '':
            return
        dirs = self._dirs
        onlyinclude = self._onlyinclude
        for d, b in _dirchildren._findsplitdirs(path):
            if onlyinclude is not None and d not in onlyinclude:
                continue
            dirs.setdefault(d, set()).add(b)

    @staticmethod
    def _findsplitdirs(path):
        # yields (dirname, basename) tuples, walking back to the root. This is
        # very similar to util.finddirs, except:
        #  - produces a (dirname, basename) tuple, not just 'dirname'
        #  - includes root dir
        # Unlike manifest._splittopdir, this does not suffix `dirname` with a
        # slash.
        oldpos = len(path)
        pos = path.rfind('/')
        while pos != -1:
            yield path[:pos], path[pos + 1:oldpos]
            oldpos = pos
            pos = path.rfind('/', 0, pos)
        yield '', path[:oldpos]

    def get(self, path):
        '''Return the recorded children of path (empty set if none).'''
        return self._dirs.get(path, set())
602 602
class includematcher(basematcher):
    '''Matcher built from include-style kindpats.

    The match function comes from _buildmatch() with the '(?:/|$)' suffix.
    Directory visiting decisions are based on the roots, dirs and parents
    computed by _rootsdirsandparents().
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        # Stored exactly as returned; a preceding 'set(parents)' assignment
        # was immediately overwritten by this one and has been dropped.
        # NOTE(review): presumably _rootsdirsandparents() already returns a
        # set here -- confirm against that helper.
        self._parents = parents

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        if self._prefix and dir in self._roots:
            return 'all'
        return (dir in self._roots or
                dir in self._dirs or
                dir in self._parents or
                any(parentdir in self._roots
                    for parentdir in util.finddirs(dir)))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        return _dirchildren(
            itertools.chain(self._dirs, self._roots, self._parents),
            onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        # Accept the deprecated '.' spelling of the root, as visitdir() does.
        dir = normalizerootdir(dir, 'visitchildrenset')
        if self._prefix and dir in self._roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('' in self._roots or
            dir in self._roots or
            dir in self._dirs or
            any(parentdir in self._roots
                for parentdir in util.finddirs(dir))):
            return 'this'

        if dir in self._parents:
            # Only the children that lead towards a root or dir.
            return self._allparentschildren.get(dir) or set()
        return set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
660 660
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    # Matching a file is plain membership in the file set.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset))

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        return dir in self._dirs

    def visitchildrenset(self, dir):
        dir = normalizerootdir(dir, 'visitchildrenset')

        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | (self._dirs - {''})
        if dir != '':
            # Keep only entries below dir, made relative to it.
            below = dir + '/'
            cut = len(below)
            candidates = set(c[cut:] for c in candidates
                             if c.startswith(below))
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating that it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {c for c in candidates if '/' not in c}
        # We really do not expect children to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
728 728
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1, self._m2 = m1, m2
        # Callbacks are taken from the first matcher only.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        if self._m2.visitdir(dir) == 'all':
            # everything below dir is excluded
            return False
        if self._m2.visitdir(dir):
            # m2 may match something below dir, so never answer 'all'
            return bool(self._m1.visitdir(dir))
        # m2 does not match dir, we can return 'all' here if possible
        return self._m1.visitdir(dir)

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty
            # (either 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit
        #    'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
797 797
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.

    Shortcuts: if either argument is None, "m1 or m2" is returned as is; if
    one side always matches, a shallow copy of the other side is returned
    (carrying m1's callbacks); otherwise an intersectionmatcher is built.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        clone = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        clone.bad = m1.bad
        clone.explicitdir = m1.explicitdir
        clone.traversedir = m1.traversedir
        return clone

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
818 818
class intersectionmatcher(basematcher):
    '''Matches a file only when both wrapped matchers match it.

    bad, explicitdir and traversedir are taken from the first matcher.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # Filter the exact side's files through the other matcher.
        exactside, other = self._m1, self._m2
        if not exactside.isexact():
            exactside, other = other, exactside
        return [f for f in exactside.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            return self._m2.visitdir(dir)
        if not first:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side.
        if m1_set == 'all':
            return m2_set
        if m2_set == 'all':
            return m1_set

        if m1_set == 'this' or m2_set == 'this':
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set & m2_set

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
879 879
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below 'path', with
        # the "path/" prefix stripped.
        subprefix = path + "/"
        self._files = [name[len(subprefix):] for name in matcher._files
                       if name.startswith(subprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def bad(self, f, msg):
        # Report the failure against the path as the wrapped matcher sees it.
        self._matcher.bad(self._path + "/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        # The root of this matcher is the subdirectory itself.
        target = self._path if dir == '' else self._path + "/" + dir
        return self._matcher.visitdir(target)

    def visitchildrenset(self, dir):
        dir = normalizerootdir(dir, 'visitchildrenset')
        # The root of this matcher is the subdirectory itself.
        target = self._path if dir == '' else self._path + "/" + dir
        return self._matcher.visitchildrenset(target)

    def always(self):
        return self._always

    def prefix(self):
        if self._always:
            return False
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
957 957
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._path = path
        self._pathprefix = path + '/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        # The wrapped matcher's files, re-rooted below the prefix path.
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        # Anything outside the prefix directory can never match.
        return False

    @propertycache
    def _pathdirs(self):
        return set(util.finddirs(self._path))

    def visitdir(self, dir):
        prefix = self._pathprefix
        if dir == self._path:
            return self._matcher.visitdir('')
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix):])
        # Otherwise only the directories in _pathdirs are worth visiting.
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        prefix = self._pathprefix
        if dir == self._path:
            return self._matcher.visitchildrenset('')
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix):])
        return 'this' if dir in self._pathdirs else set()

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        shown = (pycompat.bytestr(self._path), self._matcher)
        return '<prefixdirmatcher path=%r, matcher=%r>' % shown
1038 1038
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, explicitdir, traversedir) are taken from
    the first matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # A file matches as soon as any one of the wrapped matchers accepts it.
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                # One matcher wanting everything settles the question.
                return answer
            visit = visit | answer
        return visit

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if sawthis or answer == 'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children.update(answer)
        return 'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return '<unionmatcher matchers=%r>' % self._matchers
1090 1090
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern, or default if it has none.

    Only prefixes that are known pattern kinds count as a kind.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, _pat = _patsplit(pattern, default)
    return kind
1105 1105
def _patsplit(pattern, default):
    """Split a string into (kind, pattern).

    The text before the first ':' is the kind only if it is one of
    allpatternkinds; otherwise the result is (default, pattern) with the
    input left untouched."""
    kind, colon, pat = pattern.partition(':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1114 1114
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the read position in pat; slices (pat[i:i + 1]) are used instead
    # of indexing so that a one-character string is obtained each time.
    i, n = 0, len(pat)
    res = ''
    # nesting depth of currently open '{' groups
    group = 0
    escape = util.stringutil.regexbytesescapemap.get
    def peek():
        # next unread character, or a false value at end of input
        return i < n and pat[i:i + 1]
    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # not a glob metacharacter: emit it, regex-escaped if needed
            res += escape(c, c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' becomes an optional run of leading directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # bare '**' matches anything, '/' included
                    res += '.*'
            else:
                # single '*' stays within one path component
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # scan for the closing ']'; a '!' or ']' directly after '[' is
            # part of the class and cannot terminate it
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' as a literal character
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation '!' becomes regex negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in a glob, so escape it
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside a '{...}' group
            res += '|'
        elif c == '\\':
            # backslash escapes the next character; a trailing backslash is
            # emitted as an escaped literal
            p = peek()
            if p:
                i += 1
                res += escape(p, p)
            else:
                res += escape(c, c)
        else:
            # '}' or ',' outside of any group: plain literal
            res += escape(c, c)
    return res
1195 1195
def _regex(kind, pat, globsuffix):
    '''Translate one (normalized) pattern of the given kind into a regular
    expression string.

    globsuffix is appended to the regexp of globs. Kinds that cannot be
    expressed as a regexp raise error.ProgrammingError.'''

    if rustext is not None:
        # Delegate to the Rust implementation when it is available.
        try:
            return rustext.filepatterns.build_single_regex(
                kind, pat, globsuffix)
        except rustext.filepatterns.PatternError:
            raise error.ProgrammingError(
                'not a regex pattern: %s:%s' % (kind, pat))

    if kind in ('glob', 'relpath') and not pat:
        return ''
    if kind == 're':
        return pat
    if kind in ('path', 'relpath'):
        return ('' if pat == '.'
                else util.stringutil.reescape(pat) + '(?:/|$)')
    if kind == 'rootfilesin':
        # Pattern is a directory name ('.' meaning the root).
        dirprefix = ('' if pat == '.'
                     else util.stringutil.reescape(pat) + '/')
        # Anything after the pattern must be a non-directory.
        return dirprefix + '[^/]+$'
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind == 'relre':
        return pat if pat.startswith('^') else '.*' + pat
    if kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1238 1238
def _buildmatch(kindpats, globsuffix, root):
    '''Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs. Subinclude patterns are
    split off first and matched through lazily built per-prefix matchers.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # prefix -> matcher, filled in the first time a prefix is hit
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            # Only directory-membership patterns: a set lookup on the file's
            # parent directory replaces the regexp.
            dirs = {p for k, p, s in kindpats}
            def mf(f):
                parent, slash, _name = f.rpartition('/')
                if not slash:
                    parent = '.'
                return parent in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1281 1281
1282 1282 MAX_RE_SIZE = 20000
1283 1283
1284 1284 def _joinregexes(regexps):
1285 1285 """gather multiple regular expressions into a single one"""
1286 1286 return '|'.join(regexps)
1287 1287
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        # regexp strings, one per group of patterns that fits MAX_RE_SIZE
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        # the full alternation is always what gets returned as the regexp
        # string, even when matching is done group by group
        fullregexp = _joinregexes(regexps)

        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                # a single pattern over the limit cannot be split further
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                # adding this piece would overflow: close the current group
                # and start a new one at this piece
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # presumably the +1 accounts for the '|' that joins the pieces
            groupsize += piecesize + 1

        if startidx == 0:
            # everything fit in one group: compile the full regexp once
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            # flush the last group and match against each group in turn
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Compile the patterns one at a time to find the offending one and
        # name it (and its source, when known) in the error.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        # no individual pattern failed on its own
        raise error.Abort(_("invalid pattern"))
1339 1339
1340 1340 def _patternrootsanddirs(kindpats):
1341 1341 '''Returns roots and directories corresponding to each pattern.
1342 1342
1343 1343 This calculates the roots and directories exactly matching the patterns and
1344 1344 returns a tuple of (roots, dirs) for each. It does not return other
1345 1345 directories which may also need to be considered, like the parent
1346 1346 directories.
1347 1347 '''
1348 1348 r = []
1349 1349 d = []
1350 1350 for kind, pat, source in kindpats:
1351 1351 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1352 1352 root = []
1353 1353 for p in pat.split('/'):
1354 1354 if '[' in p or '{' in p or '*' in p or '?' in p:
1355 1355 break
1356 1356 root.append(p)
1357 1357 r.append('/'.join(root))
1358 1358 elif kind in ('relpath', 'path'):
1359 1359 if pat == '.':
1360 1360 pat = ''
1361 1361 r.append(pat)
1362 1362 elif kind in ('rootfilesin',):
1363 1363 if pat == '.':
1364 1364 pat = ''
1365 1365 d.append(pat)
1366 1366 else: # relglob, re, relre
1367 1367 r.append('')
1368 1368 return r, d
1369 1369
def _roots(kindpats):
    '''Return the root directories to match recursively for the patterns.

    This is the first element of _patternrootsanddirs(); the exact
    directories are dropped.'''
    return _patternrootsanddirs(kindpats)[0]
1374 1374
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents). `roots` and `dirs` are lists
    in pattern order; `parents` is a set, so the result does not depend on
    the order in which util.dirs() yields directories.

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', ''], [], set(['', 'g']))
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', ''], set(['', 'g']))
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', ''], [], set(['', 'p']))
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['', '', ''], [], set(['']))
    '''
    r, d = _patternrootsanddirs(kindpats)

    p = set()
    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    p.update(util.dirs(d))
    p.update(util.dirs(r))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return r, d, p
1413 1413
def _explicitfiles(kindpats):
    '''Return the potential explicit filenames named by the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so those patterns
    # are left out before computing the roots.
    return _roots([kp for kp in kindpats if kp[0] != 'rootfilesin'])
1426 1426
1427 1427 def _prefix(kindpats):
1428 1428 '''Whether all the patterns match a prefix (i.e. recursively)'''
1429 1429 for kind, pat, source in kindpats:
1430 1430 if kind not in ('path', 'relpath'):
1431 1431 return False
1432 1432 return True
1433 1433
# Lazily compiled regexp used by readpatternfile() to strip comments.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.

    warn, when true, is called with a message for each unknown
    "syntax:" value; such lines are otherwise ignored.
    '''
    global _commentre

    if rustext is not None:
        result, warnings = rustext.filepatterns.read_pattern_file(
            filepath,
            bool(warn),
            sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    # "syntax: <key>" value -> prefix applied to the lines that follow
    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The with block guarantees the file is closed even if parsing raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A line may override the current default with its own prefix,
            # spelled either as the full prefix or as "<key>:".
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now