##// END OF EJS Templates
match: complete documentation of match() parameters
Denis Laxalde -
r42252:bee16475 default
parent child Browse files
Show More
@@ -1,1429 +1,1432
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
# Names of all the pattern kinds known to this module.
allpatternkinds = (
    're',
    'glob',
    'path',
    'relglob',
    'relpath',
    'relre',
    'rootglob',
    'listfile',
    'listfile0',
    'set',
    'include',
    'subinclude',
    'rootfilesin',
)

# Kinds whose pattern is canonicalized relative to the current working
# directory (see _donormalize).
cwdrelativepatternkinds = (
    'relpath',
    'glob',
)

# Short local alias for the caching property decorator.
propertycache = util.propertycache
34 34
def _rematcher(regex):
    '''Compile ``regex`` with the best available regexp engine and return
    the matching callable of the compiled pattern.

    The compiled object's ``test_match`` attribute is preferred when it
    exists; otherwise its regular ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matchfn = compiled.test_match
    except AttributeError:
        matchfn = compiled.match
    return matchfn
44 44
45 45 def _expandsets(kindpats, ctx, listsubrepos, badfn):
46 46 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
47 47 matchers = []
48 48 other = []
49 49
50 50 for kind, pat, source in kindpats:
51 51 if kind == 'set':
52 52 if ctx is None:
53 53 raise error.ProgrammingError("fileset expression with no "
54 54 "context")
55 55 matchers.append(ctx.matchfileset(pat, badfn=badfn))
56 56
57 57 if listsubrepos:
58 58 for subpath in ctx.substate:
59 59 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
60 60 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
61 61 matchers.append(pm)
62 62
63 63 continue
64 64 other.append((kind, pat, source))
65 65 return matchers, other
66 66
67 67 def _expandsubinclude(kindpats, root):
68 68 '''Returns the list of subinclude matcher args and the kindpats without the
69 69 subincludes in it.'''
70 70 relmatchers = []
71 71 other = []
72 72
73 73 for kind, pat, source in kindpats:
74 74 if kind == 'subinclude':
75 75 sourceroot = pathutil.dirname(util.normpath(source))
76 76 pat = util.pconvert(pat)
77 77 path = pathutil.join(sourceroot, pat)
78 78
79 79 newroot = pathutil.dirname(path)
80 80 matcherargs = (newroot, '', [], ['include:%s' % path])
81 81
82 82 prefix = pathutil.canonpath(root, root, newroot)
83 83 if prefix:
84 84 prefix += '/'
85 85 relmatchers.append((prefix, matcherargs))
86 86 else:
87 87 other.append((kind, pat, source))
88 88
89 89 return relmatchers, other
90 90
91 91 def _kindpatsalwaysmatch(kindpats):
92 92 """"Checks whether the kindspats match everything, as e.g.
93 93 'relpath:.' does.
94 94 """
95 95 for kind, pat, source in kindpats:
96 96 if pat != '' or kind not in ['relpath', 'glob']:
97 97 return False
98 98 return True
99 99
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher out of ``kindpats``.

    'set' patterns are expanded into fileset matchers (see _expandsets); the
    remaining patterns, if any, are handed to ``matchercls``. Depending on
    how many matchers result, this returns a nevermatcher (none), the only
    matcher itself (one) or a unionmatcher combining all of them.
    '''
    filesetmatchers, plainkindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

    matchers = []
    if plainkindpats:
        matchers.append(matchercls(root, plainkindpats, badfn=badfn))
    matchers.extend(filesetmatchers)

    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) > 1:
        return unionmatcher(matchers)
    return matchers[0]
115 115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
              (include and exclude patterns always default to 'glob')
    auditor - optional path auditor
    ctx - optional changecontext (needed for 'set:' patterns and when
          icasefs is true)
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
            (it is not handed to the include and exclude matchers)
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'listfile:<path>' - a file of patterns to read, one per line
    'listfile0:<path>' - a file of patterns to read, separated by NUL bytes
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            normalized = _donormalize(patterns, default, root, cwd, auditor,
                                      warn)
            for kind, pat, source in normalized:
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                casefixed = dsnormalize(pat)
                # Preserve the original to handle a case only rename.
                if pat != casefixed and pat in dirstate:
                    folded.append((kind, pat, source))
                folded.append((kind, casefixed, source))
            return folded
    else:
        normalize = _donormalize

    m = None
    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if not _kindpatsalwaysmatch(kindpats):
            m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
                                      listsubrepos=listsubrepos, badfn=badfn)
    if m is None:
        # Either the patterns match everything or there are none at all.
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        includes = _buildkindpatsmatcher(includematcher, root, kindpats,
                                         ctx=ctx, listsubrepos=listsubrepos,
                                         badfn=None)
        m = intersectmatchers(m, includes)
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        excludes = _buildkindpatsmatcher(includematcher, root, kindpats,
                                         ctx=ctx, listsubrepos=listsubrepos,
                                         badfn=None)
        m = differencematcher(m, excludes)
    return m
192 195
def exact(files, badfn=None):
    '''Return an exactmatcher for ``files``, with ``badfn`` as its optional
    bad() callback.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
195 198
def always(badfn=None):
    '''Return an alwaysmatcher, with ``badfn`` as its optional bad()
    callback.'''
    matcher = alwaysmatcher(badfn)
    return matcher
198 201
def never(badfn=None):
    '''Return a nevermatcher, with ``badfn`` as its optional bad()
    callback.'''
    matcher = nevermatcher(badfn)
    return matcher
201 204
def badmatch(match, badfn):
    """Return a shallow copy of the matcher ``match`` whose bad method is
    ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
209 212
210 213 def _donormalize(patterns, default, root, cwd, auditor, warn):
211 214 '''Convert 'kind:pat' from the patterns list to tuples with kind and
212 215 normalized and rooted patterns and with listfiles expanded.'''
213 216 kindpats = []
214 217 for kind, pat in [_patsplit(p, default) for p in patterns]:
215 218 if kind in cwdrelativepatternkinds:
216 219 pat = pathutil.canonpath(root, cwd, pat, auditor)
217 220 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
218 221 pat = util.normpath(pat)
219 222 elif kind in ('listfile', 'listfile0'):
220 223 try:
221 224 files = util.readfile(pat)
222 225 if kind == 'listfile0':
223 226 files = files.split('\0')
224 227 else:
225 228 files = files.splitlines()
226 229 files = [f for f in files if f]
227 230 except EnvironmentError:
228 231 raise error.Abort(_("unable to read file list (%s)") % pat)
229 232 for k, p, source in _donormalize(files, default, root, cwd,
230 233 auditor, warn):
231 234 kindpats.append((k, p, pat))
232 235 continue
233 236 elif kind == 'include':
234 237 try:
235 238 fullpath = os.path.join(root, util.localpath(pat))
236 239 includepats = readpatternfile(fullpath, warn)
237 240 for k, p, source in _donormalize(includepats, default,
238 241 root, cwd, auditor, warn):
239 242 kindpats.append((k, p, source or pat))
240 243 except error.Abort as inst:
241 244 raise error.Abort('%s: %s' % (pat, inst[0]))
242 245 except IOError as inst:
243 246 if warn:
244 247 warn(_("skipping unreadable pattern file '%s': %s\n") %
245 248 (pat, stringutil.forcebytestr(inst.strerror)))
246 249 continue
247 250 # else: re or relre - which cannot be normalized
248 251 kindpats.append((kind, pat, ''))
249 252 return kindpats
250 253
class basematcher(object):
    '''Common base of all matchers.

    As implemented here the matcher matches no file, lists no explicit
    files and asks for every directory to be visited; subclasses override
    the relevant methods.
    '''

    def __init__(self, badfn=None):
        # Only shadow the default bad() callback when one was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
372 375
class alwaysmatcher(basematcher):
    '''Matches everything.

    Every file matches and every directory is to be visited entirely
    ('all').
    '''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def matchfn(self, f):
        return True

    def always(self):
        return True

    def visitdir(self, dir):
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'
393 396
class nevermatcher(basematcher):
    '''Matches nothing.

    No file matches (matchfn is inherited from basematcher) and no directory
    needs to be visited.
    '''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()
419 422
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` is installed as the matcher's matchfn; ``predrepr``, when
    given, is what __repr__ shows in place of the function itself.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): the 'predicatenmatcher' spelling is runtime output
        # and is kept as is.
        return '<predicatenmatcher pred=%s>' % described
433 436
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     ('re', '.*\.c$', ''),
    ...     ('path', 'foo/a', ''),
    ...     ('relpath', 'b', ''),
    ...     ('glob', '*.h', ''),
    ... ]
    >>> m = patternmatcher('foo', kindpats)
    >>> bool(m('main.c'))  # matches re:.*\.c$
    True
    >>> bool(m('b.txt'))
    False
    >>> bool(m('foo/a'))  # matches path:foo/a
    True
    >>> bool(m('a'))  # does not match path:b, since 'root' is 'foo'
    False
    >>> bool(m('b'))  # matches relpath:b, since 'root' is 'foo'
    True
    >>> bool(m('lib.h'))  # matches glob:*.h
    True

    >>> m.files()
    ['.', 'foo/a', 'b', '.']
    >>> m.exact('foo/a')
    True
    >>> m.exact('b')
    True
    >>> m.exact('lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # '$' is the suffix handed to _buildmatch for these patterns.
        pats, matchfn = _buildmatch(kindpats, '$', root)
        self._pats = pats
        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        # The directories leading to the explicit files, plus the root.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        fileset = self._fileset
        if self._prefix and dir in fileset:
            return 'all'
        if '.' in fileset or dir in fileset or dir in self._dirs:
            return True
        # Otherwise visit only when one of the ancestors is listed.
        return any(parentdir in fileset
                   for parentdir in util.finddirs(dir))

    def visitchildrenset(self, dir):
        # Translate the visitdir() answer into visitchildrenset() terms.
        visit = self.visitdir(dir)
        if visit is True:
            return 'this'
        if not visit:
            return set()
        assert visit == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
502 505
503 506 # This is basically a reimplementation of util.dirs that stores the children
504 507 # instead of just a count of them, plus a small optional optimization to avoid
505 508 # some directories we don't need.
506 509 class _dirchildren(object):
507 510 def __init__(self, paths, onlyinclude=None):
508 511 self._dirs = {}
509 512 self._onlyinclude = onlyinclude or []
510 513 addpath = self.addpath
511 514 for f in paths:
512 515 addpath(f)
513 516
514 517 def addpath(self, path):
515 518 if path == '.':
516 519 return
517 520 dirs = self._dirs
518 521 findsplitdirs = _dirchildren._findsplitdirs
519 522 for d, b in findsplitdirs(path):
520 523 if d not in self._onlyinclude:
521 524 continue
522 525 dirs.setdefault(d, set()).add(b)
523 526
524 527 @staticmethod
525 528 def _findsplitdirs(path):
526 529 # yields (dirname, basename) tuples, walking back to the root. This is
527 530 # very similar to util.finddirs, except:
528 531 # - produces a (dirname, basename) tuple, not just 'dirname'
529 532 # - includes root dir
530 533 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
531 534 # slash, and produces '.' for the root instead of ''.
532 535 oldpos = len(path)
533 536 pos = path.rfind('/')
534 537 while pos != -1:
535 538 yield path[:pos], path[pos + 1:oldpos]
536 539 oldpos = pos
537 540 pos = path.rfind('/', 0, pos)
538 541 yield '.', path[:oldpos]
539 542
540 543 def get(self, path):
541 544 return self._dirs.get(path, set())
542 545
class includematcher(basematcher):
    '''Matcher for a set of (kind, pat, source) patterns; match() uses it
    for both its ``include`` and its ``exclude`` arguments.

    The patterns are compiled by _buildmatch() with a '(?:/|$)' suffix
    (presumably so that a pattern also covers what lies beneath it -- see
    _buildmatch).
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        pats, matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._pats = pats
        self.matchfn = matchfn
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def _underroot(self, dir):
        # Private helper: is ``dir`` one of the roots or below one of them?
        roots = self._roots
        if '.' in roots or dir in roots:
            return True
        return any(parentdir in roots for parentdir in util.finddirs(dir))

    def visitdir(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots:
            return True
        if dir in self._dirs or dir in self._parents:
            return True
        return any(parentdir in roots for parentdir in util.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('.' in roots or
            dir in roots or
            dir in self._dirs or
            any(parentdir in roots
                for parentdir in util.finddirs(dir))):
            return 'this'

        if dir not in self._parents:
            return set()
        # A pure parent: only the children leading to roots/dirs matter.
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
600 603
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher(['a.txt', 're:.*\.c$'])
    >>> m('a.txt')
    True
    >>> m('b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m('main.c')
    False
    >>> m('re:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as is (not copied); anything else is turned into
        # a list.
        self._files = files if isinstance(files, list) else list(files)

    # Matching a file is just a membership test on the file list.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | (self._dirs - {'.'})
        if dir != '.':
            # Keep what lies below ``dir``, made relative to it.
            dirprefix = dir + '/'
            candidates = {c[len(dirprefix):] for c in candidates
                          if c.startswith(dirprefix)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
        # subdir will be in there without a slash.
        children = set(c for c in candidates if '/' not in c)
        # We really do not expect children to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
665 668
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 only once per directory.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            # everything below dir is excluded
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 matches part of what is below dir, so 'all' must not be
        # returned even if m1 says so.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
734 737
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.

    When one of the matchers is None, or matches everything, no
    intersectionmatcher is built: the other matcher (or a shallow copy of it
    carrying m1's non-matching-attributes) is returned instead.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 decides alone, but callbacks still come from m1.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        result = copy.copy(m2)
        result.bad = m1.bad
        result.explicitdir = m1.explicitdir
        result.traversedir = m1.traversedir
        return result

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
755 758
class intersectionmatcher(basematcher):
    '''Matches the files that both ``m1`` and ``m2`` match.

    The non-matching-attributes (bad, explicitdir, traversedir) are taken
    from ``m1``.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # Filter the files of an exact side through the other matcher.
        exactside, other = self._m1, self._m2
        if not exactside.isexact():
            exactside, other = other, exactside
        return [f for f in exactside.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 != 'all':
            # bool() because visit1=True + visit2='all' should not be 'all'
            return bool(visit1 and self._m2.visitdir(dir))
        return self._m2.visitdir(dir)

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side leaves the decision to the other side.
        if m1_set == 'all':
            return m2_set
        if m2_set == 'all':
            return m1_set

        if 'this' in (m1_set, m2_set):
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
816 819
817 820 class subdirmatcher(basematcher):
818 821 """Adapt a matcher to work on a subdirectory only.
819 822
820 823 The paths are remapped to remove/insert the path as needed:
821 824
822 825 >>> from . import pycompat
823 826 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
824 827 >>> m2 = subdirmatcher(b'sub', m1)
825 828 >>> bool(m2(b'a.txt'))
826 829 False
827 830 >>> bool(m2(b'b.txt'))
828 831 True
829 832 >>> bool(m2.matchfn(b'a.txt'))
830 833 False
831 834 >>> bool(m2.matchfn(b'b.txt'))
832 835 True
833 836 >>> m2.files()
834 837 ['b.txt']
835 838 >>> m2.exact(b'b.txt')
836 839 True
837 840 >>> def bad(f, msg):
838 841 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
839 842 >>> m1.bad = bad
840 843 >>> m2.bad(b'x.txt', b'No such file')
841 844 sub/x.txt: No such file
842 845 """
843 846
844 847 def __init__(self, path, matcher):
845 848 super(subdirmatcher, self).__init__()
846 849 self._path = path
847 850 self._matcher = matcher
848 851 self._always = matcher.always()
849 852
850 853 self._files = [f[len(path) + 1:] for f in matcher._files
851 854 if f.startswith(path + "/")]
852 855
853 856 # If the parent repo had a path to this subrepo and the matcher is
854 857 # a prefix matcher, this submatcher always matches.
855 858 if matcher.prefix():
856 859 self._always = any(f == path for f in matcher._files)
857 860
858 861 def bad(self, f, msg):
859 862 self._matcher.bad(self._path + "/" + f, msg)
860 863
861 864 def matchfn(self, f):
862 865 # Some information is lost in the superclass's constructor, so we
863 866 # can not accurately create the matching function for the subdirectory
864 867 # from the inputs. Instead, we override matchfn() and visitdir() to
865 868 # call the original matcher with the subdirectory path prepended.
866 869 return self._matcher.matchfn(self._path + "/" + f)
867 870
868 871 def visitdir(self, dir):
869 872 if dir == '.':
870 873 dir = self._path
871 874 else:
872 875 dir = self._path + "/" + dir
873 876 return self._matcher.visitdir(dir)
874 877
875 878 def visitchildrenset(self, dir):
876 879 if dir == '.':
877 880 dir = self._path
878 881 else:
879 882 dir = self._path + "/" + dir
880 883 return self._matcher.visitchildrenset(dir)
881 884
882 885 def always(self):
883 886 return self._always
884 887
def prefix(self):
    """Tell whether this is a prefix matcher that does not match always."""
    inner = self._matcher.prefix()
    if not inner:
        return inner
    return not self._always
887 890
@encoding.strmethod
def __repr__(self):
    """Debug representation showing the subdirectory and wrapped matcher."""
    details = (self._path, self._matcher)
    return '<subdirmatcher path=%r, matcher=%r>' % details
892 895
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> bool(m2(b'a.txt'),)
    False
    >>> bool(m2(b'd/e/a.txt'))
    True
    >>> bool(m2(b'd/e/b.txt'))
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._matcher = matcher
        self._path = path
        # precomputed "path/" used to recognize and strip the prefix
        self._pathprefix = path + '/'

    @propertycache
    def _files(self):
        # the wrapped matcher's files, re-rooted under the prefix
        prefixed = []
        for name in self._matcher._files:
            prefixed.append(self._pathprefix + name)
        return prefixed

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        # anything outside the prefix directory never matches
        return False

    @propertycache
    def _pathdirs(self):
        # ancestors of the prefix path, plus the root directory
        ancestors = set(util.finddirs(self._path))
        return ancestors | {'.'}

    def visitdir(self, dir):
        if dir == self._path:
            return self._matcher.visitdir('.')
        prefix = self._pathprefix
        if not dir.startswith(prefix):
            # only the directories leading to the prefix need visiting
            return dir in self._pathdirs
        return self._matcher.visitdir(dir[len(prefix):])

    def visitchildrenset(self, dir):
        if dir == self._path:
            return self._matcher.visitchildrenset('.')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix):])
        if dir not in self._pathdirs:
            return set()
        return 'this'

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        details = (pycompat.bytestr(self._path), self._matcher)
        return '<prefixdirmatcher path=%r, matcher=%r>' % details
973 976
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, explicitdir, traversedir) are taken from
    the first matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # a file matches as soon as any of the wrapped matchers accepts it
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                # nothing can be stronger than 'all'
                return answer
            visit |= answer
        return visit

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if answer == 'this':
                sawthis = True
            if sawthis:
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children = children.union(answer)
        if sawthis:
            return 'this'
        return children

    @encoding.strmethod
    def __repr__(self):
        return '<unionmatcher matchers=%r>' % self._matchers
1025 1028
def patkind(pattern, default=None):
    r'''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise (no 'kind:' prefix, or an unknown kind) return ``default``.

    >>> patkind('re:.*\.c$')
    're'
    >>> patkind('glob:*.c')
    'glob'
    >>> patkind('relpath:test.py')
    'relpath'
    >>> patkind('main.py')
    >>> patkind('main.py', default='re')
    're'
    '''
    # The docstring is a raw string so that the backslash in the first
    # doctest is not an invalid escape sequence.
    return _patsplit(pattern, default)[0]
1040 1043
def _patsplit(pattern, default):
    """Return a (kind, pattern) pair for ``pattern``.

    A leading 'kind:' is split off only when kind is one of the known
    pattern kinds; otherwise ``default`` is paired with the whole string."""
    kind, sep, pat = pattern.partition(':')
    if sep and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1049 1052
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned one character at a time; characters without a
    special glob meaning are emitted through the regexp escape map.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the scan position, n the pattern length
    i, n = 0, len(pat)
    res = ''
    # nesting depth of currently open '{' groups
    group = 0
    escape = util.stringutil.regexbytesescapemap.get
    def peek():
        # next unread character, or a false value at end of pattern
        return i < n and pat[i:i + 1]
    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # ordinary character: emit it, regexp-escaped if needed
            res += escape(c, c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches any number of leading directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' matches anything, including '/'
                    res += '.*'
            else:
                # a single '*' does not cross directory separators
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a '!' or ']' right after the
            # opening bracket belongs to the class
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation '!' becomes regexp negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in a glob, so escape it
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside a '{...}' group
            res += '|'
        elif c == '\\':
            # a backslash makes the following character literal
            p = peek()
            if p:
                i += 1
                res += escape(p, p)
            else:
                res += escape(c, c)
        else:
            # '}' or ',' outside of any group: literal character
            res += escape(c, c)
    return res
1130 1133
def _regex(kind, pat, globsuffix):
    '''Translate a (normalized) pattern of the given kind into a regexp string.

    globsuffix is appended to the regexp produced for glob-like kinds.
    An empty pattern yields an empty regexp; a kind that cannot be expressed
    as a regexp raises error.ProgrammingError.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'relre':
        # an anchored pattern is kept as-is, others may match anywhere
        return pat if pat.startswith('^') else '.*' + pat
    if kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        return util.stringutil.reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root).
        dirprefix = '' if pat == '.' else util.stringutil.reescape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + '[^/]+$'
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1159 1162
def _buildmatch(kindpats, globsuffix, root):
    '''Build a (regexp string, matcher function) pair for kindpats.

    globsuffix is appended to the regexp of globs. 'subinclude' patterns are
    split off first and handled by matchers that are created on first use.'''
    subincludes, kindpats = _expandsubinclude(kindpats, root)
    candidates = []

    if subincludes:
        # prefix -> matcher, filled in lazily
        cache = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = cache.get(prefix)
                if submatch is None:
                    submatch = cache[prefix] = match(*matcherargs)
                if submatch(f[len(prefix):]):
                    return True
            return False

        candidates.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            # only 'rootfilesin' patterns: a set lookup on the parent
            # directory replaces the regexp
            dirs = {p for k, p, s in kindpats}

            def matchrootfilesin(f):
                slash = f.rfind('/')
                parent = f[:slash] if slash >= 0 else '.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
            candidates.append(matchrootfilesin)
        else:
            regex, rematch = _buildregexmatch(kindpats, globsuffix)
            candidates.append(rematch)

    if len(candidates) == 1:
        return regex, candidates[0]
    return regex, lambda f: any(fn(f) for fn in candidates)
1202 1205
# Size limit used by _buildregexmatch(): a single pattern's regexp longer than
# this aborts, and regexps are joined into groups that stay within this size.
MAX_RE_SIZE = 20000
1204 1207
def _joinregexes(regexps):
    """Combine several regular expressions into one alternation."""
    alternation = '|'
    return alternation.join(regexps)
1208 1211
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The per-pattern regexps are compiled as one regexp when possible, or as
    several groups that each stay within MAX_RE_SIZE. If compilation fails,
    the patterns are retried one by one to report the offending one.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        chunks = []
        chunkstart = 0
        chunksize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if (chunksize + piecesize) > MAX_RE_SIZE:
                # close the current group and start a new one at this piece
                chunks.append(_joinregexes(regexps[chunkstart:idx]))
                chunkstart = idx
                chunksize = 0
            # the extra byte accounts for the '|' separator
            chunksize += piecesize + 1

        if chunkstart == 0:
            # everything fits in a single regexp
            return fullregexp, _rematcher(fullregexp)

        chunks.append(_joinregexes(regexps[chunkstart:]))
        allmatchers = [_rematcher(g) for g in chunks]
        return fullregexp, lambda s: any(m(s) for m in allmatchers)
    except re.error:
        # find out which pattern is the invalid one
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (s, k, p))
        raise error.Abort(_("invalid pattern"))
1259 1262
def _patternrootsanddirs(kindpats):
    '''Compute the roots and directories corresponding to each pattern.

    Returns a (roots, dirs) tuple holding the roots and directories exactly
    matching the patterns. Other directories which may also need to be
    considered, like the parent directories, are not included.
    '''
    roots = []
    dirs = []
    globchars = ('[', '{', '*', '?')
    for kind, pat, source in kindpats:
        if kind in ('relpath', 'path'):
            roots.append(pat or '.')
        elif kind in ('rootfilesin',):
            dirs.append(pat or '.')
        elif kind in ('glob', 'rootglob'):
            # find the non-glob prefix
            literal = []
            for segment in pat.split('/'):
                if any(ch in segment for ch in globchars):
                    break
                literal.append(segment)
            roots.append('/'.join(literal) or '.')
        else:
            # relglob, re, relre
            roots.append('.')
    return roots, dirs
1285 1288
def _roots(kindpats):
    '''Return the root directories to match recursively for the patterns.'''
    # only the first element of the (roots, dirs) pair is of interest
    return _patternrootsanddirs(kindpats)[0]
1290 1293
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # Collect the parents as non-recursive/exact directories, since they must
    # be scanned to get to either the roots or the other exact directories.
    parents = []
    for paths in (dirs, roots):
        parents.extend(util.dirs(paths))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1331 1334
def _explicitfiles(kindpats):
    '''Return the filenames that the patterns may explicitly name.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # Drop the pattern kinds that can only name directories; the remaining
    # ones are those where a filename can be specified.
    filable = []
    for kindpat in kindpats:
        if kindpat[0] not in ('rootfilesin',):
            filable.append(kindpat)
    return _roots(filable)
1344 1347
def _prefix(kindpats):
    '''Tell whether every pattern matches a prefix (i.e. recursively).

    This is the case when all kinds are 'path' or 'relpath'.'''
    return all(kind in ('path', 'relpath') for kind, pat, source in kindpats)
1351 1354
# Regexp used by readpatternfile() to strip '#' comments; it is compiled
# lazily the first time a line containing '#' is read.
_commentre = None
1353 1356
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    warn, when true, is called with a message for each "syntax:" line naming
    an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where line is the text of the line once its
    comment, trailing white space and kind prefix have been removed. This is
    useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The context manager guarantees that the file is closed even when
    # parsing or the warn() callback raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # switch the default kind for the following lines
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # an explicit kind prefix on the line overrides the default
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now