##// END OF EJS Templates
match: add doctest examples for patkind()
Denis Laxalde -
r42251:413a75da default
parent child Browse files
Show More
@@ -1,1418 +1,1429 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
# Every pattern kind ("kind:pattern") known to this module.
allpatternkinds = (
    're',
    'glob',
    'path',
    'relglob',
    'relpath',
    'relre',
    'rootglob',
    'listfile',
    'listfile0',
    'set',
    'include',
    'subinclude',
    'rootfilesin',
)
# Kinds whose pattern is canonicalized against the current working directory
# (see _donormalize) rather than only path-normalized.
cwdrelativepatternkinds = ('relpath', 'glob')

# Local alias so matcher classes can write "@propertycache".
propertycache = util.propertycache
34 34
def _rematcher(regex):
    '''Compile ``regex`` with util.re and return a callable that matches it.

    The compiled object's ``test_match`` attribute is preferred when it
    exists (slightly faster, provided by facebook's re2 bindings); otherwise
    the regular ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    matcher = getattr(compiled, 'test_match', None)
    if matcher is None:
        matcher = compiled.match
    return matcher
44 44
45 45 def _expandsets(kindpats, ctx, listsubrepos, badfn):
46 46 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
47 47 matchers = []
48 48 other = []
49 49
50 50 for kind, pat, source in kindpats:
51 51 if kind == 'set':
52 52 if ctx is None:
53 53 raise error.ProgrammingError("fileset expression with no "
54 54 "context")
55 55 matchers.append(ctx.matchfileset(pat, badfn=badfn))
56 56
57 57 if listsubrepos:
58 58 for subpath in ctx.substate:
59 59 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
60 60 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
61 61 matchers.append(pm)
62 62
63 63 continue
64 64 other.append((kind, pat, source))
65 65 return matchers, other
66 66
67 67 def _expandsubinclude(kindpats, root):
68 68 '''Returns the list of subinclude matcher args and the kindpats without the
69 69 subincludes in it.'''
70 70 relmatchers = []
71 71 other = []
72 72
73 73 for kind, pat, source in kindpats:
74 74 if kind == 'subinclude':
75 75 sourceroot = pathutil.dirname(util.normpath(source))
76 76 pat = util.pconvert(pat)
77 77 path = pathutil.join(sourceroot, pat)
78 78
79 79 newroot = pathutil.dirname(path)
80 80 matcherargs = (newroot, '', [], ['include:%s' % path])
81 81
82 82 prefix = pathutil.canonpath(root, root, newroot)
83 83 if prefix:
84 84 prefix += '/'
85 85 relmatchers.append((prefix, matcherargs))
86 86 else:
87 87 other.append((kind, pat, source))
88 88
89 89 return relmatchers, other
90 90
91 91 def _kindpatsalwaysmatch(kindpats):
92 92 """"Checks whether the kindspats match everything, as e.g.
93 93 'relpath:.' does.
94 94 """
95 95 for kind, pat, source in kindpats:
96 96 if pat != '' or kind not in ['relpath', 'glob']:
97 97 return False
98 98 return True
99 99
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher for ``kindpats``.

    'set' patterns are expanded into fileset matchers first; whatever
    remains is handed to ``matchercls(root, kindpats, badfn=badfn)``.
    Depending on how many matchers result, this returns a nevermatcher
    (none), the matcher itself (exactly one) or a unionmatcher (several).
    '''
    filesetmatchers, plainkindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

    candidates = []
    if plainkindpats:
        candidates.append(matchercls(root, plainkindpats, badfn=badfn))
    candidates.extend(filesetmatchers)

    if not candidates:
        return nevermatcher(badfn=badfn)
    if len(candidates) == 1:
        return candidates[0]
    return unionmatcher(candidates)
115 115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - passed through to pathutil.canonpath() when cwd-relative
              patterns are canonicalized
    ctx - context used to evaluate 'set:' patterns; with icasefs it also
          provides the dirstate used for case normalization
    listsubrepos - also evaluate 'set:' patterns in the subrepos of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    The include and exclude patterns always default to 'glob' and their
    matchers are built without the badfn callback.
    """
    if not icasefs:
        normalize = _donormalize
    else:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            normalized = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    normalized.append((kind, pat, source))
                    continue

                folded = dsnormalize(pat)
                # Preserve the original to handle a case only rename.
                if pat != folded and pat in dirstate:
                    normalized.append((kind, pat, source))
                normalized.append((kind, folded, source))
            return normalized

    def buildincludematcher(pats):
        # Shared by include and exclude: both are includematchers over
        # 'glob'-defaulted patterns.
        kindpats = normalize(pats, 'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
                                     listsubrepos=listsubrepos, badfn=None)

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        matchall = _kindpatsalwaysmatch(kindpats)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        matchall = True

    if matchall:
        m = alwaysmatcher(badfn)
    else:
        m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
                                  listsubrepos=listsubrepos, badfn=badfn)

    if include:
        m = intersectmatchers(m, buildincludematcher(include))
    if exclude:
        m = differencematcher(m, buildincludematcher(exclude))
    return m
192 192
def exact(files, badfn=None):
    '''Return an exactmatcher for ``files`` using the optional ``badfn``.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
195 195
def always(badfn=None):
    '''Return an alwaysmatcher using the optional ``badfn`` callback.'''
    matcher = alwaysmatcher(badfn)
    return matcher
198 198
def never(badfn=None):
    '''Return a nevermatcher using the optional ``badfn`` callback.'''
    matcher = nevermatcher(badfn)
    return matcher
201 201
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() callback is ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
209 209
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. ``source`` is the list or
    include file the pattern came from, or '' for patterns given directly.

    Raises error.Abort when a 'listfile'/'listfile0' file cannot be read or
    when processing an 'include' file aborts. An unreadable 'include' file
    is skipped, with a message sent to ``warn`` when that is set.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # Patterns read from the list are attributed to the list file.
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3; .args works
                # on both Python 2 and 3.
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
250 250
class basematcher(object):
    '''Base class of the matchers in this module.

    On its own it matches nothing, lists no files, and asks for every
    directory to be visited; subclasses override the relevant pieces.
    '''

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for path in self._files:
            yield path

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
372 372
class alwaysmatcher(basematcher):
    '''Matches everything.

    Every file matches, and both visitdir() and visitchildrenset() answer
    'all' for any directory.
    '''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
393 393
class nevermatcher(basematcher):
    '''Matches nothing.

    No directory is ever visited: visitdir() returns False and
    visitchildrenset() returns an empty set.
    '''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
419 419
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` is installed as the instance's matchfn. ``predrepr``, when
    given, is used to describe the predicate in repr(); otherwise the
    function's own byte repr is used.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): the spelling "predicatenmatcher" is kept as is,
        # since existing output may be compared against it -- confirm before
        # changing.
        return '<predicatenmatcher pred=%s>' % described
433 433
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    The docstring is a raw string because the examples contain regular
    expression backslashes.

    >>> kindpats = [
    ...     ('re', '.*\.c$', ''),
    ...     ('path', 'foo/a', ''),
    ...     ('relpath', 'b', ''),
    ...     ('glob', '*.h', ''),
    ... ]
    >>> m = patternmatcher('foo', kindpats)
    >>> bool(m('main.c'))  # matches re:.*\.c$
    True
    >>> bool(m('b.txt'))
    False
    >>> bool(m('foo/a'))  # matches path:foo/a
    True
    >>> bool(m('a'))  # does not match path:b, since 'root' is 'foo'
    False
    >>> bool(m('b'))  # matches relpath:b, since 'root' is 'foo'
    True
    >>> bool(m('lib.h'))  # matches glob:*.h
    True

    >>> m.files()
    ['.', 'foo/a', 'b', '.']
    >>> m.exact('foo/a')
    True
    >>> m.exact('b')
    True
    >>> m.exact('lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # Every ancestor directory of the listed files, plus the root.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Return 'all' when ``dir`` is itself a listed file of a prefix
        matcher; otherwise True or False depending on whether ``dir`` is,
        contains, or lies below one of the listed files (or '.' is
        listed).'''
        if self._prefix and dir in self._fileset:
            return 'all'
        return ('.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def visitchildrenset(self, dir):
        '''Translate the visitdir() answer: True -> 'this', False -> set(),
        'all' -> 'all'.'''
        ret = self.visitdir(dir)
        if ret is True:
            return 'this'
        elif not ret:
            return set()
        assert ret == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
502 502
503 503 # This is basically a reimplementation of util.dirs that stores the children
504 504 # instead of just a count of them, plus a small optional optimization to avoid
505 505 # some directories we don't need.
506 506 class _dirchildren(object):
507 507 def __init__(self, paths, onlyinclude=None):
508 508 self._dirs = {}
509 509 self._onlyinclude = onlyinclude or []
510 510 addpath = self.addpath
511 511 for f in paths:
512 512 addpath(f)
513 513
514 514 def addpath(self, path):
515 515 if path == '.':
516 516 return
517 517 dirs = self._dirs
518 518 findsplitdirs = _dirchildren._findsplitdirs
519 519 for d, b in findsplitdirs(path):
520 520 if d not in self._onlyinclude:
521 521 continue
522 522 dirs.setdefault(d, set()).add(b)
523 523
524 524 @staticmethod
525 525 def _findsplitdirs(path):
526 526 # yields (dirname, basename) tuples, walking back to the root. This is
527 527 # very similar to util.finddirs, except:
528 528 # - produces a (dirname, basename) tuple, not just 'dirname'
529 529 # - includes root dir
530 530 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
531 531 # slash, and produces '.' for the root instead of ''.
532 532 oldpos = len(path)
533 533 pos = path.rfind('/')
534 534 while pos != -1:
535 535 yield path[:pos], path[pos + 1:oldpos]
536 536 oldpos = pos
537 537 pos = path.rfind('/', 0, pos)
538 538 yield '.', path[:oldpos]
539 539
540 540 def get(self, path):
541 541 return self._dirs.get(path, set())
542 542
class includematcher(basematcher):
    '''Matcher built from (kind, pat, source) include patterns.

    The match function comes from _buildmatch() with the suffix '(?:/|$)';
    directory visiting decisions are derived from the roots, dirs and
    parents computed by _rootsdirsandparents().
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def _underroot(self, dir):
        # True when everything is rooted at '.', or dir is a root or lies
        # below one.
        roots = self._roots
        return ('.' in roots or
                dir in roots or
                any(parentdir in roots
                    for parentdir in util.finddirs(dir)))

    def visitdir(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots:
            return True
        if dir in self._dirs or dir in self._parents:
            return True
        return any(parentdir in roots for parentdir in util.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('.' in roots or
            dir in roots or
            dir in self._dirs or
            any(parentdir in roots
                for parentdir in util.finddirs(dir))):
            return 'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
600 600
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher(['a.txt', 're:.*\.c$'])
    >>> m('a.txt')
    True
    >>> m('b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m('main.c')
    False
    >>> m('re:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given; any other iterable is copied into one.
        if not isinstance(files, list):
            files = list(files)
        self._files = files

    # Matching a file is plain membership in .files().
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Every ancestor directory of the listed files, plus the root.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | (self._dirs - {'.'})
        if dir != '.':
            prefix = dir + '/'
            skip = len(prefix)
            candidates = set(c[skip:] for c in candidates
                             if c.startswith(prefix))
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {c for c in candidates if '/' not in c}
        # We really do not expect children to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
665 665
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 once and reuse the answer for both checks.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            # Everything below dir is excluded.
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 matches part of dir, so never report 'all'.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
734 734
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the other one is returned. When one side
    matches everything, a copy of the other side is returned (carrying m1's
    callbacks); only otherwise is an intersectionmatcher built.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 decides what matches, but the callbacks must still be m1's.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        result = copy.copy(m2)
        result.bad = m1.bad
        result.explicitdir = m1.explicitdir
        result.traversedir = m1.traversedir
        return result

    if not m2.always():
        return intersectionmatcher(m1, m2)
    return copy.copy(m1)
755 755
class intersectionmatcher(basematcher):
    '''Matches what both ``m1`` and ``m2`` match.

    The bad, explicitdir and traversedir attributes are taken from ``m1``.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # Filter the exact side's files through the other matcher.
        exactside, otherside = self._m1, self._m2
        if not exactside.isexact():
            exactside, otherside = otherside, exactside
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == 'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit1 and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side.
        if m1_set == 'all':
            return m2_set
        if m2_set == 'all':
            return m1_set

        if 'this' in (m1_set, m2_set):
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
816 816
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files below path, with the
        # "path/" prefix removed.
        prefix = path + "/"
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(prefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' is the subdirectory itself in the wrapped matcher's terms.
        full = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitdir(full)

    def visitchildrenset(self, dir):
        # '.' is the subdirectory itself in the wrapped matcher's terms.
        full = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitchildrenset(full)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
892 892
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> bool(m2(b'a.txt'),)
    False
    >>> bool(m2(b'd/e/a.txt'))
    True
    >>> bool(m2(b'd/e/b.txt'))
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._matcher = matcher
        self._path = path
        # "<path>/", the prefix carried by every file below the wrapped root
        self._pathprefix = path + '/'

    @propertycache
    def _files(self):
        # the wrapped matcher's explicit files, re-rooted under the prefix
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        # only files below the prefix can match; the rest of the path is
        # handed to the wrapped matcher
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        return False

    @propertycache
    def _pathdirs(self):
        # directories that have to be walked to reach the prefix path
        ancestors = set(util.finddirs(self._path))
        ancestors.add('.')
        return ancestors

    def visitdir(self, dir):
        if dir == self._path:
            inner = '.'
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix):]
        else:
            # outside the wrapped matcher: visit only ancestors of the prefix
            return dir in self._pathdirs
        return self._matcher.visitdir(inner)

    def visitchildrenset(self, dir):
        if dir == self._path:
            inner = '.'
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix):]
        else:
            if dir in self._pathdirs:
                return 'this'
            return set()
        return self._matcher.visitchildrenset(inner)

    def isexact(self):
        inner = self._matcher
        return inner.isexact()

    def prefix(self):
        inner = self._matcher
        return inner.prefix()

    @encoding.strmethod
    def __repr__(self):
        fields = (pycompat.bytestr(self._path), self._matcher)
        return '<prefixdirmatcher path=%r, matcher=%r>' % fields
973 973
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, explicitdir, traversedir) are taken from
    the first matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # a file matches as soon as one of the matchers accepts it
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                # nothing can be stronger than 'all'
                return answer
            visit = visit | answer
        return visit

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if answer == 'all':
                return answer
            if not answer or sawthis:
                # Either nothing to add, or the result is already at least
                # 'this'; keep looping, a later matcher may still say 'all'.
                continue
            if answer == 'this':
                sawthis = True
                continue
            assert isinstance(answer, set)
            children = children.union(answer)
        return 'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return '<unionmatcher matchers=%r>' % self._matchers
1025 1025
def patkind(pattern, default=None):
    r'''If pattern is 'kind:pat' with a known kind, return kind.

    The pattern is split at its first ':' by _patsplit(). When it contains
    no ':' or the part before the ':' is not a known kind, ``default`` is
    returned instead.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    return _patsplit(pattern, default)[0]
1029 1040
1030 1041 def _patsplit(pattern, default):
1031 1042 """Split a string into the optional pattern kind prefix and the actual
1032 1043 pattern."""
1033 1044 if ':' in pattern:
1034 1045 kind, pat = pattern.split(':', 1)
1035 1046 if kind in allpatternkinds:
1036 1047 return kind, pat
1037 1048 return default, pattern
1038 1049
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned one character at a time:

    - ``*`` becomes ``[^/]*``, ``**`` becomes ``.*`` and ``**/`` becomes
      ``(?:.*/)?``
    - ``?`` becomes ``.``
    - ``[...]`` is kept as a character class, with a leading ``!`` turned
      into ``^`` and a leading ``^`` escaped; an unterminated ``[`` is
      emitted as a literal
    - ``{a,b}`` becomes the non-capturing alternation ``(?:a|b)``
    - a backslash emits the following character through the escape map
    - any other character is emitted through the escape map

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the index of the next unread character, n the pattern length
    i, n = 0, len(pat)
    res = ''
    # number of '{' groups currently open
    group = 0
    escape = util.stringutil.regexbytesescapemap.get
    def peek():
        # next unread character, or False at the end of the pattern
        return i < n and pat[i:i + 1]
    while i < n:
        # one-element slices are used instead of indexing
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # not a glob metacharacter: emit it, regexp-escaped if needed
            res += escape(c, c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches any number of leading directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' matches anything, including '/'
                    res += '.*'
            else:
                # a single '*' does not cross directory separators
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a '!' or ']' right after '[' is part
            # of the class and cannot terminate it
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # no closing bracket: treat '[' as a literal character
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation '!' is spelled '^' in a regexp class
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in a glob class: escape it
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside a '{...}' group
            res += '|'
        elif c == '\\':
            p = peek()
            if p:
                # escaped character: consume it and emit it literally
                i += 1
                res += escape(p, p)
            else:
                # trailing backslash: emit the backslash itself
                res += escape(c, c)
        else:
            # '}' or ',' outside of any group: plain character
            res += escape(c, c)
    return res
1119 1130
1120 1131 def _regex(kind, pat, globsuffix):
1121 1132 '''Convert a (normalized) pattern of any kind into a regular expression.
1122 1133 globsuffix is appended to the regexp of globs.'''
1123 1134 if not pat:
1124 1135 return ''
1125 1136 if kind == 're':
1126 1137 return pat
1127 1138 if kind in ('path', 'relpath'):
1128 1139 if pat == '.':
1129 1140 return ''
1130 1141 return util.stringutil.reescape(pat) + '(?:/|$)'
1131 1142 if kind == 'rootfilesin':
1132 1143 if pat == '.':
1133 1144 escaped = ''
1134 1145 else:
1135 1146 # Pattern is a directory name.
1136 1147 escaped = util.stringutil.reescape(pat) + '/'
1137 1148 # Anything after the pattern must be a non-directory.
1138 1149 return escaped + '[^/]+$'
1139 1150 if kind == 'relglob':
1140 1151 return '(?:|.*/)' + _globre(pat) + globsuffix
1141 1152 if kind == 'relre':
1142 1153 if pat.startswith('^'):
1143 1154 return pat
1144 1155 return '.*' + pat
1145 1156 if kind in ('glob', 'rootglob'):
1146 1157 return _globre(pat) + globsuffix
1147 1158 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1148 1159
def _buildmatch(kindpats, globsuffix, root):
    '''Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs. Subinclude patterns are
    split off first and matched by lazily created sub-matchers; the
    remaining patterns are matched either by a directory lookup (when they
    are all 'rootfilesin') or by a compiled regexp.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes, created on first use and keyed by prefix
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch
                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}
            def mf(f):
                # a file matches when its parent directory is listed
                head, sep, tail = f.rpartition('/')
                return (head if sep else '.') in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(fn(f) for fn in matchfuncs)
1191 1202
# Size limit used by _buildregexmatch(): a single pattern whose regexp is
# longer than this aborts, and the individual regexps are joined into
# several separately compiled groups when one group would exceed it.
MAX_RE_SIZE = 20000
1193 1204
1194 1205 def _joinregexes(regexps):
1195 1206 """gather multiple regular expressions into a single one"""
1196 1207 return '|'.join(regexps)
1197 1208
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Each pattern is converted with _regex() and the results are joined with
    '|'. When the joined size would exceed MAX_RE_SIZE, the regexps are
    compiled in several groups and the returned function tries each group.
    The returned regexp string is always the full joined expression.

    Raises error.Abort when a single regexp is longer than MAX_RE_SIZE or
    when a pattern does not compile.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # startidx: index of the first regexp of the group being filled
        # groupsize: size of that group so far, separators included
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                # adding this piece would overflow: close the current group
                # and start a new one with this piece
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # + 1 accounts for the '|' separator
            groupsize += piecesize + 1

        if startidx == 0:
            # no split happened: everything fits in a single regexp
            func = _rematcher(fullregexp)
        else:
            # close the last group and match against any of the groups
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # compile each pattern on its own to report the offending one
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    # s is the third element of the kindpat tuple; when set
                    # it is used as a prefix of the error message
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        # every pattern compiles alone, only the combination is invalid
        raise error.Abort(_("invalid pattern"))
1248 1259
1249 1260 def _patternrootsanddirs(kindpats):
1250 1261 '''Returns roots and directories corresponding to each pattern.
1251 1262
1252 1263 This calculates the roots and directories exactly matching the patterns and
1253 1264 returns a tuple of (roots, dirs) for each. It does not return other
1254 1265 directories which may also need to be considered, like the parent
1255 1266 directories.
1256 1267 '''
1257 1268 r = []
1258 1269 d = []
1259 1270 for kind, pat, source in kindpats:
1260 1271 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1261 1272 root = []
1262 1273 for p in pat.split('/'):
1263 1274 if '[' in p or '{' in p or '*' in p or '?' in p:
1264 1275 break
1265 1276 root.append(p)
1266 1277 r.append('/'.join(root) or '.')
1267 1278 elif kind in ('relpath', 'path'):
1268 1279 r.append(pat or '.')
1269 1280 elif kind in ('rootfilesin',):
1270 1281 d.append(pat or '.')
1271 1282 else: # relglob, re, relre
1272 1283 r.append('.')
1273 1284 return r, d
1274 1285
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # only the first element (the roots) of the (roots, dirs) pair is wanted
    return _patternrootsanddirs(kindpats)[0]
1279 1290
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # Collect the parents as non-recursive/exact directories, since they must
    # be scanned to get to either the roots or the other exact directories.
    parents = list(util.dirs(dirs))
    parents.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1320 1331
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, so drop those patterns and
    # keep the kinds where one can specify filenames.
    canbefiles = [kindpat for kindpat in kindpats
                  if kindpat[0] != 'rootfilesin']
    return _roots(canbefiles)
1333 1344
1334 1345 def _prefix(kindpats):
1335 1346 '''Whether all the patterns match a prefix (i.e. recursively)'''
1336 1347 for kind, pat, source in kindpats:
1337 1348 if kind not in ('path', 'relpath'):
1338 1349 return False
1339 1350 return True
1340 1351
# Regexp used by readpatternfile() to strip comments; it is compiled there
# the first time a line containing '#' is seen, and then reused.
_commentre = None
1342 1353
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    filepath is the file to read (opened in binary mode). warn, when not
    false, is called with a message for every unknown "syntax:" value; such
    lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line) where lineno is the 1-based line number and line
    is the pattern text with comment, trailing white space and any syntax
    prefix removed. This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The context manager closes the file even when warn() or the iteration
    # raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # change the default syntax of the following lines
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix, either a full kind such as 'relglob:' or a
            # short name such as 'glob:', overrides the default syntax
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s + ':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now