##// END OF EJS Templates
match: delete unused argument "listsubrepos" from _buildmatch()...
Martin von Zweigbergk -
r41818:a1326852 default
parent child Browse files
Show More
@@ -1,1377 +1,1374
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
27 27 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
28 28 'rootglob',
29 29 'listfile', 'listfile0', 'set', 'include', 'subinclude',
30 30 'rootfilesin')
31 31 cwdrelativepatternkinds = ('relpath', 'glob')
32 32
33 33 propertycache = util.propertycache
34 34
def _rematcher(regex):
    '''Compile ``regex`` via ``util.re`` and return a callable that matches
    a string against it.

    The compiled object's ``test_match`` attribute is preferred when it
    exists; otherwise its ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    # slightly faster, provided by facebook's re2 bindings
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is not None:
        return fastmatch
    return compiled.match
44 44
45 45 def _expandsets(root, cwd, kindpats, ctx, listsubrepos, badfn):
46 46 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
47 47 matchers = []
48 48 other = []
49 49
50 50 for kind, pat, source in kindpats:
51 51 if kind == 'set':
52 52 if ctx is None:
53 53 raise error.ProgrammingError("fileset expression with no "
54 54 "context")
55 55 matchers.append(ctx.matchfileset(pat, badfn=badfn))
56 56
57 57 if listsubrepos:
58 58 for subpath in ctx.substate:
59 59 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
60 60 pm = prefixdirmatcher(root, cwd, subpath, sm, badfn=badfn)
61 61 matchers.append(pm)
62 62
63 63 continue
64 64 other.append((kind, pat, source))
65 65 return matchers, other
66 66
67 67 def _expandsubinclude(kindpats, root):
68 68 '''Returns the list of subinclude matcher args and the kindpats without the
69 69 subincludes in it.'''
70 70 relmatchers = []
71 71 other = []
72 72
73 73 for kind, pat, source in kindpats:
74 74 if kind == 'subinclude':
75 75 sourceroot = pathutil.dirname(util.normpath(source))
76 76 pat = util.pconvert(pat)
77 77 path = pathutil.join(sourceroot, pat)
78 78
79 79 newroot = pathutil.dirname(path)
80 80 matcherargs = (newroot, '', [], ['include:%s' % path])
81 81
82 82 prefix = pathutil.canonpath(root, root, newroot)
83 83 if prefix:
84 84 prefix += '/'
85 85 relmatchers.append((prefix, matcherargs))
86 86 else:
87 87 other.append((kind, pat, source))
88 88
89 89 return relmatchers, other
90 90
91 91 def _kindpatsalwaysmatch(kindpats):
92 92 """"Checks whether the kindspats match everything, as e.g.
93 93 'relpath:.' does.
94 94 """
95 95 for kind, pat, source in kindpats:
96 96 if pat != '' or kind not in ['relpath', 'glob']:
97 97 return False
98 98 return True
99 99
def _buildkindpatsmatcher(matchercls, root, cwd, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher for ``kindpats`` using ``matchercls``.

    'set' patterns are first expanded into fileset matchers (this is the
    only place ``ctx`` and ``listsubrepos`` are used); the remaining
    patterns, if any, are handed to ``matchercls(root, cwd, kindpats,
    badfn=badfn)``.

    Returns a ``nevermatcher`` when nothing was built, the sole matcher when
    exactly one was built, and a ``unionmatcher`` over all of them otherwise.
    '''
    matchers = []
    fms, kindpats = _expandsets(root, cwd, kindpats, ctx=ctx,
                                listsubrepos=listsubrepos, badfn=badfn)
    if kindpats:
        # The matcher classes no longer take listsubrepos; it only matters
        # for the fileset expansion above.
        m = matchercls(root, cwd, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(root, cwd, badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
116 115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - passed through to pattern normalization, which hands it to
              pathutil.canonpath for cwd-relative patterns
    ctx - context used to evaluate 'set:' patterns and, with icasefs, to
          reach the dirstate; required for both
    listsubrepos - also evaluate 'set:' patterns in each subrepo of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
            (not forwarded to the include/exclude matchers)
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            normalized = _donormalize(patterns, default, root, cwd, auditor,
                                      warn)
            folded = []
            for kind, pat, source in normalized:
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                asgiven = pat
                pat = foldcase(asgiven)

                # Preserve the original to handle a case only rename.
                if asgiven != pat and asgiven in dirstate:
                    folded.append((kind, asgiven, source))

                folded.append((kind, pat, source))
            return folded
    else:
        normalize = _donormalize

    def buildincludelike(pats):
        # -I and -X patterns share the same construction: always 'glob' by
        # default, always an includematcher, never the caller's badfn.
        kindpats = normalize(pats, 'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(includematcher, root, cwd, kindpats,
                                     ctx=ctx, listsubrepos=listsubrepos,
                                     badfn=None)

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, cwd, kindpats,
                                      ctx=ctx, listsubrepos=listsubrepos,
                                      badfn=badfn)

    if include:
        m = intersectmatchers(m, buildincludelike(include))
    if exclude:
        m = differencematcher(m, buildincludelike(exclude))
    return m
194 193
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for ``files`` (paths, not patterns).'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
197 196
def always(root, cwd):
    '''Return a matcher that matches everything.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
200 199
def never(root, cwd):
    '''Return a matcher that matches nothing.'''
    matcher = nevermatcher(root, cwd)
    return matcher
203 202
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is
    ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
211 210
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of ``(kind, pat, source)`` tuples.  ``source`` is '' for
    patterns given directly, the list file path for patterns read from a
    'listfile'/'listfile0', and the nested source (or the include path) for
    patterns read from an 'include' file.

    Raises ``error.Abort`` when a list file cannot be read or when reading an
    include file aborts.  An include file that fails with ``IOError`` is
    skipped, with a message sent to ``warn`` if one was given.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # Entries from a list file are normalized recursively and
            # attributed to the list file itself.
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3, so read the
                # message through .args rather than inst[0].
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
252 251
class basematcher(object):
    '''Base class for matchers.

    On its own it matches nothing (``matchfn`` returns False), reports no
    explicit files, and asks for every directory to be visited.
    '''

    def __init__(self, root, cwd, badfn=None):
        self._root = root
        self._cwd = cwd
        # Leave the class-level bad() in place unless a callback was given.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for path in self._files:
            yield path

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Tell whether ``dir`` is worth visiting, i.e. whether it or one of
        its subdirectories may contain matches according to the primary,
        included and excluded patterns.

        The result is the string 'all' when the directory and everything
        below it should be visited, and otherwise True or False for the
        directory itself.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Like visitdir, but may additionally name which children of
        ``dir`` need to be visited. The answer is again derived from the
        primary, included and excluded patterns.

        The two methods correspond as follows:

          visitdir | visitchildrenset
         ----------+-------------------
           False   | set()
           'all'   | 'all'
           True    | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          With matchers ['path:foo/bar', 'rootfilesin:qux'] the results would
          be (provided the implementation is able to work this out; not all
          of them are):

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # 'all' would be ideal here, but the prefix property applies to
          # the matcher as a whole, so mixing in the non-prefix
          # 'rootfilesin' kind forces a downgrade to 'this'.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers cannot tell files from directories. Given
          ['path:dir/f'] they do not know what 'f' is, so
          visitchildrenset('dir') usually returns {'f'}; a matcher that does
          know 'f' is a file (exactmatcher, for instance) may return 'this'
          instead. Do not read a set result as "no files to look at in this
          directory", nor assume that 'this' is always returned when there
          are files to look at in 'dir'.
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
376 375
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None):
        super(alwaysmatcher, self).__init__(root, cwd, badfn=badfn)

    def always(self):
        return True

    def matchfn(self, f):
        # Every path matches, whatever it is.
        return True

    def visitdir(self, dir):
        # Everything matches, so every directory is visited recursively.
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
397 396
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn=badfn)

    # Calling the nevermatcher an exact matcher or a prefix matcher is a bit
    # odd, but it lets callers take the fast paths associated with either.
    # Since files() returns [], there are neither exact matches nor
    # prefixes, so fast paths that iterate over them stay efficient (and
    # correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # Nothing can match, so no directory needs visiting.
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
423 422
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, root, cwd, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(root, cwd, badfn)
        # The predicate itself serves as matchfn; predrepr is only used to
        # render __repr__.
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        return '<predicatenmatcher pred=%s>' % described
437 436
class patternmatcher(basematcher):
    '''Matcher built from a list of (kind, pat, source) patterns.

    The match function is produced by ``_buildmatch`` with '$' as the
    regexp suffix; the explicit files and the prefix flag are derived from
    the same patterns.
    '''

    def __init__(self, root, cwd, kindpats, badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # All ancestor directories of the explicit files, plus the root.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Return 'all' for an explicitly listed dir of a prefix matcher,
        otherwise whether ``dir`` is listed, is an ancestor of a listed
        path, or lies below a listed path (or '.' is listed).'''
        if self._prefix and dir in self._fileset:
            return 'all'
        return ('.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def visitchildrenset(self, dir):
        '''Translate the visitdir() answer: True -> 'this', falsy -> set(),
        'all' -> 'all'.'''
        ret = self.visitdir(dir)
        if ret is True:
            return 'this'
        elif not ret:
            return set()
        assert ret == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
476 474
477 475 # This is basically a reimplementation of util.dirs that stores the children
478 476 # instead of just a count of them, plus a small optional optimization to avoid
479 477 # some directories we don't need.
480 478 class _dirchildren(object):
481 479 def __init__(self, paths, onlyinclude=None):
482 480 self._dirs = {}
483 481 self._onlyinclude = onlyinclude or []
484 482 addpath = self.addpath
485 483 for f in paths:
486 484 addpath(f)
487 485
488 486 def addpath(self, path):
489 487 if path == '.':
490 488 return
491 489 dirs = self._dirs
492 490 findsplitdirs = _dirchildren._findsplitdirs
493 491 for d, b in findsplitdirs(path):
494 492 if d not in self._onlyinclude:
495 493 continue
496 494 dirs.setdefault(d, set()).add(b)
497 495
498 496 @staticmethod
499 497 def _findsplitdirs(path):
500 498 # yields (dirname, basename) tuples, walking back to the root. This is
501 499 # very similar to util.finddirs, except:
502 500 # - produces a (dirname, basename) tuple, not just 'dirname'
503 501 # - includes root dir
504 502 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
505 503 # slash, and produces '.' for the root instead of ''.
506 504 oldpos = len(path)
507 505 pos = path.rfind('/')
508 506 while pos != -1:
509 507 yield path[:pos], path[pos + 1:oldpos]
510 508 oldpos = pos
511 509 pos = path.rfind('/', 0, pos)
512 510 yield '.', path[:oldpos]
513 511
514 512 def get(self, path):
515 513 return self._dirs.get(path, set())
516 514
class includematcher(basematcher):
    '''Matcher for include-style patterns.

    The match function is produced by ``_buildmatch`` with '(?:/|$)' as the
    regexp suffix.  Directory-visiting decisions are based on the roots,
    dirs and parents computed from the patterns.
    '''

    def __init__(self, root, cwd, kindpats, badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def visitdir(self, dir):
        '''Return 'all' for a root of a prefix matcher, otherwise whether
        ``dir`` is a root, dir or parent, or lies below a root (or '.' is a
        root).'''
        if self._prefix and dir in self._roots:
            return 'all'
        return ('.' in self._roots or
                dir in self._roots or
                dir in self._dirs or
                dir in self._parents or
                any(parentdir in self._roots
                    for parentdir in util.finddirs(dir)))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        return _dirchildren(
            itertools.chain(self._dirs, self._roots, self._parents),
            onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        '''Return 'all', 'this', or the set of children of ``dir`` that lead
        to included items (empty when nothing below ``dir`` is included).'''
        if self._prefix and dir in self._roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('.' in self._roots or
            dir in self._roots or
            dir in self._dirs or
            any(parentdir in self._roots
                for parentdir in util.finddirs(dir))):
            return 'this'

        if dir in self._parents:
            return self._allparentschildren.get(dir) or set()
        return set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
575 572
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Every ancestor directory of the listed files, plus the root.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if dir not in self._dirs or not self._fileset:
            return set()

        candidates = self._fileset | (self._dirs - {'.'})
        if dir != '.':
            prefix = dir + '/'
            candidates = {c[len(prefix):] for c in candidates
                          if c.startswith(prefix)}
        # self._dirs holds every directory recursively: for foo/bar/baz.txt
        # it contains '.', 'foo' and 'foo/bar'. A candidate that still has a
        # '/' in it therefore belongs to a subdir-of-a-subdir and can be
        # dropped; its immediate subdir is present without a slash.
        children = {c for c in candidates if '/' not in c}
        # An empty result is not expected: it would mean _dirs contains
        # something that has no file in _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
624 621
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        '''Return False when m2 covers ``dir`` entirely, m1's own answer
        when m2 has nothing in ``dir``, and otherwise m1's answer reduced to
        a plain boolean.'''
        # Ask m2 once; the answer is needed for both checks below.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
693 690
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the other one is returned as is. When one
    of the matchers always matches, a shallow copy of the other is returned
    instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir'):
            setattr(combined, attr, getattr(m1, attr))
        return combined

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
714 711
class intersectionmatcher(basematcher):
    '''Matches a file only when both wrapped matchers match it.

    root, cwd, bad, explicitdir and traversedir are taken from the first
    matcher.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # Filter the exact side's files through the other matcher.
        exactside, otherside = self._m1, self._m2
        if not exactside.isexact():
            exactside, otherside = otherside, exactside
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == 'all':
            return self._m2.visitdir(dir)
        if not visit1:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        first = self._m1.visitchildrenset(dir)
        if not first:
            return set()
        second = self._m2.visitchildrenset(dir)
        if not second:
            return set()

        # 'all' on one side defers entirely to the other side.
        if first == 'all':
            return second
        if second == 'all':
            return first

        if 'this' in (first, second):
            return 'this'

        assert isinstance(first, set) and isinstance(second, set)
        return first & second

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
775 772
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files below ``path``, with the
        # leading "path/" stripped.
        pathprefix = path + "/"
        self._files = [f[len(pathprefix):] for f in matcher._files
                       if f.startswith(pathprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def matchfn(self, f):
        # The superclass's constructor loses some information, so the
        # matching function for the subdirectory cannot be rebuilt accurately
        # from the inputs. matchfn() and visitdir() are overridden instead to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        outer = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitdir(outer)

    def visitchildrenset(self, dir):
        outer = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitchildrenset(outer)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
851 848
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'root', b'd/e/f', b'd/e', m1)
    >>> bool(m2(b'a.txt'),)
    False
    >>> bool(m2(b'd/e/a.txt'))
    True
    >>> bool(m2(b'd/e/b.txt'))
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, root, cwd, path, matcher, badfn=None):
        """Wrap ``matcher`` so that it applies below the directory ``path``.

        Raises error.ProgrammingError when ``path`` is empty.
        """
        super(prefixdirmatcher, self).__init__(root, cwd, badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._path = path
        self._pathprefix = path + '/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        # The wrapped matcher's files, re-rooted under the prefix directory.
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        """Match ``f`` only if it is below the prefix and the wrapped matcher
        accepts the remainder of the path."""
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        return False

    @propertycache
    def _pathdirs(self):
        # Every directory reported by util.finddirs() for the prefix path,
        # plus the root directory '.'.
        ancestors = set(util.finddirs(self._path))
        ancestors.add('.')
        return ancestors

    def _innerdir(self, dir):
        # Translate ``dir`` into the wrapped matcher's namespace; None means
        # that ``dir`` is neither the prefix directory nor below it.
        if dir == self._path:
            return '.'
        if dir.startswith(self._pathprefix):
            return dir[len(self._pathprefix):]
        return None

    def visitdir(self, dir):
        """Ask the wrapped matcher for directories at or below the prefix;
        otherwise visit only directories leading to the prefix."""
        inner = self._innerdir(dir)
        if inner is not None:
            return self._matcher.visitdir(inner)
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        """Ask the wrapped matcher for directories at or below the prefix;
        return 'this' for directories leading to the prefix, and an empty
        set for anything else."""
        inner = self._innerdir(dir)
        if inner is not None:
            return self._matcher.visitchildrenset(inner)
        if dir in self._pathdirs:
            return 'this'
        return set()

    def isexact(self):
        """Forward isexact() to the wrapped matcher."""
        return self._matcher.isexact()

    def prefix(self):
        """Forward prefix() to the wrapped matcher."""
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        template = '<prefixdirmatcher path=%r, matcher=%r>'
        return template % (pycompat.bytestr(self._path), self._matcher)
932 929
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (root, cwd, bad, explicitdir, traversedir) are
    taken from the first matcher.
    """

    def __init__(self, matchers):
        """Combine ``matchers``, a non-empty sequence of matchers."""
        first = matchers[0]
        super(unionmatcher, self).__init__(first._root, first._cwd)
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as any of the combined matchers accepts ``f``."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """Combine the visitdir() answers of all matchers.

        'all' from any matcher is returned immediately; the other answers
        are OR-ed together.
        """
        combined = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                return answer
            combined |= answer
        return combined

    def visitchildrenset(self, dir):
        """Combine the visitchildrenset() answers of all matchers.

        'all' from any matcher wins, then 'this'; otherwise the result is
        the union of the returned sets.
        """
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if answer == 'this':
                sawthis = True
            if sawthis:
                # keep scanning, a later matcher might still answer 'all'.
                continue
            assert isinstance(answer, set)
            children.update(answer)
        if sawthis:
            return 'this'
        return children

    @encoding.strmethod
    def __repr__(self):
        template = '<unionmatcher matchers=%r>'
        return template % self._matchers
984 981
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern when the kind is known,
    otherwise return default.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
988 985
def _patsplit(pattern, default):
    """Split a pattern into a (kind, pat) pair.

    The text before the first ':' is used as the kind only when it is one
    of allpatternkinds; otherwise (default, pattern) is returned with the
    pattern left untouched."""
    kind, sep, pat = pattern.partition(':')
    if sep and kind in allpatternkinds:
        return kind, pat
    return default, pattern
997 994
def _globre(pat):
    r'''Translate an extended glob string into a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    escape = util.stringutil.regexbytesescapemap.get
    n = len(pat)
    pos = 0
    # nesting depth of currently open '{' alternation groups
    depth = 0
    # regexp fragments, concatenated once at the end
    parts = []
    while pos < n:
        c = pat[pos:pos + 1]
        pos += 1
        # Slicing past the end yields an empty string, so looking ahead with
        # pat[pos:pos + 1] is safe at the end of the pattern.
        if c not in '*?[{},\\':
            parts.append(escape(c, c))
        elif c == '*':
            if pat[pos:pos + 1] != '*':
                # a single star never crosses a directory separator
                parts.append('[^/]*')
            else:
                pos += 1
                if pat[pos:pos + 1] == '/':
                    pos += 1
                    parts.append('(?:.*/)?')
                else:
                    parts.append('.*')
        elif c == '?':
            parts.append('.')
        elif c == '[':
            end = pos
            # a leading '!' or ']' is part of the class, not its terminator
            if end < n and pat[end:end + 1] in '!]':
                end += 1
            while end < n and pat[end:end + 1] != ']':
                end += 1
            if end >= n:
                # unterminated class: treat the bracket literally
                parts.append('\\[')
            else:
                stuff = pat[pos:end].replace('\\','\\\\')
                pos = end + 1
                first = stuff[0:1]
                if first == '!':
                    stuff = '^' + stuff[1:]
                elif first == '^':
                    stuff = '\\' + stuff
                parts.append('[%s]' % stuff)
        elif c == '{':
            depth += 1
            parts.append('(?:')
        elif c == '}' and depth:
            parts.append(')')
            depth -= 1
        elif c == ',' and depth:
            parts.append('|')
        elif c == '\\':
            following = pat[pos:pos + 1]
            if following:
                pos += 1
                parts.append(escape(following, following))
            else:
                parts.append(escape(c, c))
        else:
            # '}' or ',' outside of any group is an ordinary character
            parts.append(escape(c, c))
    return ''.join(parts)
1078 1075
def _regex(kind, pat, globsuffix):
    '''Translate a (normalized) pattern of the given kind into a regular
    expression string.

    globsuffix is appended to the regexp of glob-like kinds. An empty
    pattern always yields an empty regexp. error.ProgrammingError is raised
    for kinds that cannot be expressed as a regexp.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        return util.stringutil.reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # The pattern is a directory name ('.' being the root); whatever
        # follows it must be a non-directory.
        dirprefix = '' if pat == '.' else util.stringutil.reescape(pat) + '/'
        return dirprefix + '[^/]+$'
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind == 'relre':
        return pat if pat.startswith('^') else '.*' + pat
    if kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1107 1104
def _buildmatch(kindpats, globsuffix, root):
    '''Build a (regexp string, matcher function) pair for kindpats.

    globsuffix is appended to the regexp of globs. root is passed on to
    _expandsubinclude() when expanding subinclude patterns.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes are created on first use, keyed by prefix
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch
                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            # Only 'rootfilesin' patterns: compare the file's directory
            # against a set instead of building a regexp.
            dirs = {p for k, p, s in kindpats}
            def mf(f):
                slash = f.rfind('/')
                parent = f[:slash] if slash >= 0 else '.'
                return parent in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1150 1147
# Largest size allowed for one regexp piece; _buildregexmatch() aborts on a
# longer piece and splits pattern lists into groups that stay within it.
MAX_RE_SIZE = 20000
1152 1149
1153 1150 def _joinregexes(regexps):
1154 1151 """gather multiple regular expressions into a single one"""
1155 1152 return '|'.join(regexps)
1156 1153
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Patterns whose combined regexp would exceed MAX_RE_SIZE are compiled as
    several groups, and the matcher function succeeds if any group matches.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        groups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        groupstart = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if groupsize + piecesize > MAX_RE_SIZE:
                # close the current group and start a new one at this piece
                groups.append(_joinregexes(regexps[groupstart:idx]))
                groupstart = idx
                groupsize = 0
            # the extra 1 accounts for the '|' joining the pieces
            groupsize += piecesize + 1

        if groupstart == 0:
            # everything fits into one regexp
            func = _rematcher(fullregexp)
        else:
            groups.append(_joinregexes(regexps[groupstart:]))
            allmatchers = [_rematcher(g) for g in groups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Compile the patterns one at a time to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
1207 1204
1208 1205 def _patternrootsanddirs(kindpats):
1209 1206 '''Returns roots and directories corresponding to each pattern.
1210 1207
1211 1208 This calculates the roots and directories exactly matching the patterns and
1212 1209 returns a tuple of (roots, dirs) for each. It does not return other
1213 1210 directories which may also need to be considered, like the parent
1214 1211 directories.
1215 1212 '''
1216 1213 r = []
1217 1214 d = []
1218 1215 for kind, pat, source in kindpats:
1219 1216 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1220 1217 root = []
1221 1218 for p in pat.split('/'):
1222 1219 if '[' in p or '{' in p or '*' in p or '?' in p:
1223 1220 break
1224 1221 root.append(p)
1225 1222 r.append('/'.join(root) or '.')
1226 1223 elif kind in ('relpath', 'path'):
1227 1224 r.append(pat or '.')
1228 1225 elif kind in ('rootfilesin',):
1229 1226 d.append(pat or '.')
1230 1227 else: # relglob, re, relre
1231 1228 r.append('.')
1232 1229 return r, d
1233 1230
def _roots(kindpats):
    '''Return the root directories to match recursively for the given
    patterns.'''
    return _patternrootsanddirs(kindpats)[0]
1238 1235
def _rootsdirsandparents(kindpats):
    '''Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents are non-recursive/exact directories: they must be scanned
    # to get to either the roots or the other exact directories.
    parents = list(util.dirs(dirs))
    parents.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1279 1276
def _explicitfiles(kindpats):
    '''Return the potential explicit filenames named by the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so those
    # patterns are dropped before computing the roots.
    canbefiles = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(canbefiles)
1292 1289
1293 1290 def _prefix(kindpats):
1294 1291 '''Whether all the patterns match a prefix (i.e. recursively)'''
1295 1292 for kind, pat, source in kindpats:
1296 1293 if kind not in ('path', 'relpath'):
1297 1294 return False
1298 1295 return True
1299 1296
1300 1297 _commentre = None
1301 1298
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if warn is set, it is called with a message for every "syntax:" line
    naming an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The context manager closes the file even when reading, parsing or the
    # warn callback raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A line may override the current default syntax either with the
            # prefix a syntax maps to ('relre:', 'include', ...) or with the
            # syntax name followed by ':' ('re:', 'glob:', ...).
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now