##// END OF EJS Templates
match: allow passing in badfn to always() and never()...
Martin von Zweigbergk -
r41821:c302218a default
parent child Browse files
Show More
@@ -1,1374 +1,1374 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
# Pattern kind prefixes ("kind:pattern") known to this module.
allpatternkinds = (
    're', 'glob', 'path', 'relglob', 'relpath', 'relre',
    'rootglob',
    'listfile', 'listfile0', 'set', 'include', 'subinclude',
    'rootfilesin',
)
# Kinds whose pattern is canonicalized against the cwd (see _donormalize).
cwdrelativepatternkinds = ('relpath', 'glob')

# Short local alias; used as a decorator by the matcher classes below.
propertycache = util.propertycache
34 34
def _rematcher(regex):
    '''Compile ``regex`` through ``util.re`` and return a matching function.

    The compiled object's ``test_match`` is preferred when it exists (it is
    provided by facebook's re2 bindings and is slightly faster); otherwise
    the regular ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is None:
        return compiled.match
    return fastmatch
44 44
45 45 def _expandsets(root, cwd, kindpats, ctx, listsubrepos, badfn):
46 46 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
47 47 matchers = []
48 48 other = []
49 49
50 50 for kind, pat, source in kindpats:
51 51 if kind == 'set':
52 52 if ctx is None:
53 53 raise error.ProgrammingError("fileset expression with no "
54 54 "context")
55 55 matchers.append(ctx.matchfileset(pat, badfn=badfn))
56 56
57 57 if listsubrepos:
58 58 for subpath in ctx.substate:
59 59 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
60 60 pm = prefixdirmatcher(root, cwd, subpath, sm, badfn=badfn)
61 61 matchers.append(pm)
62 62
63 63 continue
64 64 other.append((kind, pat, source))
65 65 return matchers, other
66 66
67 67 def _expandsubinclude(kindpats, root):
68 68 '''Returns the list of subinclude matcher args and the kindpats without the
69 69 subincludes in it.'''
70 70 relmatchers = []
71 71 other = []
72 72
73 73 for kind, pat, source in kindpats:
74 74 if kind == 'subinclude':
75 75 sourceroot = pathutil.dirname(util.normpath(source))
76 76 pat = util.pconvert(pat)
77 77 path = pathutil.join(sourceroot, pat)
78 78
79 79 newroot = pathutil.dirname(path)
80 80 matcherargs = (newroot, '', [], ['include:%s' % path])
81 81
82 82 prefix = pathutil.canonpath(root, root, newroot)
83 83 if prefix:
84 84 prefix += '/'
85 85 relmatchers.append((prefix, matcherargs))
86 86 else:
87 87 other.append((kind, pat, source))
88 88
89 89 return relmatchers, other
90 90
91 91 def _kindpatsalwaysmatch(kindpats):
92 92 """"Checks whether the kindspats match everything, as e.g.
93 93 'relpath:.' does.
94 94 """
95 95 for kind, pat, source in kindpats:
96 96 if pat != '' or kind not in ['relpath', 'glob']:
97 97 return False
98 98 return True
99 99
def _buildkindpatsmatcher(matchercls, root, cwd, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher for kindpats using matchercls.

    'set' patterns are expanded into their own matchers first; the remaining
    kindpats (if any) are handed to matchercls. The result is a nevermatcher
    when nothing was produced, the lone matcher when there is exactly one,
    and a unionmatcher over all of them otherwise.
    '''
    filesetmatchers, plainkindpats = _expandsets(
        root, cwd, kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

    candidates = []
    if plainkindpats:
        candidates.append(matchercls(root, cwd, plainkindpats, badfn=badfn))
    candidates.extend(filesetmatchers)

    if not candidates:
        return nevermatcher(root, cwd, badfn=badfn)
    if len(candidates) == 1:
        return candidates[0]
    return unionmatcher(candidates)
115 115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - passed through to the path canonicalization of the patterns
    ctx - context used to evaluate 'set:' patterns; its repo's dirstate is
        also used when icasefs is set
    listsubrepos - also evaluate 'set:' patterns in the subrepos of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            result = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    result.append((kind, pat, source))
                    continue

                folded = foldcase(pat)
                # Preserve the original to handle a case only rename.
                if pat != folded and pat in dirstate:
                    result.append((kind, pat, source))
                result.append((kind, folded, source))
            return result
    else:
        normalize = _donormalize

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, cwd, kindpats,
                                      ctx=ctx, listsubrepos=listsubrepos,
                                      badfn=badfn)

    # The include/exclude matchers are built without a bad() callback; the
    # combined matcher keeps the one from the primary matcher.
    if include:
        includepats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, cwd, includepats,
                                   ctx=ctx, listsubrepos=listsubrepos,
                                   badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        excludepats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, cwd, excludepats,
                                   ctx=ctx, listsubrepos=listsubrepos,
                                   badfn=None)
        m = differencematcher(m, em)
    return m
193 193
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given list of files.

    badfn, when given, is installed as the matcher's bad() callback.
    '''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
196 196
def always(root, cwd, badfn=None):
    '''Return a matcher that matches every file.

    badfn, when given, is installed as the matcher's bad() callback.
    '''
    return alwaysmatcher(root, cwd, badfn=badfn)
199 199
def never(root, cwd, badfn=None):
    '''Return a matcher that matches no file.

    badfn, when given, is installed as the matcher's bad() callback.
    '''
    return nevermatcher(root, cwd, badfn=badfn)
202 202
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() callback is ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
210 210
211 211 def _donormalize(patterns, default, root, cwd, auditor, warn):
212 212 '''Convert 'kind:pat' from the patterns list to tuples with kind and
213 213 normalized and rooted patterns and with listfiles expanded.'''
214 214 kindpats = []
215 215 for kind, pat in [_patsplit(p, default) for p in patterns]:
216 216 if kind in cwdrelativepatternkinds:
217 217 pat = pathutil.canonpath(root, cwd, pat, auditor)
218 218 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
219 219 pat = util.normpath(pat)
220 220 elif kind in ('listfile', 'listfile0'):
221 221 try:
222 222 files = util.readfile(pat)
223 223 if kind == 'listfile0':
224 224 files = files.split('\0')
225 225 else:
226 226 files = files.splitlines()
227 227 files = [f for f in files if f]
228 228 except EnvironmentError:
229 229 raise error.Abort(_("unable to read file list (%s)") % pat)
230 230 for k, p, source in _donormalize(files, default, root, cwd,
231 231 auditor, warn):
232 232 kindpats.append((k, p, pat))
233 233 continue
234 234 elif kind == 'include':
235 235 try:
236 236 fullpath = os.path.join(root, util.localpath(pat))
237 237 includepats = readpatternfile(fullpath, warn)
238 238 for k, p, source in _donormalize(includepats, default,
239 239 root, cwd, auditor, warn):
240 240 kindpats.append((k, p, source or pat))
241 241 except error.Abort as inst:
242 242 raise error.Abort('%s: %s' % (pat, inst[0]))
243 243 except IOError as inst:
244 244 if warn:
245 245 warn(_("skipping unreadable pattern file '%s': %s\n") %
246 246 (pat, stringutil.forcebytestr(inst.strerror)))
247 247 continue
248 248 # else: re or relre - which cannot be normalized
249 249 kindpats.append((kind, pat, ''))
250 250 return kindpats
251 251
class basematcher(object):
    '''Common base class of the matchers in this module.

    Used directly it matches nothing and lists no files; subclasses override
    matchfn() and the visit*/always/isexact/prefix hooks as appropriate.
    '''

    def __init__(self, root, cwd, badfn=None):
        self._root = root
        self._cwd = cwd
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

         visitdir | visitchildrenset
        ----------+-------------------
         False    | set()
         'all'    | 'all'
         True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
375 375
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None):
        super(alwaysmatcher, self).__init__(root, cwd, badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def matchfn(self, f):
        # Every path is accepted.
        return True

    def always(self):
        return True

    # Both traversal hooks report that the whole subtree is wanted.
    def visitdir(self, dir):
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'
396 396
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # Both traversal hooks report that nothing below ``dir`` is wanted.
    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True
422 422
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, root, cwd, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(root, cwd, badfn)
        # The predicate itself serves as the match function.
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Prefer the supplied description; fall back to the predicate's repr.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): 'predicatenmatcher' looks like a typo; kept
        # byte-for-byte because it is runtime output.
        return '<predicatenmatcher pred=%s>' % described
436 436
class patternmatcher(basematcher):
    '''Matcher built from a list of (kind, pat, source) kindpats.'''

    def __init__(self, root, cwd, kindpats, badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # Directories of the listed files, plus the root ('.').
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        if self._prefix and dir in self._fileset:
            return 'all'
        if ('.' in self._fileset or
            dir in self._fileset or
            dir in self._dirs):
            return True
        # Otherwise visit only when some ancestor of dir is listed.
        return any(parentdir in self._fileset
                   for parentdir in util.finddirs(dir))

    def visitchildrenset(self, dir):
        # Translate visitdir()'s answer into the visitchildrenset vocabulary.
        visit = self.visitdir(dir)
        if visit is True:
            return 'this'
        if not visit:
            return set()
        assert visit == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        pats = pycompat.bytestr(self._pats)
        return '<patternmatcher patterns=%r>' % pats
474 474
475 475 # This is basically a reimplementation of util.dirs that stores the children
476 476 # instead of just a count of them, plus a small optional optimization to avoid
477 477 # some directories we don't need.
478 478 class _dirchildren(object):
479 479 def __init__(self, paths, onlyinclude=None):
480 480 self._dirs = {}
481 481 self._onlyinclude = onlyinclude or []
482 482 addpath = self.addpath
483 483 for f in paths:
484 484 addpath(f)
485 485
486 486 def addpath(self, path):
487 487 if path == '.':
488 488 return
489 489 dirs = self._dirs
490 490 findsplitdirs = _dirchildren._findsplitdirs
491 491 for d, b in findsplitdirs(path):
492 492 if d not in self._onlyinclude:
493 493 continue
494 494 dirs.setdefault(d, set()).add(b)
495 495
496 496 @staticmethod
497 497 def _findsplitdirs(path):
498 498 # yields (dirname, basename) tuples, walking back to the root. This is
499 499 # very similar to util.finddirs, except:
500 500 # - produces a (dirname, basename) tuple, not just 'dirname'
501 501 # - includes root dir
502 502 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
503 503 # slash, and produces '.' for the root instead of ''.
504 504 oldpos = len(path)
505 505 pos = path.rfind('/')
506 506 while pos != -1:
507 507 yield path[:pos], path[pos + 1:oldpos]
508 508 oldpos = pos
509 509 pos = path.rfind('/', 0, pos)
510 510 yield '.', path[:oldpos]
511 511
512 512 def get(self, path):
513 513 return self._dirs.get(path, set())
514 514
class includematcher(basematcher):
    '''Matcher built from include/exclude kindpats (see match()).'''

    def __init__(self, root, cwd, kindpats, badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def visitdir(self, dir):
        if self._prefix and dir in self._roots:
            return 'all'
        if ('.' in self._roots or
            dir in self._roots or
            dir in self._dirs or
            dir in self._parents):
            return True
        # Otherwise visit only when dir lies below a recursive root.
        return any(parentdir in self._roots
                   for parentdir in util.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        if self._prefix and dir in self._roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('.' in self._roots or
            dir in self._roots or
            dir in self._dirs or
            any(parentdir in self._roots
                for parentdir in util.finddirs(dir))):
            return 'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        pats = pycompat.bytestr(self._pats)
        return '<includematcher includes=%r>' % pats
572 572
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    # Matching is plain membership in the file set.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        # All known files and directories except the root itself.
        names = self._fileset | (self._dirs - {'.'})
        if dir != '.':
            dirprefix = dir + '/'
            names = set(name[len(dirprefix):] for name in names
                        if name.startswith(dirprefix))
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {name for name in names if '/' not in name}
        # We really do not expect children to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
621 621
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        # Callbacks are taken from the first matcher only.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        included = self._m1(f)
        if not included:
            return included
        return not self._m2(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        if self._m2.visitdir(dir) == 'all':
            # Everything below dir is excluded.
            return False
        excluded = self._m2.visitdir(dir)
        wanted = self._m1.visitdir(dir)
        if not excluded:
            # m2 does not match dir, we can return 'all' here if possible
            return wanted
        return bool(wanted)

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        # Never return 'all' if m2_set is any kind of non-empty (either
        # 'this' or set(foo)), since m2 might return set() for a
        # subdirectory.
        if m2_set and m1_set in ['all', 'this']:
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        parts = (self._m1, self._m2)
        return '<differencematcher m1=%r, m2=%r>' % parts
690 690
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    Shortcuts: if either argument is None the other is returned as is; if
    one of them always matches, a copy of the other is returned (carrying
    m1's callbacks) instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined.bad = m1.bad
        combined.explicitdir = m1.explicitdir
        combined.traversedir = m1.traversedir
        return combined

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
711 711
class intersectionmatcher(basematcher):
    '''Matches a file only when both wrapped matchers match it.

    root, cwd and the bad/explicitdir/traversedir callbacks are taken from
    the first matcher.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # Filter the files of an exact side through the other side.
        exactside, otherside = self._m1, self._m2
        if not exactside.isexact():
            exactside, otherside = otherside, exactside
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if not first:
            return False
        second = self._m2.visitdir(dir)
        if first == 'all':
            return second
        # bool() because first=True + second='all' should not be 'all'
        return bool(second)

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side.
        if m1_set == 'all':
            return m2_set
        if m2_set == 'all':
            return m1_set

        if 'this' in (m1_set, m2_set):
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set & m2_set

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        parts = (self._m1, self._m2)
        return '<intersectionmatcher m1=%r, m2=%r>' % parts
772 772
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files below path, made relative.
        pathprefix = path + "/"
        self._files = [f[len(pathprefix):] for f in matcher._files
                       if f.startswith(pathprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def _outerdir(self, dir):
        # Translate a directory of the subdirectory into the wrapped
        # matcher's namespace ('.' is the subdirectory itself).
        if dir == '.':
            return self._path
        return self._path + "/" + dir

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        return self._matcher.visitdir(self._outerdir(dir))

    def visitchildrenset(self, dir):
        return self._matcher.visitchildrenset(self._outerdir(dir))

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        parts = (self._path, self._matcher)
        return '<subdirmatcher path=%r, matcher=%r>' % parts
848 848
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'root', b'd/e/f', b'd/e', m1)
    >>> bool(m2(b'a.txt'),)
    False
    >>> bool(m2(b'd/e/a.txt'))
    True
    >>> bool(m2(b'd/e/b.txt'))
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, root, cwd, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(root, cwd, badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._path = path
        self._pathprefix = path + '/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + f for f in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        return False

    @propertycache
    def _pathdirs(self):
        # Ancestors of the prefix path, plus the root ('.').
        return set(util.finddirs(self._path)) | {'.'}

    def _innerdir(self, dir):
        # Translate dir into the wrapped matcher's namespace; None when dir
        # is neither the prefix path itself nor located below it.
        if dir == self._path:
            return '.'
        if dir.startswith(self._pathprefix):
            return dir[len(self._pathprefix):]
        return None

    def visitdir(self, dir):
        inner = self._innerdir(dir)
        if inner is not None:
            return self._matcher.visitdir(inner)
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        inner = self._innerdir(dir)
        if inner is not None:
            return self._matcher.visitchildrenset(inner)
        if dir in self._pathdirs:
            return 'this'
        return set()

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        parts = (pycompat.bytestr(self._path), self._matcher)
        return '<prefixdirmatcher path=%r, matcher=%r>' % parts
929 929
class unionmatcher(basematcher):
    """Matcher that accepts whatever any of its sub-matchers accepts.

    The non-matching-attributes (root, cwd, bad, explicitdir, traversedir)
    are taken from the first matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__(first._root, first._cwd)
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        result = False
        for m in self._matchers:
            verdict = m.visitdir(dir)
            if verdict == 'all':
                # Nothing can be stronger than 'all'; stop looking.
                return verdict
            result |= verdict
        return result

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            verdict = m.visitchildrenset(dir)
            if not verdict:
                continue
            if verdict == 'all':
                return verdict
            if sawthis or verdict == 'this':
                sawthis = True
                # Keep scanning: a later matcher might still answer 'all'.
                continue
            assert isinstance(verdict, set)
            children = children.union(verdict)
        return 'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
981 981
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern whose kind is known.

    ``default`` is returned when the pattern has no known kind prefix.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
985 985
def _patsplit(pattern, default):
    """Separate a pattern into its optional kind prefix and the pattern
    proper.

    Returns (kind, pat) when the text before the first ':' is a known
    pattern kind, and (default, pattern) otherwise."""
    kind, sep, pat = pattern.partition(':')
    if sep and kind in allpatternkinds:
        return kind, pat
    return default, pattern
994 994
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned one character at a time. '*' matches within a
    path component, '**' matches across components, '?' matches a single
    character, '[...]' is a character class (a leading '!' negates it),
    '{a,b}' is an alternation, and a backslash escapes the next character.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the scan position, n the pattern length
    i, n = 0, len(pat)
    res = ''
    # nesting depth of currently open '{' groups
    group = 0
    escape = util.stringutil.regexbytesescapemap.get
    def peek():
        # next unread character, or False once the pattern is exhausted
        return i < n and pat[i:i + 1]
    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # ordinary character: emit it, regexp-escaped if necessary
            res += escape(c, c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches any number of leading directories,
                    # including none
                    i += 1
                    res += '(?:.*/)?'
                else:
                    res += '.*'
            else:
                # a single '*' never crosses a '/'
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a ']' or '!' directly after the
            # '[' is part of the class and cannot close it
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat the '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation '!' becomes regexp negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in a glob, so escape it
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside a '{' group
            res += '|'
        elif c == '\\':
            p = peek()
            if p:
                # escaped character: consume it and emit it literally
                i += 1
                res += escape(p, p)
            else:
                # trailing backslash: emit the backslash itself
                res += escape(c, c)
        else:
            # '}' or ',' outside any '{' group: literal character
            res += escape(c, c)
    return res
1075 1075
def _regex(kind, pat, globsuffix):
    '''Translate a (normalized) pattern of the given kind into a regular
    expression string.

    globsuffix is appended to the regexp of globs. An empty pattern yields
    an empty regexp, and a kind that cannot be expressed as a regexp raises
    error.ProgrammingError.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'relre':
        # an anchored pattern is used as-is, anything else may match at
        # any position
        return pat if pat.startswith('^') else '.*' + pat
    if kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        return util.stringutil.reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root itself).
        dirprefix = '' if pat == '.' else util.stringutil.reescape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + '[^/]+$'
    if kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1104 1104
def _buildmatch(kindpats, globsuffix, root):
    '''Build the matcher for kindpats; return (regexp string, match function).

    globsuffix is appended to the regexp of globs.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes are built on first use and then reused
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = submatchers[prefix] = match(*matcherargs)

                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            # only 'rootfilesin' patterns: compare the file's directory
            # against the set of directories instead of using a regexp
            dirs = {p for k, p, s in kindpats}
            def matchrootfilesin(f):
                head, sep, tail = f.rpartition('/')
                return (head if sep else '.') in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(matchrootfilesin)
        else:
            regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1147 1147
# Size limit, in bytes, used by _buildregexmatch(): a single pattern whose
# regexp is longer than this aborts, and longer pattern lists are split into
# groups that are compiled separately.
MAX_RE_SIZE = 20000
1149 1149
1150 1150 def _joinregexes(regexps):
1151 1151 """gather multiple regular expressions into a single one"""
1152 1152 return '|'.join(regexps)
1153 1153
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The returned regexp string is always the join of all patterns. When
    that would exceed MAX_RE_SIZE, the match function is backed by several
    smaller compiled groups instead of one regexp. A single pattern longer
    than MAX_RE_SIZE, or a pattern that does not compile, raises
    error.Abort.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        # one regexp per pattern, in input order
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # regexps[startidx:idx] is the group being accumulated and
        # groupsize is its joined length so far
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                # this piece does not fit: close the current group and
                # start a new one with this piece
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # the extra 1 accounts for the '|' separator added when joining
            groupsize += piecesize + 1

        if startidx == 0:
            # nothing was split off, so the full regexp can be used directly
            func = _rematcher(fullregexp)
        else:
            # flush the last group and match if any of the groups matches
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # compile the patterns one at a time to report the offending one,
        # together with its source when one is recorded
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        # every pattern compiled on its own; only the combination failed
        raise error.Abort(_("invalid pattern"))
1204 1204
1205 1205 def _patternrootsanddirs(kindpats):
1206 1206 '''Returns roots and directories corresponding to each pattern.
1207 1207
1208 1208 This calculates the roots and directories exactly matching the patterns and
1209 1209 returns a tuple of (roots, dirs) for each. It does not return other
1210 1210 directories which may also need to be considered, like the parent
1211 1211 directories.
1212 1212 '''
1213 1213 r = []
1214 1214 d = []
1215 1215 for kind, pat, source in kindpats:
1216 1216 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1217 1217 root = []
1218 1218 for p in pat.split('/'):
1219 1219 if '[' in p or '{' in p or '*' in p or '?' in p:
1220 1220 break
1221 1221 root.append(p)
1222 1222 r.append('/'.join(root) or '.')
1223 1223 elif kind in ('relpath', 'path'):
1224 1224 r.append(pat or '.')
1225 1225 elif kind in ('rootfilesin',):
1226 1226 d.append(pat or '.')
1227 1227 else: # relglob, re, relre
1228 1228 r.append('.')
1229 1229 return r, d
1230 1230
def _roots(kindpats):
    '''Return the root directories that the given patterns match
    recursively.'''
    return _patternrootsanddirs(kindpats)[0]
1235 1235
def _rootsdirsandparents(kindpats):
    '''Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # The parents are collected as non-recursive/exact directories, since
    # they must be scanned to get to either the roots or the other exact
    # directories.
    parents = list(util.dirs(exactdirs))
    parents.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1276 1276
def _explicitfiles(kindpats):
    '''Return the filenames that the patterns may name explicitly.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # Drop the pattern kinds that can only name directories; the remaining
    # kinds are the ones where one can specify filenames.
    filable = []
    for kindpat in kindpats:
        if kindpat[0] != 'rootfilesin':
            filable.append(kindpat)
    return _roots(filable)
1289 1289
1290 1290 def _prefix(kindpats):
1291 1291 '''Whether all the patterns match a prefix (i.e. recursively)'''
1292 1292 for kind, pat, source in kindpats:
1293 1293 if kind not in ('path', 'relpath'):
1294 1294 return False
1295 1295 return True
1296 1296
# Compiled regexp used by readpatternfile() to strip '#' comments; it is
# compiled lazily, the first time a line containing '#' is read.
_commentre = None
1298 1298
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    warn, when set, is called with a message for each 'syntax:' line that
    names an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where line is the text of the line once
    comments, trailing white space and any syntax prefix have been removed.
    This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The 'with' block closes the file even when reading the file or
    # calling warn() raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now