##// END OF EJS Templates
match: test for overflow error in pattern...
Boris Feld -
r40811:4e02f25f default
parent child Browse files
Show More
@@ -1,1386 +1,1395 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
27 27 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
28 28 'listfile', 'listfile0', 'set', 'include', 'subinclude',
29 29 'rootfilesin')
30 30 cwdrelativepatternkinds = ('relpath', 'glob')
31 31
32 32 propertycache = util.propertycache
33 33
def _rematcher(regex):
    """Compile ``regex`` via ``util.re`` and return a matching callable.

    The compiled object's ``test_match`` attribute is preferred when it
    exists; otherwise its ``match`` method is returned.
    """
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
43 43
def _expandsets(root, cwd, kindpats, ctx, listsubrepos, badfn):
    """Separate the 'set' patterns of ``kindpats`` from everything else.

    Each 'set' pattern is turned into a matcher through
    ``ctx.matchfileset``; when ``listsubrepos`` is true an additional
    ``prefixdirmatcher`` is created for every entry of ``ctx.substate``.

    Returns a ``(matchers, other)`` pair where ``other`` holds the
    non-'set' ``(kind, pat, source)`` tuples in their original order.
    Raises ``error.ProgrammingError`` if a 'set' pattern is met while
    ``ctx`` is falsy.
    """
    filesetmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != 'set':
            remaining.append((kind, pat, source))
            continue

        if not ctx:
            raise error.ProgrammingError("fileset expression with no "
                                         "context")
        filesetmatchers.append(ctx.matchfileset(pat, badfn=badfn))

        if not listsubrepos:
            continue
        for subpath in ctx.substate:
            submatcher = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
            filesetmatchers.append(
                prefixdirmatcher(root, cwd, subpath, submatcher,
                                 badfn=badfn))
    return filesetmatchers, remaining
65 65
def _expandsubinclude(kindpats, root):
    """Pull the 'subinclude' entries out of ``kindpats``.

    Returns ``(relmatchers, other)``: ``relmatchers`` is a list of
    ``(prefix, matcherargs)`` pairs, one per 'subinclude' pattern, and
    ``other`` contains the remaining ``(kind, pat, source)`` tuples
    untouched.
    """
    subincludes = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != 'subinclude':
            remaining.append((kind, pat, source))
            continue

        # the pattern file is resolved relative to the file that named it
        sourceroot = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourceroot, util.pconvert(pat))

        newroot = pathutil.dirname(path)
        matcherargs = (newroot, '', [], ['include:%s' % path])

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += '/'
        subincludes.append((prefix, matcherargs))

    return subincludes, remaining
89 89
90 90 def _kindpatsalwaysmatch(kindpats):
91 91 """"Checks whether the kindspats match everything, as e.g.
92 92 'relpath:.' does.
93 93 """
94 94 for kind, pat, source in kindpats:
95 95 if pat != '' or kind not in ['relpath', 'glob']:
96 96 return False
97 97 return True
98 98
def _buildkindpatsmatcher(matchercls, root, cwd, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    """Build a single matcher out of ``kindpats``.

    'set' patterns are expanded into their own matchers by
    ``_expandsets``; the remaining patterns (if any) are handed to
    ``matchercls``. The result is a ``nevermatcher`` when nothing was
    built, the sole matcher when exactly one was built, and a
    ``unionmatcher`` over all of them otherwise.
    """
    filesetmatchers, kindpats = _expandsets(root, cwd, kindpats, ctx=ctx,
                                            listsubrepos=listsubrepos,
                                            badfn=badfn)
    combined = []
    if kindpats:
        combined.append(matchercls(root, cwd, kindpats,
                                   listsubrepos=listsubrepos, badfn=badfn))
    combined.extend(filesetmatchers)

    if not combined:
        return nevermatcher(root, cwd, badfn=badfn)
    if len(combined) == 1:
        return combined[0]
    return unionmatcher(combined)
115 115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed through to pattern normalization
    ctx - context used to expand 'set:' patterns; also used to reach the
          dirstate when icasefs is set
    listsubrepos - forwarded to the pattern matchers and to 'set:' expansion
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                # regex can't be normalized
                if kind not in ('re', 'relre'):
                    original = pat
                    pat = foldcase(pat)

                    # Preserve the original to handle a case only rename.
                    if original != pat and original in dirstate:
                        folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded

    def _sidematcher(pats):
        # include and exclude lists are built the same way: always 'glob'
        # by default, always an includematcher, never a bad() callback
        sidepats = normalize(pats, 'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(includematcher, root, cwd, sidepats,
                                     ctx=ctx, listsubrepos=listsubrepos,
                                     badfn=None)

    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, cwd, kindpats,
                                      ctx=ctx, listsubrepos=listsubrepos,
                                      badfn=badfn)

    if include:
        m = intersectmatchers(m, _sidematcher(include))
    if exclude:
        m = differencematcher(m, _sidematcher(exclude))
    return m
199 199
def exact(root, cwd, files, badfn=None):
    """Return an ``exactmatcher`` for ``files``."""
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
202 202
def always(root, cwd):
    """Return an ``alwaysmatcher`` for the given root and cwd."""
    matcher = alwaysmatcher(root, cwd)
    return matcher
205 205
def never(root, cwd):
    """Return a ``nevermatcher`` for the given root and cwd."""
    matcher = nevermatcher(root, cwd)
    return matcher
208 208
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` is ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
216 216
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. 'listfile'/'listfile0'
    and 'include' patterns are expanded recursively; for their entries
    'source' names the file they came from, otherwise it is ''.

    Raises error.Abort when a file list cannot be read or when reading an
    included pattern file aborts. An unreadable include file (IOError) is
    skipped, after calling 'warn' if one was given.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # every entry of the list is attributed to the list file itself
            for k, p, _source in _donormalize(files, default, root, cwd,
                                              auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    # keep the innermost source when includes are nested
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # exceptions are not subscriptable on Python 3, so read the
                # message through .args rather than inst[0]
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
257 257
class basematcher(object):
    '''Base class of the matchers in this module.

    On its own it matches nothing and lists no files; subclasses override
    the relevant methods.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        self._relativeuipath = relativeuipath
        # only shadow the class-level bad() when a callback was supplied
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relative = self.rel(f)
            # a falsy relative path falls through to abs(), like "a and b
            # or c" would
            if relative:
                return relative
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
397 397
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        # unlike the base class, display paths default to root-relative
        super(alwaysmatcher, self).__init__(
            root, cwd, badfn, relativeuipath=relativeuipath)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory, and everything below it, is of interest
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
419 419
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    # Calling the nevermatcher an exact matcher or a prefix matcher is a
    # little weird, but it lets callers take their fast paths for either
    # kind. files() returns [], so there are no exact matches and no
    # prefixes, and fast paths that iterate over them are efficient (and
    # correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # nothing can match, so no directory needs visiting
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
445 445
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` is installed as this instance's ``matchfn``; ``predrepr``,
    when given, is what ``__repr__`` shows for the predicate.
    """

    def __init__(self, root, cwd, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(root, cwd, badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): the 'predicatenmatcher' spelling is kept verbatim;
        # repr output may be relied upon elsewhere.
        return '<predicatenmatcher pred=%s>' % described
459 459
class patternmatcher(basematcher):
    """Matcher built from a list of (kind, pat, source) patterns."""

    def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        built = _buildmatch(kindpats, '$', listsubrepos, root)
        self._pats, self.matchfn = built

    @propertycache
    def _dirs(self):
        # every ancestor directory of the listed files, plus the root
        ancestors = set(util.dirs(self._fileset))
        return ancestors | {'.'}

    def visitdir(self, dir):
        listed = self._fileset
        if self._prefix and dir in listed:
            return 'all'
        if '.' in listed or dir in listed or dir in self._dirs:
            return True
        # otherwise visit only when the directory lies below a listed path
        return any(parentdir in listed
                   for parentdir in util.finddirs(dir))

    def visitchildrenset(self, dir):
        # translate visitdir's answer: True -> 'this', falsy -> set(),
        # 'all' -> 'all'
        verdict = self.visitdir(dir)
        if verdict is True:
            return 'this'
        if not verdict:
            return set()
        assert verdict == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        shown = pycompat.bytestr(self._pats)
        return ('<patternmatcher patterns=%r>' % shown)
498 498
499 499 # This is basically a reimplementation of util.dirs that stores the children
500 500 # instead of just a count of them, plus a small optional optimization to avoid
501 501 # some directories we don't need.
502 502 class _dirchildren(object):
503 503 def __init__(self, paths, onlyinclude=None):
504 504 self._dirs = {}
505 505 self._onlyinclude = onlyinclude or []
506 506 addpath = self.addpath
507 507 for f in paths:
508 508 addpath(f)
509 509
510 510 def addpath(self, path):
511 511 if path == '.':
512 512 return
513 513 dirs = self._dirs
514 514 findsplitdirs = _dirchildren._findsplitdirs
515 515 for d, b in findsplitdirs(path):
516 516 if d not in self._onlyinclude:
517 517 continue
518 518 dirs.setdefault(d, set()).add(b)
519 519
520 520 @staticmethod
521 521 def _findsplitdirs(path):
522 522 # yields (dirname, basename) tuples, walking back to the root. This is
523 523 # very similar to util.finddirs, except:
524 524 # - produces a (dirname, basename) tuple, not just 'dirname'
525 525 # - includes root dir
526 526 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
527 527 # slash, and produces '.' for the root instead of ''.
528 528 oldpos = len(path)
529 529 pos = path.rfind('/')
530 530 while pos != -1:
531 531 yield path[:pos], path[pos + 1:oldpos]
532 532 oldpos = pos
533 533 pos = path.rfind('/', 0, pos)
534 534 yield '.', path[:oldpos]
535 535
536 536 def get(self, path):
537 537 return self._dirs.get(path, set())
538 538
class includematcher(basematcher):
    """Matcher for include/exclude style pattern lists."""

    def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        built = _buildmatch(kindpats, '(?:/|$)', listsubrepos, root)
        self._pats, self.matchfn = built
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def _underroot(self, dir):
        # true when one of dir's ancestors is recursively included
        roots = self._roots
        return any(parentdir in roots for parentdir in util.finddirs(dir))

    def visitdir(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots:
            return True
        if dir in self._dirs or dir in self._parents:
            return True
        return self._underroot(dir)

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('.' in roots or dir in roots or dir in self._dirs
            or self._underroot(dir)):
            return 'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        shown = pycompat.bytestr(self._pats)
        return ('<includematcher includes=%r>' % shown)
597 597
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # a list is stored as given; any other iterable is copied into one
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        ancestors = set(util.dirs(self._fileset))
        return ancestors | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | (self._dirs - {'.'})
        if dir != '.':
            prefix = dir + '/'
            skip = len(prefix)
            candidates = {c[skip:] for c in candidates
                          if c.startswith(prefix)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = set()
        for c in candidates:
            if '/' not in c:
                children.add(c)
        # We really do not expect this to be empty, since that would imply
        # that there's something in _dirs that didn't have a file in _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
646 646
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        # the callbacks are taken from the first matcher only
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        listed = self._m1.files()
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return listed
        return [f for f in listed if self(f)]

    def visitdir(self, dir):
        # everything below dir is excluded by m2: nothing left to visit
        if self._m2.visitdir(dir) == 'all':
            return False
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        excluded = self._m2.visitchildrenset(dir)
        if excluded == 'all':
            return set()
        wanted = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not excluded:
            return wanted
        if wanted in ['all', 'this']:
            # Never return 'all' here if m2's answer is any kind of non-empty
            # (either 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return wanted

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        pair = (self._m1, self._m2)
        return ('<differencematcher m1=%r, m2=%r>' % pair)
712 712
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the other one is returned. When one of the
    matchers reports always(), a shallow copy of the other is returned
    instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        result = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(result, attr, getattr(m1, attr))
        result._relativeuipath |= m1._relativeuipath
        return result

    if m2.always():
        result = copy.copy(m1)
        result._relativeuipath |= m2._relativeuipath
        return result

    return intersectionmatcher(m1, m2)
737 737
class intersectionmatcher(basematcher):
    '''Matches the files that both wrapped matchers match.

    root, cwd, bad, explicitdir and traversedir are taken from the first
    matcher.
    '''
    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # filter the exact matcher's files through the other matcher
        exactside, otherside = self._m1, self._m2
        if not exactside.isexact():
            exactside, otherside = otherside, exactside
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            return self._m2.visitdir(dir)
        # bool() because first=True + second='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        first = self._m1.visitchildrenset(dir)
        if not first:
            return set()
        second = self._m2.visitchildrenset(dir)
        if not second:
            return set()

        # 'all' on one side defers entirely to the other side
        if first == 'all':
            return second
        if second == 'all':
            return first

        if 'this' in (first, second):
            return 'this'

        assert isinstance(first, set) and isinstance(second, set)
        return first.intersection(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        pair = (self._m1, self._m2)
        return ('<intersectionmatcher m1=%r, m2=%r>' % pair)
798 798
799 799 class subdirmatcher(basematcher):
800 800 """Adapt a matcher to work on a subdirectory only.
801 801
802 802 The paths are remapped to remove/insert the path as needed:
803 803
804 804 >>> from . import pycompat
805 805 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
806 806 >>> m2 = subdirmatcher(b'sub', m1)
807 807 >>> bool(m2(b'a.txt'))
808 808 False
809 809 >>> bool(m2(b'b.txt'))
810 810 True
811 811 >>> bool(m2.matchfn(b'a.txt'))
812 812 False
813 813 >>> bool(m2.matchfn(b'b.txt'))
814 814 True
815 815 >>> m2.files()
816 816 ['b.txt']
817 817 >>> m2.exact(b'b.txt')
818 818 True
819 819 >>> util.pconvert(m2.rel(b'b.txt'))
820 820 'sub/b.txt'
821 821 >>> def bad(f, msg):
822 822 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
823 823 >>> m1.bad = bad
824 824 >>> m2.bad(b'x.txt', b'No such file')
825 825 sub/x.txt: No such file
826 826 >>> m2.abs(b'c.txt')
827 827 'sub/c.txt'
828 828 """
829 829
830 830 def __init__(self, path, matcher):
831 831 super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
832 832 self._path = path
833 833 self._matcher = matcher
834 834 self._always = matcher.always()
835 835
836 836 self._files = [f[len(path) + 1:] for f in matcher._files
837 837 if f.startswith(path + "/")]
838 838
839 839 # If the parent repo had a path to this subrepo and the matcher is
840 840 # a prefix matcher, this submatcher always matches.
841 841 if matcher.prefix():
842 842 self._always = any(f == path for f in matcher._files)
843 843
844 844 def bad(self, f, msg):
845 845 self._matcher.bad(self._path + "/" + f, msg)
846 846
847 847 def abs(self, f):
848 848 return self._matcher.abs(self._path + "/" + f)
849 849
850 850 def rel(self, f):
851 851 return self._matcher.rel(self._path + "/" + f)
852 852
853 853 def uipath(self, f):
854 854 return self._matcher.uipath(self._path + "/" + f)
855 855
856 856 def matchfn(self, f):
857 857 # Some information is lost in the superclass's constructor, so we
858 858 # can not accurately create the matching function for the subdirectory
859 859 # from the inputs. Instead, we override matchfn() and visitdir() to
860 860 # call the original matcher with the subdirectory path prepended.
861 861 return self._matcher.matchfn(self._path + "/" + f)
862 862
def visitdir(self, dir):
    """Ask the wrapped matcher whether ``dir`` should be visited.

    ``dir`` is relative to the subdirectory; '.' designates the
    subdirectory itself.
    """
    parentdir = self._path if dir == '.' else self._path + "/" + dir
    return self._matcher.visitdir(parentdir)
869 869
def visitchildrenset(self, dir):
    """Return the wrapped matcher's visitchildrenset() for ``dir``.

    ``dir`` is relative to the subdirectory; '.' designates the
    subdirectory itself.
    """
    parentdir = self._path if dir == '.' else self._path + "/" + dir
    return self._matcher.visitchildrenset(parentdir)
876 876
def always(self):
    """Return the always-matches flag computed by the constructor."""
    return self._always
879 879
def prefix(self):
    """Tell whether this is a prefix matcher: the wrapped matcher must
    be one, and this matcher must not be an always-matcher."""
    wrapped = self._matcher.prefix()
    if not wrapped:
        return wrapped
    return not self._always
882 882
@encoding.strmethod
def __repr__(self):
    """Show the subdirectory path and the wrapped matcher."""
    fields = (self._path, self._matcher)
    return '<subdirmatcher path=%r, matcher=%r>' % fields
887 887
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The wrapped matcher is queried with paths relative to ``path``; this
    matcher is queried with paths that carry the ``path + '/'`` prefix.

    The matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'root', b'd/e/f', b'd/e', m1)
    >>> bool(m2(b'a.txt'),)
    False
    >>> bool(m2(b'd/e/a.txt'))
    True
    >>> bool(m2(b'd/e/b.txt'))
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, root, cwd, path, matcher, badfn=None):
        """Wrap ``matcher`` below the non-empty prefix ``path``.

        Raises error.ProgrammingError if ``path`` is empty.
        """
        super(prefixdirmatcher, self).__init__(root, cwd, badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._matcher = matcher
        self._path = path
        self._pathprefix = path + '/'

    @propertycache
    def _files(self):
        """The wrapped matcher's files, each with the prefix prepended."""
        pathprefix = self._pathprefix
        return [pathprefix + f for f in self._matcher._files]

    def matchfn(self, f):
        """Match ``f`` if it lives below the prefix and the wrapped
        matcher accepts the remainder of the path."""
        pathprefix = self._pathprefix
        if f.startswith(pathprefix):
            return self._matcher.matchfn(f[len(pathprefix):])
        return False

    @propertycache
    def _pathdirs(self):
        """Directories reported by util.finddirs() for the prefix path,
        plus '.'."""
        parents = set(util.finddirs(self._path))
        parents.add('.')
        return parents

    def visitdir(self, dir):
        """Delegate to the wrapped matcher for the prefix directory and
        anything below it; otherwise only directories in _pathdirs are
        visited."""
        pathprefix = self._pathprefix
        if dir == self._path:
            return self._matcher.visitdir('.')
        if dir.startswith(pathprefix):
            return self._matcher.visitdir(dir[len(pathprefix):])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        """Delegate to the wrapped matcher for the prefix directory and
        anything below it; return 'this' for directories in _pathdirs
        and an empty set for everything else."""
        pathprefix = self._pathprefix
        if dir == self._path:
            return self._matcher.visitchildrenset('.')
        if dir.startswith(pathprefix):
            return self._matcher.visitchildrenset(dir[len(pathprefix):])
        if dir not in self._pathdirs:
            return set()
        return 'this'

    def isexact(self):
        """Same answer as the wrapped matcher."""
        return self._matcher.isexact()

    def prefix(self):
        """Same answer as the wrapped matcher."""
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        fields = (pycompat.bytestr(self._path), self._matcher)
        return '<prefixdirmatcher path=%r, matcher=%r>' % fields
968 968
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    root, cwd, explicitdir and traversedir are copied from the first
    matcher of the (non-empty) sequence given to the constructor.
    """

    def __init__(self, matchers):
        """``matchers`` is a non-empty sequence of matchers; indexing an
        empty one raises IndexError."""
        m1 = matchers[0]
        super(unionmatcher, self).__init__(m1._root, m1._cwd)
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as one of the matchers accepts ``f``."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """Combine the visitdir() answers of all matchers.

        'all' from any matcher wins immediately; otherwise the answers
        are or-ed together.
        """
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == 'all':
                return v
            r |= v
        return r

    def visitchildrenset(self, dir):
        """Combine the visitchildrenset() answers of all matchers.

        'all' from any matcher wins immediately, then 'this'; otherwise
        the union of the returned sets is the answer.
        """
        children = set()
        sawthis = False
        for m in self._matchers:
            v = m.visitchildrenset(dir)
            if not v:
                continue
            if v == 'all':
                return v
            if v == 'this':
                # don't break, we might have an 'all' in here.
                sawthis = True
            elif not sawthis:
                # once 'this' has been seen the sets no longer matter
                assert isinstance(v, set)
                children = children.union(v)
        if sawthis:
            return 'this'
        return children

    @encoding.strmethod
    def __repr__(self):
        # Wrap in a 1-tuple: a bare tuple of matchers on the right of '%'
        # would be unpacked as the argument list instead of being shown.
        return '<unionmatcher matchers=%r>' % (self._matchers,)
1020 1020
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern when the kind is a known
    one, and default otherwise.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
1024 1024
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns (kind, pat) when the text before the first ':' is one of
    allpatternkinds, and (default, pattern) otherwise."""
    kind, colon, pat = pattern.partition(':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1033 1033
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned one character at a time. Characters other than
    ``*?[{},\`` are copied through the regexp escape map. Nothing checks
    that ``{`` groups are closed.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the index of the next unread character, n the pattern length
    i, n = 0, len(pat)
    res = ''
    # nesting depth of the currently open '{' groups
    group = 0
    escape = util.stringutil.regexbytesescapemap.get
    def peek():
        # next unread character, or False at the end of the pattern
        return i < n and pat[i:i + 1]
    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # not a glob metacharacter: emit it, regexp-escaped if needed
            res += escape(c, c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' also matches zero directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' crosses directory separators
                    res += '.*'
            else:
                # a single '*' stops at directory separators
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a '!' or ']' right after the '['
            # belongs to the class and cannot close it
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat the '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation becomes regexp negation
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in a glob class
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            res += '|'
        elif c == '\\':
            # a backslash quotes the next character; a trailing backslash
            # stands for itself
            p = peek()
            if p:
                i += 1
                res += escape(p, p)
            else:
                res += escape(c, c)
        else:
            # '}' or ',' outside of any group: literal
            res += escape(c, c)
    return res
1114 1114
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.

    globsuffix is appended to the regexp of globs ('glob' and 'relglob'
    kinds). An empty pattern always gives an empty regexp. Kinds that can
    not be turned into a regexp raise error.ProgrammingError.'''
    if not pat:
        return ''

    if kind == 're':
        return pat
    elif kind == 'relre':
        # unanchored unless the pattern anchors itself
        if pat.startswith('^'):
            return pat
        return '.*' + pat
    elif kind == 'glob':
        return _globre(pat) + globsuffix
    elif kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    elif kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        return util.stringutil.reescape(pat) + '(?:/|$)'
    elif kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root).
        escaped = ''
        if pat != '.':
            escaped = util.stringutil.reescape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return escaped + '[^/]+$'

    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1143 1143
def _buildmatch(kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs. 'subinclude' patterns
    are split off first and get a matcher function of their own. When
    several matcher functions are needed, the returned function accepts a
    file as soon as one of them does.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers of the sub-includes, built on first use
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatcher = submatchers.get(prefix)
                if submatcher is None:
                    submatcher = match(*matcherargs)
                    submatchers[prefix] = submatcher

                if submatcher(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            # only directories are listed: a set lookup on the parent
            # directory of the file replaces the regexp
            dirs = {p for k, p, s in kindpats}
            def mf(f):
                head, slash, tail = f.rpartition('/')
                parent = head if slash else '.'
                return parent in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) != 1:
        return regex, lambda f: any(fn(f) for fn in matchfuncs)
    return regex, matchfuncs[0]
1186 1186
# Longest regexp string handed to the regexp engine in one piece; see
# _buildregexmatch().
MAX_RE_SIZE = 20000

def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    When the combined regexp is longer than MAX_RE_SIZE, the pattern list
    is split in two halves which are compiled separately; the returned
    regexp string is still the full one. A single pattern that is too
    long raises OverflowError. A pattern that does not compile raises
    error.Abort.

    Test too large input
    >>> _buildregexmatch([
    ...     ('relglob', '?' * MAX_RE_SIZE, '')
    ... ], '$')
    Traceback (most recent call last):
    ...
    OverflowError
    """
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) <= MAX_RE_SIZE:
            return regex, _rematcher(regex)
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            # TODO: raise error.Abort here
            raise OverflowError
        half = count // 2
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]
        def either(s):
            return first(s) or second(s)
        return regex, either
    except re.error:
        # compile the patterns one by one to report the offending one
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _("%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _("invalid pattern (%s): %s") % (k, p)
                raise error.Abort(msg)
        raise error.Abort(_("invalid pattern"))
1218 1227
1219 1228 def _patternrootsanddirs(kindpats):
1220 1229 '''Returns roots and directories corresponding to each pattern.
1221 1230
1222 1231 This calculates the roots and directories exactly matching the patterns and
1223 1232 returns a tuple of (roots, dirs) for each. It does not return other
1224 1233 directories which may also need to be considered, like the parent
1225 1234 directories.
1226 1235 '''
1227 1236 r = []
1228 1237 d = []
1229 1238 for kind, pat, source in kindpats:
1230 1239 if kind == 'glob': # find the non-glob prefix
1231 1240 root = []
1232 1241 for p in pat.split('/'):
1233 1242 if '[' in p or '{' in p or '*' in p or '?' in p:
1234 1243 break
1235 1244 root.append(p)
1236 1245 r.append('/'.join(root) or '.')
1237 1246 elif kind in ('relpath', 'path'):
1238 1247 r.append(pat or '.')
1239 1248 elif kind in ('rootfilesin',):
1240 1249 d.append(pat or '.')
1241 1250 else: # relglob, re, relre
1242 1251 r.append('.')
1243 1252 return r, d
1244 1253
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of what _patternrootsanddirs() returns.'''
    return _patternrootsanddirs(kindpats)[0]
1249 1258
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents are non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = list(util.dirs(dirs))
    parents += util.dirs(roots)
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1290 1299
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, so it is left out: only the
    # pattern kinds where one can specify filenames are kept.
    dironly = ('rootfilesin',)
    filable = []
    for kindpat in kindpats:
        if kindpat[0] in dironly:
            continue
        filable.append(kindpat)
    return _roots(filable)
1303 1312
1304 1313 def _prefix(kindpats):
1305 1314 '''Whether all the patterns match a prefix (i.e. recursively)'''
1306 1315 for kind, pat, source in kindpats:
1307 1316 if kind not in ('path', 'relpath'):
1308 1317 return False
1309 1318 return True
1310 1319
# Compiled lazily by readpatternfile(), the first time a line holding a
# '#' is met.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    a "syntax:" line naming an unknown syntax is ignored; if warn is set,
    it is called with a message about it.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The context manager closes the file even when warn() or the parsing
    # below raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now