##// END OF EJS Templates
match: use raw strings to avoid illegal backslash escape...
Gregory Szorc -
r42367:fd384911 default
parent child Browse files
Show More
@@ -1,1479 +1,1479 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
# Pattern kind prefixes (the part before ':' in 'kind:pattern').
# NOTE(review): presumably the complete list of kinds accepted by this
# module -- confirm against _patsplit.
allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'rootglob',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
# Kinds whose patterns _donormalize() canonicalizes relative to cwd.
cwdrelativepatternkinds = ('relpath', 'glob')

# Local alias so classes below can write @propertycache.
propertycache = util.propertycache
34 34
def _rematcher(regex):
    '''Compile ``regex`` via ``util.re`` and return a matching callable.

    The compiled object's ``test_match`` attribute is returned when it
    exists (slightly faster, provided by facebook's re2 bindings); otherwise
    the ordinary ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        matchfn = compiled.test_match
    except AttributeError:
        # no re2-style fast path on this compiled pattern
        matchfn = compiled.match
    return matchfn
44 44
45 45 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
46 46 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
47 47 matchers = []
48 48 other = []
49 49
50 50 for kind, pat, source in kindpats:
51 51 if kind == 'set':
52 52 if ctx is None:
53 53 raise error.ProgrammingError("fileset expression with no "
54 54 "context")
55 55 matchers.append(ctx.matchfileset(pat, badfn=badfn))
56 56
57 57 if listsubrepos:
58 58 for subpath in ctx.substate:
59 59 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
60 60 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
61 61 matchers.append(pm)
62 62
63 63 continue
64 64 other.append((kind, pat, source))
65 65 return matchers, other
66 66
67 67 def _expandsubinclude(kindpats, root):
68 68 '''Returns the list of subinclude matcher args and the kindpats without the
69 69 subincludes in it.'''
70 70 relmatchers = []
71 71 other = []
72 72
73 73 for kind, pat, source in kindpats:
74 74 if kind == 'subinclude':
75 75 sourceroot = pathutil.dirname(util.normpath(source))
76 76 pat = util.pconvert(pat)
77 77 path = pathutil.join(sourceroot, pat)
78 78
79 79 newroot = pathutil.dirname(path)
80 80 matcherargs = (newroot, '', [], ['include:%s' % path])
81 81
82 82 prefix = pathutil.canonpath(root, root, newroot)
83 83 if prefix:
84 84 prefix += '/'
85 85 relmatchers.append((prefix, matcherargs))
86 86 else:
87 87 other.append((kind, pat, source))
88 88
89 89 return relmatchers, other
90 90
91 91 def _kindpatsalwaysmatch(kindpats):
92 92 """"Checks whether the kindspats match everything, as e.g.
93 93 'relpath:.' does.
94 94 """
95 95 for kind, pat, source in kindpats:
96 96 if pat != '' or kind not in ['relpath', 'glob']:
97 97 return False
98 98 return True
99 99
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher for kindpats using matchercls.

    'set' patterns are expanded to fileset matchers by _expandsets(); the
    remaining patterns, if any, are handed to
    ``matchercls(root, kindpats, badfn=badfn)``.

    Returns a nevermatcher when nothing was produced, the lone matcher when
    exactly one was produced, and a unionmatcher over all of them otherwise.
    '''
    filesetmatchers, plainpats = _expandsets(kindpats, ctx=ctx,
                                             listsubrepos=listsubrepos,
                                             badfn=badfn)
    combined = []
    if plainpats:
        combined.append(matchercls(root, plainpats, badfn=badfn))
    # the pattern matcher (if any) comes first, fileset matchers after it
    combined.extend(filesetmatchers)

    count = len(combined)
    if count == 0:
        return nevermatcher(badfn=badfn)
    if count == 1:
        return combined[0]
    return unionmatcher(combined)
115 115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                normalized = dsnormalize(pat)
                # Preserve the original to handle a case only rename.
                if pat != normalized and pat in dirstate:
                    folded.append((kind, pat, source))
                folded.append((kind, normalized, source))
            return folded
    else:
        normalize = _donormalize

    def includelike(pats):
        # -I/-X style patterns always default to 'glob' and never get the
        # caller's badfn.
        kindpats = normalize(pats, 'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
                                     listsubrepos=listsubrepos, badfn=None)

    m = None
    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if not _kindpatsalwaysmatch(kindpats):
            m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
                                      listsubrepos=listsubrepos, badfn=badfn)
    if m is None:
        # Either the patterns match everything, or there were none at all.
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        m = intersectmatchers(m, includelike(include))
    if exclude:
        m = differencematcher(m, includelike(exclude))
    return m
241 241
def exact(files, badfn=None):
    '''Return an exactmatcher for ``files``, passing ``badfn`` through.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
244 244
def always(badfn=None):
    '''Return an alwaysmatcher, passing ``badfn`` through.'''
    matcher = alwaysmatcher(badfn)
    return matcher
247 247
def never(badfn=None):
    '''Return a nevermatcher, passing ``badfn`` through.'''
    matcher = nevermatcher(badfn)
    return matcher
250 250
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is
    ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
258 258
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Each pattern is split by _patsplit() using ``default`` as the kind for
    prefix-less patterns. The result is a list of ``(kind, pat, source)``
    tuples, where ``source`` is '' for patterns given directly and the
    originating file for patterns read from a listfile or include file.

    Raises error.Abort when a listfile cannot be read or when reading an
    include file aborts. An include file that fails with IOError is skipped,
    reporting through ``warn`` when one is given.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                # drop empty entries (e.g. after a trailing separator)
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # every pattern from the list is attributed to the list file
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    # keep the innermost source when includes are nested
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3; go through
                # .args, which works on both Python 2 and 3.
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
299 299
class basematcher(object):
    '''Common base for matchers.

    As defined here it matches nothing (``matchfn`` returns False) and lists
    no files; subclasses override the pieces they need.
    '''

    def __init__(self, badfn=None):
        # Only shadow the bad() method when a callback was actually given.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
421 421
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def matchfn(self, f):
        # every file name matches
        return True

    def always(self):
        return True

    # Everything matches, so every directory is visited in full.
    def visitdir(self, dir):
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'
442 442
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # Nothing matches, so no directory is ever worth visiting.
    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True
468 468
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` becomes the instance's ``matchfn``; ``predrepr`` is only used
    to build the repr.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        # Fall back to the repr of the predicate itself when no (non-empty)
        # description was supplied.
        desc = stringutil.buildrepr(self._predrepr)
        if not desc:
            desc = pycompat.byterepr(self.matchfn)
        # NOTE(review): 'predicatenmatcher' looks like a misspelling, but it
        # is runtime output and is kept as-is.
        return '<predicatenmatcher pred=%s>' % desc
482 482
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['.', 'foo/a', 'b', '.']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """
    # The docstring above is a raw string because it contains '\.'
    # sequences, which are invalid escapes in a regular string literal.

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # '$' anchors glob-derived regexps at the end of the file name
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # all ancestor directories of the listed files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Return 'all', True or False for ``dir`` (see
        basematcher.visitdir).

        'all' is only returned for a prefix matcher when ``dir`` itself is
        one of the listed files.
        '''
        if self._prefix and dir in self._fileset:
            return 'all'
        return ('.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def visitchildrenset(self, dir):
        '''Translate visitdir()'s answer: True -> 'this', falsy -> set(),
        'all' -> 'all'.'''
        ret = self.visitdir(dir)
        if ret is True:
            return 'this'
        elif not ret:
            return set()
        assert ret == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
551 551
552 552 # This is basically a reimplementation of util.dirs that stores the children
553 553 # instead of just a count of them, plus a small optional optimization to avoid
554 554 # some directories we don't need.
555 555 class _dirchildren(object):
556 556 def __init__(self, paths, onlyinclude=None):
557 557 self._dirs = {}
558 558 self._onlyinclude = onlyinclude or []
559 559 addpath = self.addpath
560 560 for f in paths:
561 561 addpath(f)
562 562
563 563 def addpath(self, path):
564 564 if path == '.':
565 565 return
566 566 dirs = self._dirs
567 567 findsplitdirs = _dirchildren._findsplitdirs
568 568 for d, b in findsplitdirs(path):
569 569 if d not in self._onlyinclude:
570 570 continue
571 571 dirs.setdefault(d, set()).add(b)
572 572
573 573 @staticmethod
574 574 def _findsplitdirs(path):
575 575 # yields (dirname, basename) tuples, walking back to the root. This is
576 576 # very similar to util.finddirs, except:
577 577 # - produces a (dirname, basename) tuple, not just 'dirname'
578 578 # - includes root dir
579 579 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
580 580 # slash, and produces '.' for the root instead of ''.
581 581 oldpos = len(path)
582 582 pos = path.rfind('/')
583 583 while pos != -1:
584 584 yield path[:pos], path[pos + 1:oldpos]
585 585 oldpos = pos
586 586 pos = path.rfind('/', 0, pos)
587 587 yield '.', path[:oldpos]
588 588
589 589 def get(self, path):
590 590 return self._dirs.get(path, set())
591 591
class includematcher(basematcher):
    '''Matcher built from include-style (kind, pat, source) patterns.

    Besides the compiled match function it tracks three directory sets
    (roots, dirs, parents) that drive visitdir()/visitchildrenset().
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def _underroot(self, dir):
        # True when the repository root is a root, or ``dir`` or one of its
        # ancestors is.
        roots = self._roots
        if '.' in roots or dir in roots:
            return True
        return any(parentdir in roots for parentdir in util.finddirs(dir))

    def visitdir(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots:
            return True
        if dir in self._dirs or dir in self._parents:
            return True
        return any(parentdir in roots for parentdir in util.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if '.' in roots or dir in roots or dir in self._dirs:
            return 'this'
        if any(parentdir in roots for parentdir in util.finddirs(dir)):
            return 'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
649 649
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # a list is stored as given; any other iterable is copied into one
        self._files = files if isinstance(files, list) else list(files)

    # matching a file is simply membership in the file set
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | (self._dirs - {'.'})
        if dir != '.':
            prefix = dir + '/'
            skip = len(prefix)
            candidates = {c[skip:] for c in candidates
                          if c.startswith(prefix)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating that it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {c for c in candidates if '/' not in c}
        # We really do not expect children to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
714 714
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # callbacks are inherited from the first matcher only
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        '''Return 'all', True or False for ``dir`` (see
        basematcher.visitdir).'''
        # Ask m2 once; its answer is needed for both checks below.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            # everything below dir is excluded
            return False
        m1visit = self._m1.visitdir(dir)
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return m1visit
        return bool(m1visit)

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
783 783
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.

    If either argument is None, ``m1 or m2`` is returned. If one side
    reports always(), a shallow copy of the other side is returned (carrying
    m1's callbacks); otherwise an intersectionmatcher is built.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        result = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir'):
            setattr(result, attr, getattr(m1, attr))
        return result

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
804 804
class intersectionmatcher(basematcher):
    '''Matches only what both wrapped matchers match.

    The bad, explicitdir and traversedir callbacks are taken from the first
    matcher.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # Filter the exact side's files through the other matcher.
        if self._m1.isexact():
            exactside, otherside = self._m1, self._m2
        else:
            exactside, otherside = self._m2, self._m1
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if not first:
            return False
        second = self._m2.visitdir(dir)
        if first == 'all':
            return second
        # bool() because first=True + second='all' should not be 'all'
        return bool(second)

    def visitchildrenset(self, dir):
        left = self._m1.visitchildrenset(dir)
        if not left:
            return set()
        right = self._m2.visitchildrenset(dir)
        if not right:
            return set()

        # 'all' on one side defers entirely to the other side
        if left == 'all':
            return right
        if right == 'all':
            return left

        if 'this' in (left, right):
            return 'this'

        assert isinstance(left, set) and isinstance(right, set)
        return left.intersection(right)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
865 865
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    Paths given to this matcher are relative to the subdirectory; they are
    prefixed with the subdirectory path before being handed to the wrapped
    matcher, and the wrapped matcher's files have that prefix removed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the parent's files located below ``path`` and express
        # them relative to it.
        self._files = [name[len(path) + 1:]
                       for name in matcher._files
                       if name.startswith(path + "/")]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def _parentdir(self, dir):
        """Translate a directory of this matcher into the parent's naming."""
        if dir == '.':
            return self._path
        return self._path + "/" + dir

    def bad(self, f, msg):
        # Report through the wrapped matcher, using the parent-relative path.
        self._matcher.bad(self._path + "/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the
        # subdirectory from the inputs. Instead, we override matchfn() and
        # visitdir() to call the original matcher with the subdirectory path
        # prepended.
        fullpath = self._path + "/" + f
        return self._matcher.matchfn(fullpath)

    def visitdir(self, dir):
        return self._matcher.visitdir(self._parentdir(dir))

    def visitchildrenset(self, dir):
        return self._matcher.visitchildrenset(self._parentdir(dir))

    def always(self):
        return self._always

    def prefix(self):
        if self._always:
            return False
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        details = (self._path, self._matcher)
        return '<subdirmatcher path=%r, matcher=%r>' % details
941 941
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._path = path
        # Cached "path/" form, used for prefix tests and for slicing.
        self._pathprefix = path + '/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        # The wrapped matcher's files, re-rooted below the prefix.
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        # Only paths below the prefix can match; the rest of the path is
        # delegated to the wrapped matcher.
        if f.startswith(self._pathprefix):
            return self._matcher.matchfn(f[len(self._pathprefix):])
        return False

    @propertycache
    def _pathdirs(self):
        # Directories reported by util.finddirs() for the prefix path, plus
        # the root.
        dirs = set(util.finddirs(self._path))
        dirs.add('.')
        return dirs

    def visitdir(self, dir):
        if dir == self._path:
            return self._matcher.visitdir('.')
        if not dir.startswith(self._pathprefix):
            return dir in self._pathdirs
        return self._matcher.visitdir(dir[len(self._pathprefix):])

    def visitchildrenset(self, dir):
        if dir == self._path:
            return self._matcher.visitchildrenset('.')
        if dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix):]
            return self._matcher.visitchildrenset(inner)
        if dir not in self._pathdirs:
            return set()
        return 'this'

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        details = (pycompat.bytestr(self._path), self._matcher)
        return '<prefixdirmatcher path=%r, matcher=%r>' % details
1022 1022
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, explicitdir, traversedir) are taken from
    the first matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # A file matches as soon as any of the wrapped matchers accepts it.
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                # Nothing can override 'all'; stop looking.
                return answer
            visit |= answer
        return visit

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if sawthis or answer == 'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children |= answer
        return 'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return '<unionmatcher matchers=%r>' % self._matchers
1074 1074
def patkind(pattern, default=None):
    r'''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return ``default``. The docstring is a raw string so that the
    backslash in the regexp example below is not treated as an (invalid)
    string escape sequence.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    # _patsplit() returns a (kind, pattern) pair; only the kind is wanted.
    return _patsplit(pattern, default)[0]
1089 1089
1090 1090 def _patsplit(pattern, default):
1091 1091 """Split a string into the optional pattern kind prefix and the actual
1092 1092 pattern."""
1093 1093 if ':' in pattern:
1094 1094 kind, pat = pattern.split(':', 1)
1095 1095 if kind in allpatternkinds:
1096 1096 return kind, pat
1097 1097 return default, pattern
1098 1098
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned one character at a time. ``*``, ``**``, ``**/``,
    ``?``, ``[...]`` character classes, ``{a,b}`` alternation groups and
    backslash escapes are translated; every other character goes through the
    regexp escape map.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the read position in pat, n its length.
    i, n = 0, len(pat)
    res = ''
    # Number of '{' groups currently open; '}' and ',' are only special
    # while this is non-zero.
    group = 0
    # escape(c, c) yields the mapped replacement for c, or c itself when the
    # map has no entry for it.
    escape = util.stringutil.regexbytesescapemap.get
    def peek():
        # Next unread character, or a false value at end of pattern.
        return i < n and pat[i:i + 1]
    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # Not a glob metacharacter.
            res += escape(c, c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' also consumes the slash and may match nothing.
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' matches across directory separators.
                    res += '.*'
            else:
                # A single '*' stays within one path component.
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # Look for the closing ']'; a '!' or ']' directly after '[' is
            # skipped first so it cannot terminate the class.
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # No closing bracket: emit a literal '['.
                res += '\\['
            else:
                # Backslashes inside the class are doubled.
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # Glob negation '!' becomes regexp negation '^'.
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # A leading '^' is escaped so it does not negate.
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            res += '|'
        elif c == '\\':
            # A backslash escapes the following character; a trailing
            # backslash is escaped itself.
            p = peek()
            if p:
                i += 1
                res += escape(p, p)
            else:
                res += escape(c, c)
        else:
            # '}' or ',' outside of any group.
            res += escape(c, c)
    return res
1179 1179
1180 1180 def _regex(kind, pat, globsuffix):
1181 1181 '''Convert a (normalized) pattern of any kind into a regular expression.
1182 1182 globsuffix is appended to the regexp of globs.'''
1183 1183 if not pat:
1184 1184 return ''
1185 1185 if kind == 're':
1186 1186 return pat
1187 1187 if kind in ('path', 'relpath'):
1188 1188 if pat == '.':
1189 1189 return ''
1190 1190 return util.stringutil.reescape(pat) + '(?:/|$)'
1191 1191 if kind == 'rootfilesin':
1192 1192 if pat == '.':
1193 1193 escaped = ''
1194 1194 else:
1195 1195 # Pattern is a directory name.
1196 1196 escaped = util.stringutil.reescape(pat) + '/'
1197 1197 # Anything after the pattern must be a non-directory.
1198 1198 return escaped + '[^/]+$'
1199 1199 if kind == 'relglob':
1200 1200 return '(?:|.*/)' + _globre(pat) + globsuffix
1201 1201 if kind == 'relre':
1202 1202 if pat.startswith('^'):
1203 1203 return pat
1204 1204 return '.*' + pat
1205 1205 if kind in ('glob', 'rootglob'):
1206 1206 return _globre(pat) + globsuffix
1207 1207 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1208 1208
def _buildmatch(kindpats, globsuffix, root):
    '''Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs. Subinclude patterns are
    split off first and matched by lazily created sub-matchers; the
    remaining patterns are matched either by directory lookup (when they
    are all 'rootfilesin') or by a compiled regexp.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Sub-matchers are built on first use and cached per prefix.
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatcher = submatchers.get(prefix)
                if submatcher is None:
                    submatcher = submatchers[prefix] = match(*matcherargs)

                if submatcher(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}
            def mf(f):
                # Compare the file's directory against the requested ones.
                slash = f.rfind('/')
                dir = f[:slash] if slash >= 0 else '.'
                return dir in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
            matchfuncs.append(mf)
        else:
            regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1251 1251
# Size limit used by _buildregexmatch(): a single pattern whose regexp is
# longer than this is rejected, and joined patterns are split into several
# groups so that each group stays within the limit.
MAX_RE_SIZE = 20000
1253 1253
1254 1254 def _joinregexes(regexps):
1255 1255 """gather multiple regular expressions into a single one"""
1256 1256 return '|'.join(regexps)
1257 1257
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The returned regexp string always joins every pattern. When the joined
    patterns would exceed MAX_RE_SIZE, matching is done with several
    smaller compiled groups instead of one.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        pieces = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(pieces)

        groups = []
        groupstart = 0
        groupsize = 0
        for idx, piece in enumerate(pieces):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if (groupsize + piecesize) > MAX_RE_SIZE:
                # Close the current group; this piece starts the next one.
                groups.append(_joinregexes(pieces[groupstart:idx]))
                groupstart = idx
                groupsize = 0
            # The extra 1 accounts for the '|' separator.
            groupsize += piecesize + 1

        if groupstart == 0:
            # Everything fits in a single regexp.
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            groups.append(_joinregexes(pieces[groupstart:]))
            allmatchers = [_rematcher(g) for g in groups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Compile the patterns one by one to report the offending one.
        for kind, pat, source in kindpats:
            try:
                _rematcher(_regex(kind, pat, globsuffix))
            except re.error:
                if not source:
                    raise error.Abort(_("invalid pattern (%s): %s") %
                                      (kind, pat))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (source, kind, pat))
        raise error.Abort(_("invalid pattern"))
1309 1309
1310 1310 def _patternrootsanddirs(kindpats):
1311 1311 '''Returns roots and directories corresponding to each pattern.
1312 1312
1313 1313 This calculates the roots and directories exactly matching the patterns and
1314 1314 returns a tuple of (roots, dirs) for each. It does not return other
1315 1315 directories which may also need to be considered, like the parent
1316 1316 directories.
1317 1317 '''
1318 1318 r = []
1319 1319 d = []
1320 1320 for kind, pat, source in kindpats:
1321 1321 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1322 1322 root = []
1323 1323 for p in pat.split('/'):
1324 1324 if '[' in p or '{' in p or '*' in p or '?' in p:
1325 1325 break
1326 1326 root.append(p)
1327 1327 r.append('/'.join(root) or '.')
1328 1328 elif kind in ('relpath', 'path'):
1329 1329 r.append(pat or '.')
1330 1330 elif kind in ('rootfilesin',):
1331 1331 d.append(pat or '.')
1332 1332 else: # relglob, re, relre
1333 1333 r.append('.')
1334 1334 return r, d
1335 1335
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # Only the first element of the (roots, dirs) pair is of interest.
    rootsanddirs = _patternrootsanddirs(kindpats)
    roots, dirs = rootsanddirs
    return roots
1340 1340
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # Collect the parents as non-recursive/exact directories, since they must
    # be scanned to get to either the roots or the other exact directories.
    parents = list(util.dirs(dirs))
    parents += util.dirs(roots)
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1381 1381
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so such
    # patterns are dropped before computing the roots.
    dironly = ('rootfilesin',)
    return _roots([kp for kp in kindpats if kp[0] not in dironly])
1394 1394
1395 1395 def _prefix(kindpats):
1396 1396 '''Whether all the patterns match a prefix (i.e. recursively)'''
1397 1397 for kind, pat, source in kindpats:
1398 1398 if kind not in ('path', 'relpath'):
1399 1399 return False
1400 1400 return True
1401 1401
# Regexp used by readpatternfile() to strip '#' comments; it is compiled
# there on first use and cached in this module-level variable.
_commentre = None
1403 1403
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if warn is set, it is called with a message for every "syntax:" line
    naming an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.
    '''
    global _commentre

    # Maps a syntax name to the kind prefix put in front of each pattern.
    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The with statement guarantees the file is closed even if parsing or
    # the warn callback raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # Switch the default syntax for the following lines.
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line "kind:" prefix overrides the current default.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now