##// END OF EJS Templates
match: add basic wrapper for boolean function...
Yuya Nishihara -
r38596:76838305 default
parent child Browse files
Show More
@@ -1,1032 +1,1046
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 encoding,
17 17 error,
18 18 pathutil,
19 19 pycompat,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
26 26 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
27 27 'listfile', 'listfile0', 'set', 'include', 'subinclude',
28 28 'rootfilesin')
29 29 cwdrelativepatternkinds = ('relpath', 'glob')
30 30
31 31 propertycache = util.propertycache
32 32
def _rematcher(regex):
    '''Compile ``regex`` with ``util.re`` and return a matching callable.

    The compiled object's ``test_match`` attribute is returned when it
    exists; otherwise its ``match`` method is returned.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
42 42
def _expandsets(kindpats, ctx, listsubrepos):
    '''Expand the 'set' patterns of ``kindpats``.

    Returns ``(fset, other)``: ``fset`` is the set of file names produced by
    evaluating every 'set' pattern through ``ctx.getfileset()`` (and, when
    ``listsubrepos`` is true, through each entry of ``ctx.substate`` with the
    subrepo path prepended); ``other`` is the list of the remaining
    ``(kind, pat, source)`` tuples in their original order.

    Raises error.ProgrammingError if a 'set' pattern is present but ``ctx``
    is falsy.'''
    matched = set()
    remaining = []

    for kind, pat, source in kindpats:
        if kind != 'set':
            remaining.append((kind, pat, source))
            continue

        if not ctx:
            raise error.ProgrammingError("fileset expression with no context")
        matched.update(ctx.getfileset(pat))

        if listsubrepos:
            for subpath in ctx.substate:
                subfiles = ctx.sub(subpath).getfileset(pat)
                matched.update(subpath + '/' + f for f in subfiles)

    return matched, remaining
64 64
def _expandsubinclude(kindpats, root):
    '''Separate the 'subinclude' patterns from the rest of ``kindpats``.

    Returns ``(relmatchers, other)``. ``relmatchers`` holds one
    ``(prefix, matcherargs)`` pair per 'subinclude' pattern, where
    ``matcherargs`` is the positional argument tuple later handed to
    match() and ``prefix`` is the directory of the included file relative to
    ``root`` (with a trailing '/' unless it is empty). ``other`` is the list
    of all non-subinclude patterns, unchanged.'''
    relmatchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind != 'subinclude':
            other.append((kind, pat, source))
            continue

        # The included file is resolved relative to the directory of the
        # file that referenced it.
        sourceroot = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourceroot, util.pconvert(pat))

        newroot = pathutil.dirname(path)
        matcherargs = (newroot, '', [], ['include:%s' % path])

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += '/'
        relmatchers.append((prefix, matcherargs))

    return relmatchers, other
88 88
def _kindpatsalwaysmatch(kindpats):
    """Checks whether the kindpats match everything, as e.g.
    'relpath:.' does.

    That is the case when every entry has an empty pattern and a kind of
    'relpath' or 'glob'; an empty ``kindpats`` therefore yields True.
    """
    return all(pat == '' and kind in ['relpath', 'glob']
               for kind, pat, source in kindpats)
97 97
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed through to the pattern normalization step
    ctx - context forwarded to the pattern/include matchers; it is also the
          source of the dirstate when icasefs is set
    listsubrepos - forwarded to the pattern/include matchers
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    The result is the matcher built from ``patterns`` (or an always-matcher
    when there are none), intersected with the ``include`` matcher and with
    the ``exclude`` matcher subtracted, in that order.

    Raises error.ProgrammingError when both ``icasefs`` and ``exact`` are
    set.
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        # Replacement for _donormalize that additionally runs every
        # non-regex pattern through the dirstate's normalize().
        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = patternmatcher(root, cwd, kindpats, ctx=ctx,
                               listsubrepos=listsubrepos, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)

    # include/exclude patterns always default to the 'glob' kind and never
    # receive the caller's badfn.
    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
180 180
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for ``files`` (plain paths, not patterns).'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
183 183
def always(root, cwd):
    '''Return an alwaysmatcher for ``root`` and ``cwd``.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
186 186
def never(root, cwd):
    '''Return a nevermatcher for ``root`` and ``cwd``.'''
    matcher = nevermatcher(root, cwd)
    return matcher
189 189
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is
    ``badfn``.

    The matcher passed in is left unmodified.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
197 197
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of ``(kind, pat, source)`` tuples. ``source`` is the
    listfile/include file a pattern was read from, or '' for patterns given
    directly.

    Raises error.Abort when a listfile cannot be read or when reading an
    included pattern file aborts. An include file that raises IOError is
    skipped, after calling ``warn`` if it is set.'''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # Every pattern read from the list file records that file as its
            # source.
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3; read the
                # message through .args instead of inst[0].
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
238 238
class basematcher(object):
    '''Base class of the matchers in this module.

    Subclasses override matchfn() and the predicate methods; the defaults
    defined here match nothing and report no explicit files.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn
        self._relativeuipath = relativeuipath

    def __call__(self, fn):
        '''Return the result of matchfn() for fn.'''
        return self.matchfn(fn)
    def __iter__(self):
        '''Iterate over the explicitly listed files.'''
        for f in self._files:
            yield f
    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        # NOTE: because of the and/or form, a falsy rel(f) also falls back to
        # abs(f).
        return (self._relativeuipath and self.rel(f)) or self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        # Set form of _files, used for membership tests.
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        '''Default match function: matches nothing.'''
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()
335 335
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        # Unlike the base class, display paths default to root-relative.
        basematcher.__init__(self, root, cwd, badfn,
                             relativeuipath=relativeuipath)

    def matchfn(self, f):
        '''Every file matches.'''
        return True

    def always(self):
        '''This matcher matches everything.'''
        return True

    def visitdir(self, dir):
        '''Every directory and all of its subdirectories are visited.'''
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
354 354
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, root, cwd, badfn=None):
        basematcher.__init__(self, root, cwd, badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def prefix(self):
        return True

    def isexact(self):
        return True

    def visitdir(self, dir):
        '''No directory needs to be visited.'''
        return False

    def __repr__(self):
        return r'<nevermatcher>'
377 377
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` is installed as this matcher's matchfn. ``predrepr``, when
    given, is passed to stringutil.buildrepr() to produce the predicate
    description shown in repr(); otherwise the byte repr of ``predfn`` is
    used.
    """

    def __init__(self, root, cwd, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(root, cwd, badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        s = (stringutil.buildrepr(self._predrepr)
             or pycompat.byterepr(self.matchfn))
        # The tag is spelled after the class name (it used to read
        # 'predicatenmatcher').
        return '<predicatematcher pred=%s>' % s
391
class patternmatcher(basematcher):
    '''Matches files against a list of normalized (kind, pat, source)
    patterns.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # Glob patterns must match up to the end of the file name.
        pats, matchfn = _buildmatch(ctx, kindpats, '$', listsubrepos, root)
        self._pats = pats
        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        # Parent directories of the explicit files, plus the root.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Return 'all' if ``dir`` is an explicit root of a prefix matcher,
        otherwise whether ``dir`` may contain matches.'''
        explicit = self._fileset
        if self._prefix and dir in explicit:
            return 'all'
        if '.' in explicit or dir in explicit or dir in self._dirs:
            return True
        return any(parentdir in explicit
                   for parentdir in util.finddirs(dir))

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
408 422
class includematcher(basematcher):
    '''Matcher built from include/exclude style patterns; a glob also
    matches everything below a directory it names.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        # A glob may end at a path separator as well as at end of name.
        pats, matchfn = _buildmatch(ctx, kindpats, '(?:/|$)', listsubrepos,
                                    root)
        self._pats = pats
        self.matchfn = matchfn
        self._prefix = _prefix(kindpats)
        roots, dirs = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)

    def visitdir(self, dir):
        '''Return 'all' if ``dir`` is a root of a prefix matcher, otherwise
        whether ``dir`` may contain matches.'''
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots or dir in self._dirs:
            return True
        return any(parentdir in roots
                   for parentdir in util.finddirs(dir))

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
436 450
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # A list is stored as-is; any other iterable is copied into a list.
        self._files = files if isinstance(files, list) else list(files)

    # A file matches exactly when it is one of the listed files.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Parent directories of the listed files, plus the root.
        return set(util.dirs(self._fileset)) | {'.'}

    def isexact(self):
        return True

    def visitdir(self, dir):
        '''Only directories leading to a listed file are visited.'''
        return dir in self._dirs

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
465 479
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        # Callbacks always come from the first matcher.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        '''Skip directories that m2 covers entirely; otherwise defer to m1
        (coerced to a bool).'''
        excluded = self._m2.visitdir(dir) == 'all'
        if excluded:
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
506 520
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    If either argument is None the other one is returned. If one of the
    matchers is an always-matcher, a shallow copy of the other is returned
    instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        merged = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(merged, attr, getattr(m1, attr))
        merged._relativeuipath |= m1._relativeuipath
        return merged

    if m2.always():
        merged = copy.copy(m1)
        merged._relativeuipath |= m2._relativeuipath
        return merged

    return intersectionmatcher(m1, m2)
531 545
class intersectionmatcher(basematcher):
    '''Matches the files matched by both m1 and m2; root, cwd and the
    callbacks are taken from m1.'''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # Filter the exact matcher's file list through the other matcher.
        if self._m1.isexact():
            exactm, otherm = self._m1, self._m2
        else:
            exactm, otherm = self._m2, self._m1
        return [f for f in exactm.files() if otherm(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
573 587
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> util.pconvert(m2.rel(b'b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    >>> m2.abs(b'c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the files below ``path`` and strip that prefix from them.
        dirprefix = path + "/"
        striplen = len(path) + 1
        self._files = [f[striplen:] for f in matcher._files
                       if f.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad() with the full path.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        return self._matcher.uipath(self._path + "/" + f)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' denotes the subdirectory itself.
        if dir == '.':
            fulldir = self._path
        else:
            fulldir = self._path + "/" + dir
        return self._matcher.visitdir(fulldir)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
655 669
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (root, cwd, bad, explicitdir, traversedir) are
    taken from the first matcher.

    ``matchers`` must be a non-empty sequence.
    """

    def __init__(self, matchers):
        m1 = matchers[0]
        super(unionmatcher, self).__init__(m1._root, m1._cwd)
        # NOTE(review): the docstring lists ``bad`` among the attributes
        # taken from the first matcher, but only explicitdir and traversedir
        # are copied here -- confirm whether that is intentional.
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        '''Return True as soon as any of the matchers matches f.'''
        for match in self._matchers:
            if match(f):
                return True
        return False

    def visitdir(self, dir):
        '''Return 'all' if any matcher says so, otherwise the OR of the
        individual answers.'''
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == 'all':
                return v
            r |= v
        return r

    @encoding.strmethod
    def __repr__(self):
        # Wrap in a 1-tuple so that a tuple of matchers is formatted as a
        # single %r argument instead of being unpacked by the % operator.
        return ('<unionmatcher matchers=%r>' % (self._matchers,))
688 702
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
692 706
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns ``(kind, pat)`` when the text before the first ':' is one of
    allpatternkinds, and ``(default, pattern)`` otherwise."""
    kind, sep, pat = pattern.partition(':')
    if sep and kind in allpatternkinds:
        return kind, pat
    return default, pattern
701 715
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned left to right; characters without a glob meaning
    are escaped with reescape(). '{', '}' and ',' form alternation groups,
    and '}' / ',' outside of a group are treated as literals.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    i, n = 0, len(pat)
    res = ''
    # Nesting depth of currently open '{' groups.
    group = 0
    escape = util.stringutil.reescape
    def peek():
        # Next unread character, or False at end of pattern.
        return i < n and pat[i:i + 1]
    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches any number of leading directories.
                    i += 1
                    res += '(?:.*/)?'
                else:
                    res += '.*'
            else:
                # A single '*' does not cross a path separator.
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # Look for the closing ']'; a leading '!' or ']' is part of the
            # class rather than its terminator.
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # Unterminated class: treat '[' as a literal.
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # Glob negation becomes regexp negation.
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # A leading '^' is literal in a glob, so escape it.
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            res += '|'
        elif c == '\\':
            # A backslash escapes the next character; a trailing backslash
            # is taken literally.
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                res += escape(c)
        else:
            res += escape(c)
    return res
782 796
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pattern always yields the empty regexp.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind in ('path', 'relpath'):
        # '.' stands for the root, which matches everything.
        if pat == '.':
            return ''
        return util.stringutil.reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root itself).
        dirprefix = ''
        if pat != '.':
            dirprefix = util.stringutil.reescape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + '[^/]+$'
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind == 'relre':
        # Unanchored regexps may match anywhere in the name.
        if not pat.startswith('^'):
            return '.*' + pat
        return pat
    return _globre(pat) + globsuffix
809 823
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    The matcher function combines up to three tests: subinclude matchers
    (created on first use), membership in the expanded filesets, and the
    regexp built from the remaining patterns.'''
    tests = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Cache of matchers already built, keyed by subinclude prefix.
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix):]):
                    return True
            return False
        tests.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        tests.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        tests.append(regexmatch)

    if len(tests) == 1:
        return regex, tests[0]
    return regex, lambda f: any(mf(f) for mf in tests)
844 858
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    When the combined regexp is longer than 20000 characters or compiling
    it raises OverflowError, the pattern list is split in two halves that
    are built recursively. When compilation raises re.error, error.Abort is
    raised, naming the first offending pattern if one can be identified."""
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p, s) in kindpats])
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            # A single pattern cannot be split any further.
            raise
        regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
        regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
        # The full regexp string is still returned; regexa and regexb are
        # not used.
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # Compile each pattern on its own to find the invalid one.
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                     (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
875 889
def _patternrootsanddirs(kindpats):
    '''Returns roots and directories corresponding to each pattern.

    This calculates the roots and directories exactly matching the patterns and
    returns a tuple of (roots, dirs) for each. It does not return other
    directories which may also need to be considered, like the parent
    directories.
    '''
    roots = []
    dirs = []
    for kind, pat, source in kindpats:
        if kind == 'glob':
            # find the non-glob prefix
            literal = []
            for part in pat.split('/'):
                if any(special in part for special in '[{*?'):
                    break
                literal.append(part)
            roots.append('/'.join(literal) or '.')
        elif kind in ('relpath', 'path'):
            roots.append(pat or '.')
        elif kind in ('rootfilesin',):
            dirs.append(pat or '.')
        else:
            # relglob, re, relre
            roots.append('.')
    return roots, dirs
901 915
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # Only the roots half of the (roots, dirs) pair is of interest here.
    return _patternrootsanddirs(kindpats)[0]
906 920
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    for paths in (exactdirs, roots):
        exactdirs.extend(util.dirs(paths))
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
941 955
def _explicitfiles(kindpats):
    '''Return the filenames that the patterns could be naming explicitly.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only specify directory names, never filenames, so
    # patterns of that kind are dropped before the roots are computed.
    return _roots([kp for kp in kindpats if kp[0] != 'rootfilesin'])
954 968
955 969 def _prefix(kindpats):
956 970 '''Whether all the patterns match a prefix (i.e. recursively)'''
957 971 for kind, pat, source in kindpats:
958 972 if kind not in ('path', 'relpath'):
959 973 return False
960 974 return True
961 975
# Comment-stripping regexp used by readpatternfile(); compiled lazily on the
# first line that contains a '#'.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    filepath is the path of the pattern file; it is opened in binary mode.

    warn, if true, is called with a message whenever a "syntax:" line names
    an unknown syntax; such lines are skipped either way.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.
    originalline is the line after comments, trailing white space and any
    recognized syntax prefix have been removed.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # Use a context manager so the file is closed even when reading the file
    # or calling warn() raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line prefix overrides the current default syntax. Both
            # the short name ("glob:") and the expanded form ("relglob:")
            # are recognized.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now