##// END OF EJS Templates
includematcher: separate "parents" from "dirs"...
spectral -
r38989:5a7df82d default
parent child Browse files
Show More
@@ -1,1132 +1,1137 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 encoding,
17 17 error,
18 18 pathutil,
19 19 pycompat,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
# Every prefix that _patsplit() accepts as the 'kind' part of a 'kind:pat'
# pattern string.
allpatternkinds = (
    're',
    'glob',
    'path',
    'relglob',
    'relpath',
    'relre',
    'listfile',
    'listfile0',
    'set',
    'include',
    'subinclude',
    'rootfilesin',
)
# Kinds whose patterns _donormalize() canonicalizes relative to cwd.
cwdrelativepatternkinds = ('relpath', 'glob')
30 30
31 31 propertycache = util.propertycache
32 32
def _rematcher(regex):
    '''Compile regex with util.re and return a function that matches with it.

    The compiled object's test_match attribute is preferred when it exists
    (slightly faster, provided by facebook's re2 bindings); otherwise the
    regular match method is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        matchfn = compiled.test_match
    except AttributeError:
        matchfn = compiled.match
    return matchfn
42 42
43 43 def _expandsets(root, cwd, kindpats, ctx, listsubrepos, badfn):
44 44 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
45 45 matchers = []
46 46 other = []
47 47
48 48 for kind, pat, source in kindpats:
49 49 if kind == 'set':
50 50 if not ctx:
51 51 raise error.ProgrammingError("fileset expression with no "
52 52 "context")
53 53 matchers.append(ctx.matchfileset(pat, badfn=badfn))
54 54
55 55 if listsubrepos:
56 56 for subpath in ctx.substate:
57 57 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
58 58 pm = prefixdirmatcher(root, cwd, subpath, sm, badfn=badfn)
59 59 matchers.append(pm)
60 60
61 61 continue
62 62 other.append((kind, pat, source))
63 63 return matchers, other
64 64
65 65 def _expandsubinclude(kindpats, root):
66 66 '''Returns the list of subinclude matcher args and the kindpats without the
67 67 subincludes in it.'''
68 68 relmatchers = []
69 69 other = []
70 70
71 71 for kind, pat, source in kindpats:
72 72 if kind == 'subinclude':
73 73 sourceroot = pathutil.dirname(util.normpath(source))
74 74 pat = util.pconvert(pat)
75 75 path = pathutil.join(sourceroot, pat)
76 76
77 77 newroot = pathutil.dirname(path)
78 78 matcherargs = (newroot, '', [], ['include:%s' % path])
79 79
80 80 prefix = pathutil.canonpath(root, root, newroot)
81 81 if prefix:
82 82 prefix += '/'
83 83 relmatchers.append((prefix, matcherargs))
84 84 else:
85 85 other.append((kind, pat, source))
86 86
87 87 return relmatchers, other
88 88
89 89 def _kindpatsalwaysmatch(kindpats):
90 90 """"Checks whether the kindspats match everything, as e.g.
91 91 'relpath:.' does.
92 92 """
93 93 for kind, pat, source in kindpats:
94 94 if pat != '' or kind not in ['relpath', 'glob']:
95 95 return False
96 96 return True
97 97
def _buildkindpatsmatcher(matchercls, root, cwd, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher for kindpats using matchercls.

    Fileset ('set') patterns are expanded to their own matchers first; the
    remaining patterns, if any, are handed to matchercls. The result is a
    nevermatcher when nothing was produced, the sole matcher when there is
    exactly one, and otherwise a unionmatcher over all of them (the
    matchercls instance first, fileset matchers after it).
    '''
    filesetmatchers, remaining = _expandsets(root, cwd, kindpats, ctx=ctx,
                                             listsubrepos=listsubrepos,
                                             badfn=badfn)
    combined = []
    if remaining:
        combined.append(matchercls(root, cwd, remaining,
                                   listsubrepos=listsubrepos, badfn=badfn))
    combined.extend(filesetmatchers)

    count = len(combined)
    if count == 0:
        return nevermatcher(root, cwd, badfn=badfn)
    if count == 1:
        return combined[0]
    return unionmatcher(combined)
114 114
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """Build a matcher object for a set of file patterns.

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Raises error.ProgrammingError when both icasefs and exact are requested.
    """
    if not icasefs:
        normalize = _donormalize
    else:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # Same as _donormalize(), but every non-regex pattern is also
            # run through the dirstate's case normalization.
            result = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    result.append((kind, pat, source))
                    continue

                original = pat
                pat = dsnormalize(original)
                # Preserve the original to handle a case only rename.
                if original != pat and original in dirstate:
                    result.append((kind, original, source))
                result.append((kind, pat, source))
            return result

    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, cwd, kindpats,
                                      ctx=ctx, listsubrepos=listsubrepos,
                                      badfn=badfn)

    if include:
        includepats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, cwd, includepats,
                                   ctx=ctx, listsubrepos=listsubrepos,
                                   badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        excludepats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, cwd, excludepats,
                                   ctx=ctx, listsubrepos=listsubrepos,
                                   badfn=None)
        m = differencematcher(m, em)
    return m
198 198
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for files, forwarding root, cwd and badfn.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
201 201
def always(root, cwd):
    '''Return an alwaysmatcher for the given root and cwd.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
204 204
def never(root, cwd):
    '''Return a nevermatcher for the given root and cwd.'''
    matcher = nevermatcher(root, cwd)
    return matcher
207 207
def badmatch(match, badfn):
    """Return a shallow copy of match whose bad attribute is badfn.

    The matcher passed in is left unmodified.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
215 215
216 216 def _donormalize(patterns, default, root, cwd, auditor, warn):
217 217 '''Convert 'kind:pat' from the patterns list to tuples with kind and
218 218 normalized and rooted patterns and with listfiles expanded.'''
219 219 kindpats = []
220 220 for kind, pat in [_patsplit(p, default) for p in patterns]:
221 221 if kind in cwdrelativepatternkinds:
222 222 pat = pathutil.canonpath(root, cwd, pat, auditor)
223 223 elif kind in ('relglob', 'path', 'rootfilesin'):
224 224 pat = util.normpath(pat)
225 225 elif kind in ('listfile', 'listfile0'):
226 226 try:
227 227 files = util.readfile(pat)
228 228 if kind == 'listfile0':
229 229 files = files.split('\0')
230 230 else:
231 231 files = files.splitlines()
232 232 files = [f for f in files if f]
233 233 except EnvironmentError:
234 234 raise error.Abort(_("unable to read file list (%s)") % pat)
235 235 for k, p, source in _donormalize(files, default, root, cwd,
236 236 auditor, warn):
237 237 kindpats.append((k, p, pat))
238 238 continue
239 239 elif kind == 'include':
240 240 try:
241 241 fullpath = os.path.join(root, util.localpath(pat))
242 242 includepats = readpatternfile(fullpath, warn)
243 243 for k, p, source in _donormalize(includepats, default,
244 244 root, cwd, auditor, warn):
245 245 kindpats.append((k, p, source or pat))
246 246 except error.Abort as inst:
247 247 raise error.Abort('%s: %s' % (pat, inst[0]))
248 248 except IOError as inst:
249 249 if warn:
250 250 warn(_("skipping unreadable pattern file '%s': %s\n") %
251 251 (pat, stringutil.forcebytestr(inst.strerror)))
252 252 continue
253 253 # else: re or relre - which cannot be normalized
254 254 kindpats.append((kind, pat, ''))
255 255 return kindpats
256 256
class basematcher(object):
    '''Common base for matchers.

    On its own it matches nothing (matchfn() returns False), lists no files
    and asks for every directory to be visited. Subclasses override the
    pieces they need.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn
        self._relativeuipath = relativeuipath

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relative = self.rel(f)
            if relative:
                return relative
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
353 353
class alwaysmatcher(basematcher):
    '''Matches everything.

    Unlike the base class, relativeuipath defaults to False here.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        super(alwaysmatcher, self).__init__(root, cwd, badfn,
                                            relativeuipath=relativeuipath)

    def matchfn(self, f):
        # Every file matches.
        return True

    def visitdir(self, dir):
        # Every directory, and everything below it, is worth visiting.
        return 'all'

    def always(self):
        return True

    def __repr__(self):
        return r'<alwaysmatcher>'
372 372
class nevermatcher(basematcher):
    '''Matches nothing.

    matchfn() is inherited from basematcher and returns False for every
    file; no directory is ever visited.
    '''

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    def visitdir(self, dir):
        return False

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def __repr__(self):
        return r'<nevermatcher>'
395 395
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function.

    predfn is installed directly as the instance's matchfn; predrepr, when
    given, is what __repr__ formats instead of the function itself.
    """

    def __init__(self, root, cwd, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(root, cwd, badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            # Nothing usable came from predrepr; describe the function.
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): 'predicatenmatcher' looks like a typo of the class
        # name, but it is runtime output and is kept unchanged here.
        return '<predicatenmatcher pred=%s>' % described
409 409
class patternmatcher(basematcher):
    '''Matcher built from a list of (kind, pat, source) patterns.

    The match function is compiled by _buildmatch() with '$' as the glob
    suffix; files() comes from _explicitfiles() and prefix() from _prefix().
    '''

    def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', listsubrepos,
                                               root)

    @propertycache
    def _dirs(self):
        # Directories computed by util.dirs() from the explicit files, plus
        # the root ('.').
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        if self._prefix and dir in self._fileset:
            return 'all'
        explicit = self._fileset
        if '.' in explicit or dir in explicit:
            return True
        if dir in self._dirs:
            return True
        # Otherwise visit only if some ancestor of dir was listed.
        return any(ancestor in explicit
                   for ancestor in util.finddirs(dir))

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
439 439
class includematcher(basematcher):
    '''Matcher for include/exclude style patterns.

    The match function is compiled by _buildmatch() with '(?:/|$)' as the
    glob suffix. visitdir() is answered from three directory sets obtained
    from _rootsdirsandparents().
    '''

    def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)',
                                               listsubrepos, root)
        self._prefix = _prefix(kindpats)
        # Single source for all three sets; the earlier two-value
        # _rootsanddirs() call was superseded by this one.
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def visitdir(self, dir):
        '''Return 'all' when dir is a root and the matcher is a prefix
        matcher; otherwise True/False depending on whether dir is, leads to,
        or lies below an included directory.'''
        if self._prefix and dir in self._roots:
            return 'all'
        return ('.' in self._roots or
                dir in self._roots or
                dir in self._dirs or
                dir in self._parents or
                any(parentdir in self._roots
                    for parentdir in util.finddirs(dir)))

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
466 470
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # A list is stored as given; any other iterable is materialized.
        self._files = files if isinstance(files, list) else list(files)

    # A file matches exactly when it is one of the listed files.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
495 499
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        # Callbacks are inherited from the first matcher only.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        included = self._m1(f)
        if not included:
            return included
        return not self._m2(f)

    @propertycache
    def _files(self):
        listed = self._m1.files()
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return listed
        return [f for f in listed if self(f)]

    def visitdir(self, dir):
        # A directory the second matcher takes entirely can be skipped.
        fullyexcluded = self._m2.visitdir(dir) == 'all'
        return False if fullyexcluded else bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
536 540
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    If either argument is None the other one is returned. If one side is an
    always() matcher, a shallow copy of the other side is returned instead
    of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        merged = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(merged, attr, getattr(m1, attr))
        merged._relativeuipath |= m1._relativeuipath
        return merged

    if m2.always():
        merged = copy.copy(m1)
        merged._relativeuipath |= m2._relativeuipath
        return merged

    return intersectionmatcher(m1, m2)
561 565
class intersectionmatcher(basematcher):
    '''Matches a file only when both wrapped matchers match it.

    root, cwd, bad, explicitdir and traversedir are taken from the first
    matcher.
    '''
    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # Filter the exact side's file list through the other matcher.
        if self._m1.isexact():
            exactside, otherside = self._m1, self._m2
        else:
            exactside, otherside = self._m2, self._m1
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        first = self._m1(f)
        if not first:
            return first
        return self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            return self._m2.visitdir(dir)
        if not first:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
603 607
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    Paths given to this matcher are relative to the subdirectory; the
    subdirectory path is prepended before delegating to the wrapped matcher,
    and stripped from the wrapped matcher's files:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> util.pconvert(m2.rel(b'b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    >>> m2.abs(b'c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the files below path, with the 'path/' prefix removed.
        dirprefix = path + "/"
        striplen = len(path) + 1
        self._files = [name[striplen:] for name in matcher._files
                       if name.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        return self._matcher.uipath(self._path + "/" + f)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' is the subdirectory itself from the wrapped matcher's view.
        if dir == '.':
            target = self._path
        else:
            target = self._path + "/" + dir
        return self._matcher.visitdir(target)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
685 689
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'root', b'd/e/f', b'd/e', m1)
    >>> bool(m2(b'a.txt'),)
    False
    >>> bool(m2(b'd/e/a.txt'))
    True
    >>> bool(m2(b'd/e/b.txt'))
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, root, cwd, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(root, cwd, badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._matcher = matcher
        self._path = path
        self._pathprefix = path + '/'

    @propertycache
    def _files(self):
        # The wrapped matcher's files, re-rooted under the prefix path.
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        # Anything outside the prefix directory cannot match.
        return False

    @propertycache
    def _pathdirs(self):
        return set(util.finddirs(self._path)) | {'.'}

    def visitdir(self, dir):
        if dir == self._path:
            # The prefix directory is the wrapped matcher's root.
            return self._matcher.visitdir('.')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix):])
        # Otherwise only the directories leading to the prefix are visited.
        return dir in self._pathdirs

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return ('<prefixdirmatcher path=%r, matcher=%r>'
                % (pycompat.bytestr(self._path), self._matcher))
757 761
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (root, cwd, bad, explicitdir, traversedir) are
    taken from the first matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__(first._root, first._cwd)
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # True as soon as any member matcher accepts the file.
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        visit = False
        for member in self._matchers:
            answer = member.visitdir(dir)
            if answer == 'all':
                # One member wants the whole subtree; nothing can top that.
                return answer
            visit |= answer
        return visit

    @encoding.strmethod
    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
790 794
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise default is returned.
    '''
    kind, _pat = _patsplit(pattern, default)
    return kind
794 798
795 799 def _patsplit(pattern, default):
796 800 """Split a string into the optional pattern kind prefix and the actual
797 801 pattern."""
798 802 if ':' in pattern:
799 803 kind, pat = pattern.split(':', 1)
800 804 if kind in allpatternkinds:
801 805 return kind, pat
802 806 return default, pattern
803 807
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    escape = util.stringutil.reescape
    pos, end = 0, len(pat)
    pieces = []
    # Number of '{' groups currently open; '}' and ',' are only special
    # inside one.
    depth = 0
    while pos < end:
        ch = pat[pos:pos + 1]
        pos += 1
        # Lookahead character; slicing past the end gives an empty string.
        following = pat[pos:pos + 1]
        if ch not in '*?[{},\\':
            pieces.append(escape(ch))
        elif ch == '*':
            if following != '*':
                pieces.append('[^/]*')
            else:
                pos += 1
                if pat[pos:pos + 1] == '/':
                    pos += 1
                    pieces.append('(?:.*/)?')
                else:
                    pieces.append('.*')
        elif ch == '?':
            pieces.append('.')
        elif ch == '[':
            # Find the closing ']'; a '!' or ']' right after '[' belongs to
            # the class and is stepped over first.
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # Unterminated class: treat '[' as a literal.
                pieces.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                lead = stuff[0:1]
                if lead == '!':
                    stuff = '^' + stuff[1:]
                elif lead == '^':
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        elif ch == '{':
            depth += 1
            pieces.append('(?:')
        elif ch == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif ch == ',' and depth:
            pieces.append('|')
        elif ch == '\\':
            if following:
                # Backslash escapes the next character.
                pos += 1
                pieces.append(escape(following))
            else:
                pieces.append(escape(ch))
        else:
            pieces.append(escape(ch))
    return ''.join(pieces)
884 888
885 889 def _regex(kind, pat, globsuffix):
886 890 '''Convert a (normalized) pattern of any kind into a regular expression.
887 891 globsuffix is appended to the regexp of globs.'''
888 892 if not pat:
889 893 return ''
890 894 if kind == 're':
891 895 return pat
892 896 if kind in ('path', 'relpath'):
893 897 if pat == '.':
894 898 return ''
895 899 return util.stringutil.reescape(pat) + '(?:/|$)'
896 900 if kind == 'rootfilesin':
897 901 if pat == '.':
898 902 escaped = ''
899 903 else:
900 904 # Pattern is a directory name.
901 905 escaped = util.stringutil.reescape(pat) + '/'
902 906 # Anything after the pattern must be a non-directory.
903 907 return escaped + '[^/]+$'
904 908 if kind == 'relglob':
905 909 return '(?:|.*/)' + _globre(pat) + globsuffix
906 910 if kind == 'relre':
907 911 if pat.startswith('^'):
908 912 return pat
909 913 return '.*' + pat
910 914 if kind == 'glob':
911 915 return _globre(pat) + globsuffix
912 916 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
913 917
def _buildmatch(kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    'subinclude' patterns are handled by a separate function that builds
    their matchers on first use; all other patterns are compiled into one
    regexp. The returned regexp string is '' when there are no such other
    patterns.
    '''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # prefix -> matcher, filled in the first time a file under that
        # prefix is tested.
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatcher = submatchers.get(prefix)
                if submatcher is None:
                    submatcher = match(*matcherargs)
                    submatchers[prefix] = submatcher

                if submatcher(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) != 1:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)
    return regex, matchfuncs[0]
944 948
def _buildregexmatch(kindpats, globsuffix):
    """Compile kindpats into one alternation regexp.

    Returns a (regex, matchfn) pair. When the combined regexp is too large
    the pattern list is halved recursively and the two halves are or-ed
    together. An invalid pattern is reported with error.Abort, naming the
    offending pattern (and its source, when known)."""
    try:
        alternatives = [_regex(kind, pat, globsuffix)
                        for (kind, pat, source) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        a = _buildregexmatch(kindpats[:half], globsuffix)[1]
        b = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # Recompile the patterns one at a time to find out which one is
        # responsible for the failure.
        for kind, pat, source in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(kind, pat, globsuffix))
            except re.error:
                if source:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (source, kind, pat))
                raise error.Abort(_("invalid pattern (%s): %s") % (kind, pat))
        raise error.Abort(_("invalid pattern"))
975 979
976 980 def _patternrootsanddirs(kindpats):
977 981 '''Returns roots and directories corresponding to each pattern.
978 982
979 983 This calculates the roots and directories exactly matching the patterns and
980 984 returns a tuple of (roots, dirs) for each. It does not return other
981 985 directories which may also need to be considered, like the parent
982 986 directories.
983 987 '''
984 988 r = []
985 989 d = []
986 990 for kind, pat, source in kindpats:
987 991 if kind == 'glob': # find the non-glob prefix
988 992 root = []
989 993 for p in pat.split('/'):
990 994 if '[' in p or '{' in p or '*' in p or '?' in p:
991 995 break
992 996 root.append(p)
993 997 r.append('/'.join(root) or '.')
994 998 elif kind in ('relpath', 'path'):
995 999 r.append(pat or '.')
996 1000 elif kind in ('rootfilesin',):
997 1001 d.append(pat or '.')
998 1002 else: # relglob, re, relre
999 1003 r.append('.')
1000 1004 return r, d
1001 1005
def _roots(kindpats):
    '''Return the directories the given patterns match recursively.

    Only the roots computed by _patternrootsanddirs() are returned; the
    exact (non-recursive) directories are discarded.'''
    return _patternrootsanddirs(kindpats)[0]
1006 1010
def _rootsdirsandparents(kindpats):
    '''Returns roots, exact directories, and parent directories from patterns.

    roots are directories to match recursively, whereas exact directories
    (dirs) should be matched non-recursively. parents are the directories
    that need to be implicitly considered because they must be scanned to
    get to either the roots or the exact directories; the root directory
    '.' is always among them.

    Returns a tuple of three lists: (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    r, d = _patternrootsanddirs(kindpats)

    p = []
    # Collect the parents separately from the roots and the exact
    # directories, since they must be scanned to get to either of them.
    p.extend(util.dirs(d))
    p.extend(util.dirs(r))
    # util.dirs() does not include the root directory, so add it manually
    p.append('.')

    return r, d, p
1041 1046
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # Pattern kinds that can only name directories, never files; everything
    # else may specify a filename and is therefore kept.
    dironlykinds = ('rootfilesin',)
    return _roots([kp for kp in kindpats if kp[0] not in dironlykinds])
1054 1059
1055 1060 def _prefix(kindpats):
1056 1061 '''Whether all the patterns match a prefix (i.e. recursively)'''
1057 1062 for kind, pat, source in kindpats:
1058 1063 if kind not in ('path', 'relpath'):
1059 1064 return False
1060 1065 return True
1061 1066
# Compiled regexp used by readpatternfile() to strip comments; it is built
# the first time a line containing '#' is read and then reused.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    if warn is set, it is called with a message for every "syntax:" line
    naming an unknown syntax; such a line is ignored either way.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where line is the text of the line once
    comments, trailing white space and any syntax prefix have been removed.
    This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # The with block guarantees the file is closed even if reading or
    # parsing a line raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line prefix overrides the current default syntax, either
            # in its internal form ('relre:', 'include', ...) or in its
            # user-facing form ('re:', 'glob:', ...).
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now