##// END OF EJS Templates
match: add prefixdirmatcher to adapt subrepo matcher back...
Yuya Nishihara -
r38630:0ba4cf3f default
parent child Browse files
Show More
@@ -1,1061 +1,1133 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 encoding,
17 17 error,
18 18 pathutil,
19 19 pycompat,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
# Every prefix that is accepted as the 'kind' part of a 'kind:pattern' string.
allpatternkinds = (
    're',
    'glob',
    'path',
    'relglob',
    'relpath',
    'relre',
    'listfile',
    'listfile0',
    'set',
    'include',
    'subinclude',
    'rootfilesin',
)
# Kinds whose pattern is canonicalized against cwd when patterns are
# normalized.
cwdrelativepatternkinds = (
    'relpath',
    'glob',
)
30 30
31 31 propertycache = util.propertycache
32 32
def _rematcher(regex):
    '''Compile regex with util.re and return a callable that matches against
    it.

    The compiled object's test_match attribute is preferred when it exists;
    otherwise its match method is returned.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matchfn = compiled.test_match
    except AttributeError:
        matchfn = compiled.match
    return matchfn
42 42
43 43 def _expandsets(kindpats, ctx, listsubrepos):
44 44 '''Returns the kindpats list with the 'set' patterns expanded.'''
45 45 fset = set()
46 46 other = []
47 47
48 48 for kind, pat, source in kindpats:
49 49 if kind == 'set':
50 50 if not ctx:
51 51 raise error.ProgrammingError("fileset expression with no "
52 52 "context")
53 53 s = ctx.getfileset(pat)
54 54 fset.update(s)
55 55
56 56 if listsubrepos:
57 57 for subpath in ctx.substate:
58 58 s = ctx.sub(subpath).getfileset(pat)
59 59 fset.update(subpath + '/' + f for f in s)
60 60
61 61 continue
62 62 other.append((kind, pat, source))
63 63 return fset, other
64 64
def _expandsubinclude(kindpats, root):
    '''Separate the 'subinclude' patterns from kindpats.

    Returns (relmatchers, other). relmatchers is a list of
    (prefix, matcherargs) pairs, one per subinclude, where matcherargs is
    the argument tuple later passed to match(); other holds the remaining
    (kind, pat, source) tuples.'''
    relmatchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind != 'subinclude':
            other.append((kind, pat, source))
            continue

        # the pattern file is located relative to the file that named it
        sourceroot = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourceroot, util.pconvert(pat))
        newroot = pathutil.dirname(path)

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += '/'
        matcherargs = (newroot, '', [], ['include:%s' % path])
        relmatchers.append((prefix, matcherargs))

    return relmatchers, other
88 88
89 89 def _kindpatsalwaysmatch(kindpats):
90 90 """"Checks whether the kindspats match everything, as e.g.
91 91 'relpath:.' does.
92 92 """
93 93 for kind, pat, source in kindpats:
94 94 if pat != '' or kind not in ['relpath', 'glob']:
95 95 return False
96 96 return True
97 97
def _buildkindpatsmatcher(matchercls, root, cwd, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a matcher for kindpats, expanding any 'set' patterns first.

    Non-fileset patterns are handed to matchercls; files produced by fileset
    expressions are matched through a predicatematcher. Returns a
    nevermatcher when there is nothing to match, the single matcher when
    only one was needed, and a unionmatcher of both otherwise.'''
    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)

    matchers = []
    if kindpats:
        matchers.append(matchercls(root, cwd, kindpats,
                                   listsubrepos=listsubrepos, badfn=badfn))
    if fset:
        matchers.append(predicatematcher(root, cwd, fset.__contains__,
                                         predrepr='fileset', badfn=badfn))

    if len(matchers) > 1:
        return unionmatcher(matchers)
    if matchers:
        return matchers[0]
    return nevermatcher(root, cwd, badfn=badfn)
115 115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # Same as _donormalize(), but additionally folds each non-regex
            # pattern to the case recorded by the dirstate.
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    original = pat
                    pat = dsnormalize(pat)

                    # Preserve the original to handle a case only rename.
                    if original != pat and original in dirstate:
                        folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded

    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, cwd, kindpats,
                                      ctx=ctx, listsubrepos=listsubrepos,
                                      badfn=badfn)

    if include:
        includepats = normalize(include, 'glob', root, cwd, auditor, warn)
        includem = _buildkindpatsmatcher(includematcher, root, cwd,
                                         includepats, ctx=ctx,
                                         listsubrepos=listsubrepos,
                                         badfn=None)
        m = intersectmatchers(m, includem)
    if exclude:
        excludepats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        excludem = _buildkindpatsmatcher(includematcher, root, cwd,
                                         excludepats, ctx=ctx,
                                         listsubrepos=listsubrepos,
                                         badfn=None)
        m = differencematcher(m, excludem)
    return m
199 199
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given list of files.'''
    return exactmatcher(root, cwd, files, badfn)
202 202
def always(root, cwd):
    '''Return an alwaysmatcher for the given root and cwd.'''
    return alwaysmatcher(root=root, cwd=cwd)
205 205
def never(root, cwd):
    '''Return a nevermatcher for the given root and cwd.'''
    return nevermatcher(root=root, cwd=cwd)
208 208
def badmatch(match, badfn):
    """Return a shallow copy of match whose bad attribute is badfn.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
216 216
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. source is '' for patterns
    given directly, and the name of the list/include file for patterns that
    were read from one.

    Raises error.Abort if a listfile cannot be read or if processing an
    include file aborts. An include file that raises IOError is skipped,
    with a message sent to warn when warn is set.'''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # entries read from a list file report that file as their source
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3; read the
                # message through .args, which works on Python 2 as well.
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
257 257
class basematcher(object):
    '''Common base of all matchers.

    On its own it matches nothing (matchfn() returns False, files() is
    empty) while still asking for every directory to be visited; subclasses
    override the individual methods.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        # only shadow the bad() method when a callback was actually given
        if badfn is not None:
            self.bad = badfn
        self._relativeuipath = relativeuipath

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relpath = self.rel(f)
            if relpath:
                return relpath
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
354 354
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        # unlike the base class, relativeuipath defaults to False here
        super(alwaysmatcher, self).__init__(root, cwd, badfn,
                                            relativeuipath=relativeuipath)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory and all of its subdirectories are of interest
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
373 373
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    # Calling the nevermatcher an exact matcher or a prefix matcher is a bit
    # odd, but it lets callers take the fast paths for either. files()
    # returns [], so there are no exact matches and no prefixes, and a fast
    # path that iterates over them is both efficient and correct.
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # nothing can match, so no directory needs to be visited
        return False

    def __repr__(self):
        return r'<nevermatcher>'
396 396
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    predfn is installed directly as matchfn; predrepr, when given, is what
    __repr__() shows in place of the function itself.
    """

    def __init__(self, root, cwd, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(root, cwd, badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        return '<predicatenmatcher pred=%s>' % described
410 410
class patternmatcher(basematcher):
    '''Matcher built from a list of (kind, pat, source) patterns; glob
    patterns are anchored at the end of the name ('$' suffix).'''

    def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        built = _buildmatch(kindpats, '$', listsubrepos, root)
        self._pats, self.matchfn = built

    @propertycache
    def _dirs(self):
        # parent directories of the explicit files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        explicit = self._fileset
        if self._prefix and dir in explicit:
            return 'all'
        if '.' in explicit or dir in explicit or dir in self._dirs:
            return True
        # otherwise visit only when some ancestor was listed explicitly
        return any(parentdir in explicit
                   for parentdir in util.finddirs(dir))

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
440 440
class includematcher(basematcher):
    '''Matcher used for include/exclude patterns; glob patterns may match
    either a whole name or a leading directory ('(?:/|$)' suffix).'''

    def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        built = _buildmatch(kindpats, '(?:/|$)', listsubrepos, root)
        self._pats, self.matchfn = built
        self._prefix = _prefix(kindpats)
        roots, dirs = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)

    def visitdir(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots or dir in self._dirs:
            return True
        # otherwise visit only when some ancestor is a recursive root
        return any(parentdir in roots
                   for parentdir in util.finddirs(dir))

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
467 467
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # a list is stored as given; any other iterable is copied into one
        self._files = files if isinstance(files, list) else list(files)

    # matching is simply membership in the explicit file set
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # parent directories of the listed files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
496 496
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        # the callbacks all come from the first matcher
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        # a directory fully covered by m2 can be skipped altogether
        excluded = self._m2.visitdir(dir)
        if excluded == 'all':
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
537 537
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the other one is returned. When one of the
    matchers reports always(), a copy of the other is returned instead of
    building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        m = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(m, attr, getattr(m1, attr))
        m._relativeuipath |= m1._relativeuipath
        return m
    if m2.always():
        m = copy.copy(m1)
        m._relativeuipath |= m2._relativeuipath
        return m
    return intersectionmatcher(m1, m2)
562 562
class intersectionmatcher(basematcher):
    '''Matches the files that both m1 and m2 match. root, cwd and the
    callbacks (bad, explicitdir, traversedir) are taken from m1.'''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # filter the exact matcher's files through the other matcher
        exactm, otherm = self._m1, self._m2
        if not exactm.isexact():
            exactm, otherm = otherm, exactm
        return [f for f in exactm.files() if otherm(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == 'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit1 and self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
604 604
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> util.pconvert(m2.rel(b'b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    >>> m2.abs(b'c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # keep only the files below path, with the 'path/' prefix removed
        dirprefix = path + "/"
        self._files = [f[len(dirprefix):] for f in matcher._files
                       if f.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        return self._matcher.uipath(self._path + "/" + f)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' stands for the subdirectory itself
        if dir == '.':
            fulldir = self._path
        else:
            fulldir = self._path + "/" + dir
        return self._matcher.visitdir(fulldir)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
686 686
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(b'root/d/e', b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'root', b'd/e/f', b'd/e', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'd/e/a.txt'))
    True
    >>> bool(m2(b'd/e/b.txt'))
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, root, cwd, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(root, cwd, badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._path = path
        self._pathprefix = path + '/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        # the wrapped matcher's files, re-rooted under the prefix path
        prefix = self._pathprefix
        return [prefix + f for f in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        # anything outside the prefix directory cannot match
        return False

    @propertycache
    def _pathdirs(self):
        # ancestors of the prefix path, plus the root
        return set(util.finddirs(self._path)) | {'.'}

    def visitdir(self, dir):
        if dir == self._path:
            # the prefix directory is the wrapped matcher's root
            return self._matcher.visitdir('.')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix):])
        return dir in self._pathdirs

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return ('<prefixdirmatcher path=%r, matcher=%r>'
                % (pycompat.bytestr(self._path), self._matcher))
758
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (root, cwd, bad, explicitdir, traversedir) are
    taken from the first matcher.
    """

    def __init__(self, matchers):
        m1 = matchers[0]
        super(unionmatcher, self).__init__(m1._root, m1._cwd)
        # Take bad from the first matcher as well, as documented above and
        # as the other composite matchers do; without this a badfn given to
        # the wrapped matchers is never called through the union.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # a file matches as soon as any of the wrapped matchers accepts it
        for match in self._matchers:
            if match(f):
                return True
        return False

    def visitdir(self, dir):
        # 'all' from any matcher wins; otherwise OR the individual answers
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == 'all':
                return v
            r |= v
        return r

    @encoding.strmethod
    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
719 791
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise default is returned.'''
    kind, _pat = _patsplit(pattern, default)
    return kind
723 795
724 796 def _patsplit(pattern, default):
725 797 """Split a string into the optional pattern kind prefix and the actual
726 798 pattern."""
727 799 if ':' in pattern:
728 800 kind, pat = pattern.split(':', 1)
729 801 if kind in allpatternkinds:
730 802 return kind, pat
731 803 return default, pattern
732 804
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    pos, end = 0, len(pat)
    pieces = []
    depth = 0  # number of '{' groups currently open
    escape = util.stringutil.reescape

    def peek():
        # next unread character, or a false value at the end of the pattern
        return pos < end and pat[pos:pos + 1]

    while pos < end:
        c = pat[pos:pos + 1]
        pos += 1
        if c not in '*?[{},\\':
            pieces.append(escape(c))
        elif c == '*':
            if peek() != '*':
                # a single star stays within one path component
                pieces.append('[^/]*')
            else:
                pos += 1
                if peek() == '/':
                    pos += 1
                    pieces.append('(?:.*/)?')
                else:
                    pieces.append('.*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # look for the closing bracket; a leading '!' or ']' belongs to
            # the character class
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: the bracket is a literal
                pieces.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                first = stuff[0:1]
                if first == '!':
                    stuff = '^' + stuff[1:]
                elif first == '^':
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        elif c == '{':
            depth += 1
            pieces.append('(?:')
        elif c == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif c == ',' and depth:
            pieces.append('|')
        elif c == '\\':
            following = peek()
            if following:
                pos += 1
                pieces.append(escape(following))
            else:
                pieces.append(escape(c))
        else:
            # '}' or ',' outside of any group
            pieces.append(escape(c))
    return ''.join(pieces)
813 885
814 886 def _regex(kind, pat, globsuffix):
815 887 '''Convert a (normalized) pattern of any kind into a regular expression.
816 888 globsuffix is appended to the regexp of globs.'''
817 889 if not pat:
818 890 return ''
819 891 if kind == 're':
820 892 return pat
821 893 if kind in ('path', 'relpath'):
822 894 if pat == '.':
823 895 return ''
824 896 return util.stringutil.reescape(pat) + '(?:/|$)'
825 897 if kind == 'rootfilesin':
826 898 if pat == '.':
827 899 escaped = ''
828 900 else:
829 901 # Pattern is a directory name.
830 902 escaped = util.stringutil.reescape(pat) + '/'
831 903 # Anything after the pattern must be a non-directory.
832 904 return escaped + '[^/]+$'
833 905 if kind == 'relglob':
834 906 return '(?:|.*/)' + _globre(pat) + globsuffix
835 907 if kind == 'relre':
836 908 if pat.startswith('^'):
837 909 return pat
838 910 return '.*' + pat
839 911 if kind == 'glob':
840 912 return _globre(pat) + globsuffix
841 913 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
842 914
def _buildmatch(kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    'subinclude' patterns are handled by a separate function that builds a
    matcher per subinclude on first use; the returned regexp string only
    covers the remaining patterns.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers are created lazily and cached by prefix
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) != 1:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)
    return regex, matchfuncs[0]
873 945
def _buildregexmatch(kindpats, globsuffix):
    """Compile kindpats into one alternation regexp.

    Returns a (regexp string, matcher function) pair. If the combined
    expression is too large, the pattern list is halved recursively and the
    two resulting matchers are or-ed together. An invalid pattern aborts
    with a message naming the offending pattern."""
    try:
        alternatives = [_regex(kind, pat, globsuffix)
                        for (kind, pat, source) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        matchfirst = _buildregexmatch(kindpats[:half], globsuffix)[1]
        matchsecond = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: matchfirst(s) or matchsecond(s)
    except re.error:
        # Recompile the patterns one at a time to find the culprit.
        for kind, pat, source in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(kind, pat, globsuffix))
            except re.error:
                if source:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (source, kind, pat))
                raise error.Abort(_("invalid pattern (%s): %s") % (kind, pat))
        raise error.Abort(_("invalid pattern"))
904 976
905 977 def _patternrootsanddirs(kindpats):
906 978 '''Returns roots and directories corresponding to each pattern.
907 979
908 980 This calculates the roots and directories exactly matching the patterns and
909 981 returns a tuple of (roots, dirs) for each. It does not return other
910 982 directories which may also need to be considered, like the parent
911 983 directories.
912 984 '''
913 985 r = []
914 986 d = []
915 987 for kind, pat, source in kindpats:
916 988 if kind == 'glob': # find the non-glob prefix
917 989 root = []
918 990 for p in pat.split('/'):
919 991 if '[' in p or '{' in p or '*' in p or '?' in p:
920 992 break
921 993 root.append(p)
922 994 r.append('/'.join(root) or '.')
923 995 elif kind in ('relpath', 'path'):
924 996 r.append(pat or '.')
925 997 elif kind in ('rootfilesin',):
926 998 d.append(pat or '.')
927 999 else: # relglob, re, relre
928 1000 r.append('.')
929 1001 return r, d
930 1002
def _roots(kindpats):
    '''Return only the recursive root directories for the given patterns.'''
    return _patternrootsanddirs(kindpats)[0]
935 1007
def _rootsanddirs(kindpats):
    '''Return (roots, dirs) for the patterns, including implied directories.

    roots are to be matched recursively, dirs non-recursively. On top of the
    entries derived directly from the patterns, dirs receives every parent
    directory of both lists, plus the root directory itself.

    >>> _rootsanddirs(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # The parents must be scanned to reach either the roots or the exact
    # directories, so they are added as non-recursive/exact directories.
    parentsofdirs = list(util.dirs(exactdirs))
    parentsofroots = list(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    alldirs = exactdirs + parentsofdirs + parentsofroots + ['.']

    return roots, alldirs
970 1042
def _explicitfiles(kindpats):
    '''Return the candidate explicit filenames named by the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name a directory, never a file, so those
    # patterns are left out before the roots are computed.
    filable = [kindpat for kindpat in kindpats
               if kindpat[0] != 'rootfilesin']
    return _roots(filable)
983 1055
984 1056 def _prefix(kindpats):
985 1057 '''Whether all the patterns match a prefix (i.e. recursively)'''
986 1058 for kind, pat, source in kindpats:
987 1059 if kind not in ('path', 'relpath'):
988 1060 return False
989 1061 return True
990 1062
# Cache for the comment-stripping regexp used by readpatternfile(). It stays
# None until the first pattern line containing '#' is read, at which point
# the regexp is compiled and stored here for later calls.
_commentre = None
992 1064
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    warn is called with a message when a "syntax:" line names an unknown
    syntax; pass a false value to ignore such lines silently.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where line is the text of the line once
    comments, trailing white space and any syntax prefix have been removed.
    This is useful to debug ignore patterns.

    The file is closed before returning, including when an error is raised
    while it is being parsed.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # The with block guarantees the handle is released even if warn() or the
    # regexp machinery raises in the middle of the file.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                # compile the comment regexp only once, on first need
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # switch the default syntax for the following lines
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now