##// END OF EJS Templates
match: some minimal pycompat fixes guided by test-hgignore.t...
Augie Fackler -
r36590:9adfa487 default
parent child Browse files
Show More
@@ -1,1028 +1,1029
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 encoding,
17 17 error,
18 18 pathutil,
19 pycompat,
19 20 util,
20 21 )
21 22
# Every pattern kind prefix recognised in a 'kind:pattern' string.
allpatternkinds = (
    're', 'glob', 'path', 'relglob', 'relpath', 'relre',
    'listfile', 'listfile0', 'set', 'include', 'subinclude',
    'rootfilesin',
)
# Kinds whose pattern is canonicalized relative to cwd by _donormalize().
cwdrelativepatternkinds = ('relpath', 'glob')

# Local alias for the caching property decorator used by the matcher classes.
propertycache = util.propertycache
28 29
def _rematcher(regex):
    '''Compile regex through util.re and return a callable that matches a
    string against it.

    The compiled object's test_match attribute is preferred when it exists;
    otherwise its regular match method is returned.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        fastmatch = compiled.test_match
    except AttributeError:
        return compiled.match
    return fastmatch
38 39
39 40 def _expandsets(kindpats, ctx, listsubrepos):
40 41 '''Returns the kindpats list with the 'set' patterns expanded.'''
41 42 fset = set()
42 43 other = []
43 44
44 45 for kind, pat, source in kindpats:
45 46 if kind == 'set':
46 47 if not ctx:
47 48 raise error.ProgrammingError("fileset expression with no "
48 49 "context")
49 50 s = ctx.getfileset(pat)
50 51 fset.update(s)
51 52
52 53 if listsubrepos:
53 54 for subpath in ctx.substate:
54 55 s = ctx.sub(subpath).getfileset(pat)
55 56 fset.update(subpath + '/' + f for f in s)
56 57
57 58 continue
58 59 other.append((kind, pat, source))
59 60 return fset, other
60 61
61 62 def _expandsubinclude(kindpats, root):
62 63 '''Returns the list of subinclude matcher args and the kindpats without the
63 64 subincludes in it.'''
64 65 relmatchers = []
65 66 other = []
66 67
67 68 for kind, pat, source in kindpats:
68 69 if kind == 'subinclude':
69 70 sourceroot = pathutil.dirname(util.normpath(source))
70 71 pat = util.pconvert(pat)
71 72 path = pathutil.join(sourceroot, pat)
72 73
73 74 newroot = pathutil.dirname(path)
74 75 matcherargs = (newroot, '', [], ['include:%s' % path])
75 76
76 77 prefix = pathutil.canonpath(root, root, newroot)
77 78 if prefix:
78 79 prefix += '/'
79 80 relmatchers.append((prefix, matcherargs))
80 81 else:
81 82 other.append((kind, pat, source))
82 83
83 84 return relmatchers, other
84 85
85 86 def _kindpatsalwaysmatch(kindpats):
86 87 """"Checks whether the kindspats match everything, as e.g.
87 88 'relpath:.' does.
88 89 """
89 90 for kind, pat, source in kindpats:
90 91 if pat != '' or kind not in ['relpath', 'glob']:
91 92 return False
92 93 return True
93 94
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # Same as _donormalize(), but additionally folds every
            # non-regex pattern to the case recorded by the dirstate.
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                original = pat
                pat = dsnormalize(pat)

                # Preserve the original to handle a case only rename.
                if original != pat and original in dirstate:
                    folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded

    # Build the primary matcher from `patterns`.
    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = patternmatcher(root, cwd, kindpats, ctx=ctx,
                               listsubrepos=listsubrepos, badfn=badfn)

    # Narrow it with the include patterns, then subtract the excludes.
    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
176 177
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given list of files.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
179 180
def always(root, cwd):
    '''Return an alwaysmatcher for the given root and cwd.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
182 183
def never(root, cwd):
    '''Return a nevermatcher for the given root and cwd.'''
    matcher = nevermatcher(root, cwd)
    return matcher
185 186
def badmatch(match, badfn):
    """Return a shallow copy of the given matcher whose bad method is
    replaced by badfn. The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
193 194
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. source is '' for patterns
    given directly, the list file name for patterns read through
    'listfile'/'listfile0', and the include file name (unless a nested
    source is already set) for patterns read through 'include'.

    Raises error.Abort when a list file cannot be read or when processing
    an included pattern file aborts; an unreadable include file is only
    reported through warn (if given) and skipped.'''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # Patterns coming from a list file report that file as source.
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3, so read the
                # message from .args (same value as inst[0] on Python 2).
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, util.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
234 235
class basematcher(object):
    '''Base class of all matchers: matches nothing, lists no files, and
    asks for every directory to be visited. Subclasses override the
    relevant methods.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn
        self._relativeuipath = relativeuipath

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relative = self.rel(f)
            # An empty relative path falls through to abs(), as before.
            if relative:
                return relative
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        '''Match nothing by default.'''
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
331 332
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        # Unlike the base class, display paths default to root-relative.
        basematcher.__init__(self, root, cwd, badfn,
                             relativeuipath=relativeuipath)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # Every directory and all of its subdirectories are of interest.
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
350 351
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, root, cwd, badfn=None):
        basematcher.__init__(self, root, cwd, badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # Nothing can match, so no directory needs to be walked.
        return False

    def __repr__(self):
        return r'<nevermatcher>'
373 374
class patternmatcher(basematcher):
    '''Matches files against a list of (kind, pat, source) patterns.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # Globs are anchored at the end of the name ('$' suffix).
        pats, matchfn = _buildmatch(ctx, kindpats, '$', listsubrepos, root)
        self._pats = pats
        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        # Parent directories of the explicit files, plus the root.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        fileset = self._fileset
        if self._prefix and dir in fileset:
            return 'all'
        if '.' in fileset or dir in fileset or dir in self._dirs:
            return True
        # Otherwise visit only when some ancestor was listed explicitly.
        return any(parentdir in fileset
                   for parentdir in util.finddirs(dir))

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % self._pats)
404 405
class includematcher(basematcher):
    '''Matcher built from include/exclude style patterns, where a glob also
    matches everything below a matching directory.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        # The '(?:/|$)' suffix lets a glob match a directory prefix too.
        pats, matchfn = _buildmatch(ctx, kindpats, '(?:/|$)',
                                    listsubrepos, root)
        self._pats = pats
        self.matchfn = matchfn
        self._prefix = _prefix(kindpats)
        roots, dirs = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)

    def visitdir(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots or dir in self._dirs:
            return True
        # Otherwise visit only when some ancestor is a recursive root.
        return any(parentdir in roots
                   for parentdir in util.finddirs(dir))

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
432 433
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    # Matching a file is the same as asking whether it was listed.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Parent directories of the listed files, plus the root.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
461 462
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        # Callbacks are inherited from the first matcher only.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        # A directory fully covered by the subtracted matcher is skipped.
        if self._m2.visitdir(dir) == 'all':
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
502 503
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the other one is returned. When one of
    the matchers always matches, a copy of the other is returned instead of
    building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        merged = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(merged, attr, getattr(m1, attr))
        merged._relativeuipath |= m1._relativeuipath
        return merged

    if m2.always():
        merged = copy.copy(m1)
        merged._relativeuipath |= m2._relativeuipath
        return merged

    return intersectionmatcher(m1, m2)
527 528
class intersectionmatcher(basematcher):
    '''Matches a file only when both wrapped matchers match it. Root, cwd
    and the callbacks are taken from the first matcher.'''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # Filter the exact matcher's files through the other matcher.
        exactm, otherm = self._m1, self._m2
        if not exactm.isexact():
            exactm, otherm = otherm, exactm
        return [f for f in exactm.files() if otherm(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
569 570
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> util.pconvert(m2.rel(b'b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    >>> m2.abs(b'c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the parent's files below `path`, with the prefix removed.
        dirprefix = path + "/"
        skip = len(path) + 1
        self._files = [name[skip:] for name in matcher._files
                       if name.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        return self._matcher.uipath(self._path + "/" + f)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' denotes the subdirectory itself in the parent's namespace.
        if dir == '.':
            parentdir = self._path
        else:
            parentdir = self._path + "/" + dir
        return self._matcher.visitdir(parentdir)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
651 652
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (root, cwd, bad, explicitdir, traversedir) are
    taken from the first matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__(first._root, first._cwd)
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # A file matches as soon as any member matcher accepts it.
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            # 'all' from any member wins immediately.
            if answer == 'all':
                return answer
            visit |= answer
        return visit

    @encoding.strmethod
    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
684 685
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind; otherwise
    return default.'''
    kind, pat = _patsplit(pattern, default)
    return kind
688 689
689 690 def _patsplit(pattern, default):
690 691 """Split a string into the optional pattern kind prefix and the actual
691 692 pattern."""
692 693 if ':' in pattern:
693 694 kind, pat = pattern.split(':', 1)
694 695 if kind in allpatternkinds:
695 696 return kind, pat
696 697 return default, pattern
697 698
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a\/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    pos, end = 0, len(pat)
    out = ''
    depth = 0  # number of currently open '{' groups
    escape = util.re.escape

    def peek():
        # Next unread character, or False once the input is exhausted.
        return pos < end and pat[pos:pos + 1]

    while pos < end:
        ch = pat[pos:pos + 1]
        pos += 1
        if ch not in '*?[{},\\':
            out += escape(ch)
        elif ch == '*':
            if peek() == '*':
                pos += 1
                if peek() == '/':
                    # '**/' may also match no directory at all
                    pos += 1
                    out += '(?:.*/)?'
                else:
                    out += '.*'
            else:
                out += '[^/]*'
        elif ch == '?':
            out += '.'
        elif ch == '[':
            # Look for the closing ']'; a leading '!' or ']' is part of
            # the class rather than its terminator.
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat '[' literally
                out += '\\['
            else:
                stuff = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if stuff[0:1] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    stuff = '\\' + stuff
                out = '%s[%s]' % (out, stuff)
        elif ch == '{':
            depth += 1
            out += '(?:'
        elif ch == '}' and depth:
            out += ')'
            depth -= 1
        elif ch == ',' and depth:
            out += '|'
        elif ch == '\\':
            following = peek()
            if following:
                pos += 1
                out += escape(following)
            else:
                out += escape(ch)
        else:
            out += escape(ch)
    return out
778 779
779 780 def _regex(kind, pat, globsuffix):
780 781 '''Convert a (normalized) pattern of any kind into a regular expression.
781 782 globsuffix is appended to the regexp of globs.'''
782 783 if not pat:
783 784 return ''
784 785 if kind == 're':
785 786 return pat
786 787 if kind in ('path', 'relpath'):
787 788 if pat == '.':
788 789 return ''
789 790 return util.re.escape(pat) + '(?:/|$)'
790 791 if kind == 'rootfilesin':
791 792 if pat == '.':
792 793 escaped = ''
793 794 else:
794 795 # Pattern is a directory name.
795 796 escaped = util.re.escape(pat) + '/'
796 797 # Anything after the pattern must be a non-directory.
797 798 return escaped + '[^/]+$'
798 799 if kind == 'relglob':
799 800 return '(?:|.*/)' + _globre(pat) + globsuffix
800 801 if kind == 'relre':
801 802 if pat.startswith('^'):
802 803 return pat
803 804 return '.*' + pat
804 805 return _globre(pat) + globsuffix
805 806
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    Subincludes, fileset expressions and the remaining regexp-convertible
    patterns each contribute one match function; the returned function
    matches when any of them does.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for subincludes are created on first use, per prefix.
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                mf = submatchers.get(prefix)
                if mf is None:
                    mf = submatchers[prefix] = match(*matcherargs)

                if mf(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
840 841
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort naming the offending pattern (and its source, when
    known) if the combined regexp does not compile."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        regexa, a = _buildregexmatch(kindpats[:half], globsuffix)
        regexb, b = _buildregexmatch(kindpats[half:], globsuffix)
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # Compile the patterns one by one to find out which one is broken.
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
871 872
872 873 def _patternrootsanddirs(kindpats):
873 874 '''Returns roots and directories corresponding to each pattern.
874 875
875 876 This calculates the roots and directories exactly matching the patterns and
876 877 returns a tuple of (roots, dirs) for each. It does not return other
877 878 directories which may also need to be considered, like the parent
878 879 directories.
879 880 '''
880 881 r = []
881 882 d = []
882 883 for kind, pat, source in kindpats:
883 884 if kind == 'glob': # find the non-glob prefix
884 885 root = []
885 886 for p in pat.split('/'):
886 887 if '[' in p or '{' in p or '*' in p or '?' in p:
887 888 break
888 889 root.append(p)
889 890 r.append('/'.join(root) or '.')
890 891 elif kind in ('relpath', 'path'):
891 892 r.append(pat or '.')
892 893 elif kind in ('rootfilesin',):
893 894 d.append(pat or '.')
894 895 else: # relglob, re, relre
895 896 r.append('.')
896 897 return r, d
897 898
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # Only the first element (roots) is wanted; the exact dirs are dropped.
    return _patternrootsanddirs(kindpats)[0]
902 903
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    exactdirs.extend(util.dirs(exactdirs))
    exactdirs.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
937 938
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = [entry for entry in kindpats if entry[0] != 'rootfilesin']
    return _roots(filable)
950 951
951 952 def _prefix(kindpats):
952 953 '''Whether all the patterns match a prefix (i.e. recursively)'''
953 954 for kind, pat, source in kindpats:
954 955 if kind not in ('path', 'relpath'):
955 956 return False
956 957 return True
957 958
# Lazily compiled regexp used by readpatternfile() to strip comments.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.

    warn, when given, is called with a message for each 'syntax:' line that
    names an unknown syntax; such lines are otherwise ignored.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # The context manager closes the file even if parsing raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line prefix overrides the file-wide default syntax.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now