##// END OF EJS Templates
match: remove special-casing of always-matching patterns in patternmatcher...
Martin von Zweigbergk -
r32557:3fdcc34c default
parent child Browse files
Show More
@@ -1,983 +1,974
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
# Local alias so the matcher classes below can simply write @propertycache.
propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile ``regex`` via util.re and return a callable that matches a
    string against it.

    The compiled object's ``test_match`` is preferred when it exists;
    otherwise the plain ``match`` method is returned.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        fastmatch = compiled.test_match
    except AttributeError:
        return compiled.match
    return fastmatch
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
55 55 def _expandsubinclude(kindpats, root):
56 56 '''Returns the list of subinclude matcher args and the kindpats without the
57 57 subincludes in it.'''
58 58 relmatchers = []
59 59 other = []
60 60
61 61 for kind, pat, source in kindpats:
62 62 if kind == 'subinclude':
63 63 sourceroot = pathutil.dirname(util.normpath(source))
64 64 pat = util.pconvert(pat)
65 65 path = pathutil.join(sourceroot, pat)
66 66
67 67 newroot = pathutil.dirname(path)
68 68 matcherargs = (newroot, '', [], ['include:%s' % path])
69 69
70 70 prefix = pathutil.canonpath(root, root, newroot)
71 71 if prefix:
72 72 prefix += '/'
73 73 relmatchers.append((prefix, matcherargs))
74 74 else:
75 75 other.append((kind, pat, source))
76 76
77 77 return relmatchers, other
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed on to pathutil.canonpath() when glob/relpath patterns
        are normalized
    ctx - context used to evaluate 'set:' patterns; with icasefs it also
        provides the dirstate used for case normalization
    listsubrepos - also evaluate 'set:' patterns in the subrepos of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Raises error.ProgrammingError when both exact and icasefs are requested.
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            # Patterns such as 'relpath:.' match everything. Patterns were
            # given, so display paths stay relative to cwd.
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = patternmatcher(root, cwd, kindpats, ctx=ctx,
                               listsubrepos=listsubrepos, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably adding a
        # "nevermatcher").
        m = alwaysmatcher(root, cwd, badfn)

    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
168 171
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for ``files``, which are taken as plain file
    names rather than patterns.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
171 174
def always(root, cwd):
    '''Return an alwaysmatcher, i.e. a matcher that matches every file.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
174 177
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() callback is ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    setattr(clone, 'bad', badfn)
    return clone
182 185
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Turn a list of 'kind:pat' strings into (kind, pat, source) tuples.

    Patterns are normalized and rooted according to their kind, and the
    'listfile', 'listfile0' and 'include' kinds are replaced by the patterns
    read from the file they name. ``source`` is '' for patterns given
    directly, the list file name for listfile entries, and for include
    entries the nested source if there is one, else the include file name.

    Raises error.Abort if a list file cannot be read or an included pattern
    file aborts; an unreadable include file is only reported through
    ``warn`` (when given) and skipped.'''
    result = []
    # Split everything up front, before any pattern is processed.
    split = [_patsplit(p, default) for p in patterns]
    for kind, pat in split:
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                content = util.readfile(pat)
                if kind == 'listfile0':
                    entries = content.split('\0')
                else:
                    entries = content.splitlines()
                files = [f for f in entries if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            nested = _donormalize(files, default, root, cwd, auditor, warn)
            # Entries read from a list file report that file as their source.
            result.extend([(k, p, pat) for k, p, source in nested])
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                nested = _donormalize(includepats, default, root, cwd,
                                      auditor, warn)
                result.extend([(k, p, source or pat)
                               for k, p, source in nested])
            except error.Abort as inst:
                raise error.Abort('%s: %s' % (pat, inst[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue
        # else: re or relre - which cannot be normalized
        result.append((kind, pat, ''))
    return result
223 226
class basematcher(object):
    '''Base class of the matchers in this module.

    Taken by itself it matches nothing: there are no explicit files, no
    directory needs visiting and matchfn() is always False. Subclasses
    override the relevant pieces.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        self._relativeuipath = relativeuipath
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relative = self.rel(f)
            # An empty relative path falls through to abs().
            if relative:
                return relative
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        return False

    def prefix(self):
        '''True when the matcher is neither always(), isexact() nor
        anypats().'''
        return not (self.always() or self.isexact() or self.anypats())
319 322
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        # relativeuipath defaults to False, so callers that do not pass it
        # get the same behaviour as when it was hard-coded.
        super(alwaysmatcher, self).__init__(root, cwd, badfn,
                                            relativeuipath=relativeuipath)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # Everything matches, so every directory must be fully visited.
        return 'all'

    def __repr__(self):
        return '<alwaysmatcher>'
338 341
class patternmatcher(basematcher):
    '''Matches files against a list of (kind, pat, source) patterns.

    Always-matching pattern lists are not special-cased here; always()
    is inherited from basematcher and therefore returns False.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._anypats = _anypats(kindpats)
        # '$' as glob suffix: a glob has to match the whole file name.
        self.patternspat, pm = _buildmatch(ctx, kindpats, '$', listsubrepos,
                                           root)
        self.matchfn = pm

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        if self.prefix() and dir in self._fileset:
            return 'all'
        return ('.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        return self._anypats

    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % self.patternspat)
381 372
class includematcher(basematcher):
    '''Matcher built from include-style patterns: a pattern also matches
    everything below a directory it matches.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        # The '(?:/|$)' glob suffix lets a glob match a directory prefix.
        includepat, matchfn = _buildmatch(ctx, kindpats, '(?:/|$)',
                                          listsubrepos, root)
        self.includepat = includepat
        self._anypats = _anypats(kindpats)
        recursive, nonrecursive = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(recursive)
        # dirs are directories which are non-recursively included.
        self._dirs = set(nonrecursive)
        self.matchfn = matchfn

    def visitdir(self, dir):
        roots = self._roots
        if dir in roots and not self._anypats:
            # The condition above is essentially self.prefix() for includes
            return 'all'
        if '.' in roots or dir in roots or dir in self._dirs:
            return True
        return any(parentdir in roots for parentdir in util.finddirs(dir))

    def anypats(self):
        return True

    def __repr__(self):
        return ('<includematcher includes=%r>' % self.includepat)
413 404
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    # Matching a file is the same as looking it up in the explicit file set.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
441 432
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        # Callbacks are taken from the first matcher only.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        included = self._m1(f)
        if not included:
            return included
        # Files listed explicitly in m1 survive the exclusion.
        return not self._m2(f) or self._m1.exact(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        excluded = self._m2.visitdir(dir) == 'all'
        if excluded:
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
494 485
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    If either argument is None the other one is returned. If one of the
    matchers is always(), a copy of the other one is returned instead of a
    real intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        m = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(m, attr, getattr(m1, attr))
        m._relativeuipath |= m1._relativeuipath
        return m
    if m2.always():
        m = copy.copy(m1)
        m._relativeuipath |= m2._relativeuipath
        return m
    return intersectionmatcher(m1, m2)
519 510
class intersectionmatcher(basematcher):
    '''Matches a file only if both wrapped matchers match it.

    Root, cwd and the bad/explicitdir/traversedir callbacks come from the
    first matcher.'''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # Take the file list from an exact side and filter it with the other.
        if self._m1.isexact():
            exactside, otherside = self._m1, self._m2
        else:
            exactside, otherside = self._m2, self._m1
        return [f for f in exactside.files() if otherside(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if not first:
            return False
        second = self._m2.visitdir(dir)
        if first == 'all':
            return second
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
563 554
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the files below the subdirectory, with its prefix cut.
        dirprefix = path + "/"
        skip = len(path) + 1
        self._files = [f[skip:] for f in matcher._files
                       if f.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def _parentpath(self, f):
        '''Return ``f`` as seen by the wrapped matcher.'''
        return self._path + "/" + f

    def bad(self, f, msg):
        self._matcher.bad(self._parentpath(f), msg)

    def abs(self, f):
        return self._matcher.abs(self._parentpath(f))

    def rel(self, f):
        return self._matcher.rel(self._parentpath(f))

    def uipath(self, f):
        return self._matcher.uipath(self._parentpath(f))

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._parentpath(f))

    def visitdir(self, dir):
        # '.' stands for the subdirectory itself.
        target = self._path if dir == '.' else self._parentpath(dir)
        return self._matcher.visitdir(target)

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()

    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
643 634
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned.'''
    kind, pat = _patsplit(pattern, default)
    return kind
647 638
648 639 def _patsplit(pattern, default):
649 640 """Split a string into the optional pattern kind prefix and the actual
650 641 pattern."""
651 642 if ':' in pattern:
652 643 kind, pat = pattern.split(':', 1)
653 644 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
654 645 'listfile', 'listfile0', 'set', 'include', 'subinclude',
655 646 'rootfilesin'):
656 647 return kind, pat
657 648 return default, pattern
658 649
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    # i is the read position in pat, n its length; res accumulates the
    # regexp and group counts the currently open '{' alternations.
    i, n = 0, len(pat)
    res = ''
    group = 0
    escape = util.re.escape
    def peek():
        # Next unread character, or False at the end of the pattern.
        return i < n and pat[i:i + 1]
    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # Not a glob metacharacter: emit it escaped.
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' also matches no directory level at all
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' crosses directory separators
                    res += '.*'
            else:
                # a single '*' stays within one path component
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # Look for the closing ']'; a '!' or ']' right after the '[' is
            # part of the class and does not end it.
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat the '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation '!' becomes regexp negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in a glob, so escape it
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside an open '{'
            res += '|'
        elif c == '\\':
            # A backslash quotes the following character; a trailing
            # backslash is emitted literally.
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                res += escape(c)
        else:
            # '}' or ',' outside of any group are ordinary characters
            res += escape(c)
    return res
736 727
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pattern gives an empty regexp whatever the kind.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'relre':
        # An unanchored regexp may match anywhere in the name.
        return pat if pat.startswith('^') else '.*' + pat
    if kind == 'path':
        if pat == '.':
            return ''
        return '^' + util.re.escape(pat) + '(?:/|$)'
    if kind == 'relpath':
        return util.re.escape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # Pattern is a directory name ('.' meaning no directory prefix).
        dirprefix = '' if pat == '.' else util.re.escape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return '^' + dirprefix + '[^/]+$'
    globbed = _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + globbed
    return globbed
765 756
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    Subincludes and filesets get their own match functions, the remaining
    patterns are compiled into one regexp. The returned regexp string only
    covers those remaining patterns and is '' if there are none.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for subincludes are created on first use, per prefix.
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                if prefix not in submatchers:
                    submatchers[prefix] = match(*matcherargs)
                if submatchers[prefix](f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]

    def matchany(f):
        return any(mf(f) for mf in matchfuncs)
    return regex, matchany
800 791
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort, naming the offending pattern where possible, if the
    patterns do not form a valid regexp."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            raise
        half = l // 2
        a = _buildregexmatch(kindpats[:half], globsuffix)[1]
        b = _buildregexmatch(kindpats[half:], globsuffix)[1]

        def matcheither(s):
            return a(s) or b(s)
        return regex, matcheither
    except re.error:
        # Compile the patterns one by one to find the one to blame.
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
831 822
832 823 def _patternrootsanddirs(kindpats):
833 824 '''Returns roots and directories corresponding to each pattern.
834 825
835 826 This calculates the roots and directories exactly matching the patterns and
836 827 returns a tuple of (roots, dirs) for each. It does not return other
837 828 directories which may also need to be considered, like the parent
838 829 directories.
839 830 '''
840 831 r = []
841 832 d = []
842 833 for kind, pat, source in kindpats:
843 834 if kind == 'glob': # find the non-glob prefix
844 835 root = []
845 836 for p in pat.split('/'):
846 837 if '[' in p or '{' in p or '*' in p or '?' in p:
847 838 break
848 839 root.append(p)
849 840 r.append('/'.join(root) or '.')
850 841 elif kind in ('relpath', 'path'):
851 842 r.append(pat or '.')
852 843 elif kind in ('rootfilesin',):
853 844 d.append(pat or '.')
854 845 else: # relglob, re, relre
855 846 r.append('.')
856 847 return r, d
857 848
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # Only the first element of the (roots, dirs) pair is of interest here.
    return _patternrootsanddirs(kindpats)[0]
862 853
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    dirparents = util.dirs(dirs)
    rootparents = util.dirs(roots)
    dirs.extend(dirparents)
    dirs.extend(rootparents)
    # util.dirs() does not include the root directory, so add it manually
    dirs.append('.')

    return roots, dirs
894 885
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = []
    for kindpat in kindpats:
        if kindpat[0] != 'rootfilesin':
            filable.append(kindpat)
    return _roots(filable)
907 898
908 899 def _anypats(kindpats):
909 900 for kind, pat, source in kindpats:
910 901 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
911 902 return True
912 903
# Compiled lazily by readpatternfile() the first time a line contains '#'.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.

    warn, if given, is called with a message for each invalid 'syntax:'
    line. Errors from opening the file are not caught here.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # 'with' closes the file even when reading or parsing raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line prefix overrides the current default syntax.
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now