##// END OF EJS Templates
match: minor cleanups to patternmatcher and includematcher...
Martin von Zweigbergk -
r33306:a9808bd1 default
parent child Browse files
Show More
@@ -1,985 +1,983
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
21 21 propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile regex via util.re and return a function that matches with it.

    The compiled pattern's test_match method is preferred when it exists;
    otherwise the ordinary match method is returned.'''
    compiled = util.re.compile(regex)
    # test_match is slightly faster; it is provided by facebook's re2 bindings
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is None:
        return compiled.match
    return fastmatch
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
55 55 def _expandsubinclude(kindpats, root):
56 56 '''Returns the list of subinclude matcher args and the kindpats without the
57 57 subincludes in it.'''
58 58 relmatchers = []
59 59 other = []
60 60
61 61 for kind, pat, source in kindpats:
62 62 if kind == 'subinclude':
63 63 sourceroot = pathutil.dirname(util.normpath(source))
64 64 pat = util.pconvert(pat)
65 65 path = pathutil.join(sourceroot, pat)
66 66
67 67 newroot = pathutil.dirname(path)
68 68 matcherargs = (newroot, '', [], ['include:%s' % path])
69 69
70 70 prefix = pathutil.canonpath(root, root, newroot)
71 71 if prefix:
72 72 prefix += '/'
73 73 relmatchers.append((prefix, matcherargs))
74 74 else:
75 75 other.append((kind, pat, source))
76 76
77 77 return relmatchers, other
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed through to pattern normalization
    ctx - context used to expand 'set:' patterns and, with icasefs, to reach
          the dirstate
    listsubrepos - also expand 'set:' patterns in the subrepos of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                normpat = dsnormalize(pat)
                # Preserve the original to handle a case only rename.
                if pat != normpat and pat in dirstate:
                    folded.append((kind, pat, source))
                folded.append((kind, normpat, source))
            return folded

    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = patternmatcher(root, cwd, kindpats, ctx=ctx,
                               listsubrepos=listsubrepos, badfn=badfn)

    # includes narrow the result first, then excludes are subtracted
    for extrapats, combine in ((include, intersectmatchers),
                               (exclude, differencematcher)):
        if not extrapats:
            continue
        kindpats = normalize(extrapats, 'glob', root, cwd, auditor, warn)
        extra = includematcher(root, cwd, kindpats, ctx=ctx,
                               listsubrepos=listsubrepos, badfn=None)
        m = combine(m, extra)
    return m
170 170
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given list of files.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
173 173
def always(root, cwd):
    '''Return an alwaysmatcher for the given root and cwd.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
176 176
def never(root, cwd):
    '''Return a nevermatcher for the given root and cwd.'''
    matcher = nevermatcher(root, cwd)
    return matcher
179 179
def badmatch(match, badfn):
    """Return a shallow copy of the given matcher whose bad method is badfn.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
187 187
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. source is the list file or
    include file a pattern came from, or '' for patterns given directly.
    Raises error.Abort when a list file cannot be read or an included
    pattern file aborts; an unreadable include file is only reported
    through warn (if given).'''
    normalized = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in ('listfile', 'listfile0'):
            try:
                content = util.readfile(pat)
                if kind == 'listfile0':
                    names = content.split('\0')
                else:
                    names = content.splitlines()
                names = [name for name in names if name]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # every pattern read from the list file is attributed to it
            normalized.extend(
                [(k, p, pat)
                 for k, p, source in _donormalize(names, default, root, cwd,
                                                  auditor, warn)])
            continue

        if kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                # keep the innermost source when includes are nested
                normalized.extend(
                    [(k, p, source or pat)
                     for k, p, source in _donormalize(includepats, default,
                                                      root, cwd, auditor,
                                                      warn)])
            except error.Abort as inst:
                raise error.Abort('%s: %s' % (pat, inst[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue

        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        # anything else is re or relre - which cannot be normalized
        normalized.append((kind, pat, ''))
    return normalized
228 228
class basematcher(object):
    '''Base class for matchers.

    On its own it matches nothing: files() is empty, matchfn() and
    visitdir() return False and all predicates are False. Subclasses
    override what they need.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        self._relativeuipath = relativeuipath
        # only shadow the bad() method when a callback was actually given
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relative = self.rel(f)
            # an empty relative path falls through to abs()
            if relative:
                return relative
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        return False

    def prefix(self):
        '''True when the matcher is none of always, exact or pattern based.'''
        return not (self.always() or self.isexact() or self.anypats())
324 324
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        # unlike the base class, display paths default to root-relative
        super(alwaysmatcher, self).__init__(root, cwd, badfn,
                                            relativeuipath=relativeuipath)

    def __repr__(self):
        return '<alwaysmatcher>'

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory and all of its subdirectories are of interest
        return 'all'
343 343
class nevermatcher(basematcher):
    '''Matches nothing.'''

    # All matching behaviour is inherited unchanged from basematcher.

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    def __repr__(self):
        return '<nevermatcher>'
352 352
class patternmatcher(basematcher):
    '''Matches files against a list of normalized kindpats.

    The regexp string built for the patterns is kept in _pats (used by
    __repr__) and the resulting match function is installed as matchfn.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._anypats = _anypats(kindpats)
        # '$' as glob suffix: globs must match the whole file name
        self._pats, self.matchfn = _buildmatch(ctx, kindpats, '$', listsubrepos,
                                               root)

    @propertycache
    def _dirs(self):
        # parent directories of the explicit files, plus the root itself
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        if self.prefix() and dir in self._fileset:
            return 'all'
        return ('.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        return self._anypats

    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % self._pats)
383 382
class includematcher(basematcher):
    '''Matches files against include-style kindpats.

    The regexp string built for the patterns is kept in _pats (used by
    __repr__) and the resulting match function is installed as matchfn.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        # '(?:/|$)' as glob suffix: a glob also matches everything below a
        # directory it names
        self._pats, self.matchfn = _buildmatch(ctx, kindpats, '(?:/|$)',
                                               listsubrepos, root)
        self._anypats = _anypats(kindpats)
        roots, dirs = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)

    def visitdir(self, dir):
        if not self._anypats and dir in self._roots:
            # The condition above is essentially self.prefix() for includes
            return 'all'
        return ('.' in self._roots or
                dir in self._roots or
                dir in self._dirs or
                any(parentdir in self._roots
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        # reports True regardless of the kinds of patterns given
        return True

    def __repr__(self):
        return ('<includematcher includes=%r>' % self._pats)
415 413
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # reuse the caller's list as is; copy any other iterable into a list
        self._files = files if isinstance(files, list) else list(files)

    # a file matches exactly when it is one of the listed files
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # parent directories of the listed files, plus the root itself
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
443 441
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        # callbacks are taken from the first matcher only
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        included = self._m1(f)
        if not included:
            return included
        # files listed explicitly in m1 survive the exclusion
        return not self._m2(f) or self._m1.exact(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
        # 'dir' (recursively), we should still visit 'dir' due to the
        # exception we have for exact matches.
        fullyexcluded = self._m2.visitdir(dir) == 'all'
        return not fullyexcluded and bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
496 494
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the other one is returned. When one of the
    matchers always matches, a copy of the other one is returned instead of
    building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        merged = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(merged, attr, getattr(m1, attr))
        merged._relativeuipath |= m1._relativeuipath
        return merged
    if m2.always():
        merged = copy.copy(m1)
        merged._relativeuipath |= m2._relativeuipath
        return merged
    return intersectionmatcher(m1, m2)
521 519
class intersectionmatcher(basematcher):
    '''Matches the files matched by both m1 and m2.

    root, cwd and the bad/explicitdir/traversedir callbacks are taken from
    m1.'''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # filter the files of an exact matcher through the other matcher
        if self._m1.isexact():
            exactm, other = self._m1, self._m2
        else:
            exactm, other = self._m2, self._m1
        return [f for f in exactm.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if not visit1:
            return False
        visit2 = self._m2.visitdir(dir)
        if visit1 == 'all':
            return visit2
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit2)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
565 563
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # keep only the files below path, with the "path/" prefix stripped
        subprefix = path + "/"
        striplen = len(subprefix)
        self._files = [f[striplen:] for f in matcher._files
                       if f.startswith(subprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        return self._matcher.uipath(self._path + "/" + f)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' stands for the subdirectory itself
        if dir == '.':
            fulldir = self._path
        else:
            fulldir = self._path + "/" + dir
        return self._matcher.visitdir(fulldir)

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()

    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
645 643
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise default is returned.'''
    kind, pat = _patsplit(pattern, default)
    return kind
649 647
650 648 def _patsplit(pattern, default):
651 649 """Split a string into the optional pattern kind prefix and the actual
652 650 pattern."""
653 651 if ':' in pattern:
654 652 kind, pat = pattern.split(':', 1)
655 653 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
656 654 'listfile', 'listfile0', 'set', 'include', 'subinclude',
657 655 'rootfilesin'):
658 656 return kind, pat
659 657 return default, pattern
660 658
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    escape = util.re.escape
    pos, end = 0, len(pat)
    pieces = []
    # number of currently open '{' groups
    depth = 0
    while pos < end:
        c = pat[pos:pos + 1]
        pos += 1
        if c == '*':
            if pat[pos:pos + 1] != '*':
                # a single star stays within one path component
                pieces.append('[^/]*')
                continue
            pos += 1
            if pat[pos:pos + 1] == '/':
                # '**/' also matches zero directories
                pos += 1
                pieces.append('(?:.*/)?')
            else:
                pieces.append('.*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # look for the closing bracket; a leading '!' or ']' is part of
            # the class
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat the bracket literally
                pieces.append('\\[')
                continue
            stuff = pat[pos:close].replace('\\', '\\\\')
            pos = close + 1
            if stuff[0:1] == '!':
                stuff = '^' + stuff[1:]
            elif stuff[0:1] == '^':
                stuff = '\\' + stuff
            pieces.append('[%s]' % stuff)
        elif c == '{':
            depth += 1
            pieces.append('(?:')
        elif c == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif c == ',' and depth:
            pieces.append('|')
        elif c == '\\':
            following = pat[pos:pos + 1]
            if following:
                # the backslash quotes the next character
                pos += 1
                pieces.append(escape(following))
            else:
                pieces.append(escape(c))
        else:
            # ordinary characters, and '}' or ',' outside of any group
            pieces.append(escape(c))
    return ''.join(pieces)
738 736
739 737 def _regex(kind, pat, globsuffix):
740 738 '''Convert a (normalized) pattern of any kind into a regular expression.
741 739 globsuffix is appended to the regexp of globs.'''
742 740 if not pat:
743 741 return ''
744 742 if kind == 're':
745 743 return pat
746 744 if kind == 'path':
747 745 if pat == '.':
748 746 return ''
749 747 return '^' + util.re.escape(pat) + '(?:/|$)'
750 748 if kind == 'rootfilesin':
751 749 if pat == '.':
752 750 escaped = ''
753 751 else:
754 752 # Pattern is a directory name.
755 753 escaped = util.re.escape(pat) + '/'
756 754 # Anything after the pattern must be a non-directory.
757 755 return '^' + escaped + '[^/]+$'
758 756 if kind == 'relglob':
759 757 return '(?:|.*/)' + _globre(pat) + globsuffix
760 758 if kind == 'relpath':
761 759 return util.re.escape(pat) + '(?:/|$)'
762 760 if kind == 'relre':
763 761 if pat.startswith('^'):
764 762 return pat
765 763 return '.*' + pat
766 764 return _globre(pat) + globsuffix
767 765
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    Subincludes and 'set' patterns are handled by separate match functions;
    the returned regexp string only covers the remaining patterns and is ''
    when there are none.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for the included files are built on first use
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]

    def matchany(f):
        return any(fn(f) for fn in matchfuncs)
    return regex, matchany
802 800
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a pattern is not a valid regular expression."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        matchfirst = _buildregexmatch(kindpats[:half], globsuffix)[1]
        matchsecond = _buildregexmatch(kindpats[half:], globsuffix)[1]

        def matcheither(s):
            return matchfirst(s) or matchsecond(s)
        # the regexp string returned is still the combined one
        return regex, matcheither
    except re.error:
        # compile the patterns one at a time to find the offending one
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
833 831
834 832 def _patternrootsanddirs(kindpats):
835 833 '''Returns roots and directories corresponding to each pattern.
836 834
837 835 This calculates the roots and directories exactly matching the patterns and
838 836 returns a tuple of (roots, dirs) for each. It does not return other
839 837 directories which may also need to be considered, like the parent
840 838 directories.
841 839 '''
842 840 r = []
843 841 d = []
844 842 for kind, pat, source in kindpats:
845 843 if kind == 'glob': # find the non-glob prefix
846 844 root = []
847 845 for p in pat.split('/'):
848 846 if '[' in p or '{' in p or '*' in p or '?' in p:
849 847 break
850 848 root.append(p)
851 849 r.append('/'.join(root) or '.')
852 850 elif kind in ('relpath', 'path'):
853 851 r.append(pat or '.')
854 852 elif kind in ('rootfilesin',):
855 853 d.append(pat or '.')
856 854 else: # relglob, re, relre
857 855 r.append('.')
858 856 return r, d
859 857
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of what _patternrootsanddirs() returns.'''
    return _patternrootsanddirs(kindpats)[0]
864 862
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    dirparents = util.dirs(exactdirs)
    rootparents = util.dirs(roots)
    exactdirs.extend(dirparents)
    exactdirs.extend(rootparents)
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
896 894
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = []
    for kindpat in kindpats:
        if kindpat[0] not in ('rootfilesin',):
            filable.append(kindpat)
    return _roots(filable)
909 907
910 908 def _anypats(kindpats):
911 909 for kind, pat, source in kindpats:
912 910 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
913 911 return True
914 912
# compiled lazily by readpatternfile() the first time a '#' is seen
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    warn, if given, is called with a message for each invalid 'syntax:'
    line; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.

    The file is closed again even if parsing is interrupted by an exception.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now