##// END OF EJS Templates
match: simplify nevermatcher...
Martin von Zweigbergk -
r32650:783394c0 default
parent child Browse files
Show More
@@ -1,996 +1,985
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
21 21 propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile regex with the best available engine and return a matcher.

    The pattern is compiled through util.re. The compiled object's
    test_match method is returned when it has one, otherwise its match
    method is returned.
    '''
    compiled = util.re.compile(regex)
    # test_match is slightly faster; it is provided by facebook's re2 bindings
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is not None:
        return fastmatch
    return compiled.match
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
55 55 def _expandsubinclude(kindpats, root):
56 56 '''Returns the list of subinclude matcher args and the kindpats without the
57 57 subincludes in it.'''
58 58 relmatchers = []
59 59 other = []
60 60
61 61 for kind, pat, source in kindpats:
62 62 if kind == 'subinclude':
63 63 sourceroot = pathutil.dirname(util.normpath(source))
64 64 pat = util.pconvert(pat)
65 65 path = pathutil.join(sourceroot, pat)
66 66
67 67 newroot = pathutil.dirname(path)
68 68 matcherargs = (newroot, '', [], ['include:%s' % path])
69 69
70 70 prefix = pathutil.canonpath(root, root, newroot)
71 71 if prefix:
72 72 prefix += '/'
73 73 relmatchers.append((prefix, matcherargs))
74 74 else:
75 75 other.append((kind, pat, source))
76 76
77 77 return relmatchers, other
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed through to pattern normalization
    ctx - context handed to the pattern/include matchers; with icasefs its
          repo's dirstate is used to normalize the patterns
    listsubrepos - passed through to the pattern/include matchers
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                original = pat
                pat = dsnormalize(pat)

                # Preserve the original to handle a case only rename.
                if original != pat and original in dirstate:
                    folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded

    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = patternmatcher(root, cwd, kindpats, ctx=ctx,
                               listsubrepos=listsubrepos, badfn=badfn)

    if include:
        inckindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = includematcher(root, cwd, inckindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        exckindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = includematcher(root, cwd, exckindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
171 170
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher built from root, cwd, files and badfn.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
174 173
def always(root, cwd):
    '''Return an alwaysmatcher for root and cwd.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
177 176
def never(root, cwd):
    '''Return a nevermatcher for root and cwd.'''
    matcher = nevermatcher(root, cwd)
    return matcher
180 179
def badmatch(match, badfn):
    """Return a shallow copy of match whose bad attribute is badfn.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
188 187
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' strings into (kind, pat, source) tuples.

    Patterns without a known kind prefix get the default kind. 'glob' and
    'relpath' patterns are canonicalized relative to root/cwd, 'relglob',
    'path' and 'rootfilesin' patterns are path-normalized, and 're'/'relre'
    patterns are kept verbatim. 'listfile'/'listfile0' and 'include' patterns
    are replaced by the (recursively normalized) patterns read from the named
    file; their source field records where they came from.

    Raises error.Abort if a list file cannot be read or an included pattern
    file aborts; an unreadable include file is skipped, with a warning if
    warn is set.
    '''
    normalized = []
    splitpats = [_patsplit(p, default) for p in patterns]
    for kind, pat in splitpats:
        if kind in ('listfile', 'listfile0'):
            try:
                content = util.readfile(pat)
                if kind == 'listfile0':
                    entries = content.split('\0')
                else:
                    entries = content.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            nested = _donormalize(entries, default, root, cwd, auditor, warn)
            # every pattern from a list file is attributed to that file
            normalized.extend((k, p, pat) for k, p, source in nested)
            continue

        if kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                nested = _donormalize(includepats, default, root, cwd,
                                      auditor, warn)
                # keep a more specific (nested) source when there is one
                normalized.extend((k, p, source or pat)
                                  for k, p, source in nested)
            except error.Abort as inst:
                raise error.Abort('%s: %s' % (pat, inst[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue

        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        normalized.append((kind, pat, ''))
    return normalized
229 228
class basematcher(object):
    '''Base class of the matchers in this module.

    Used unmodified it matches nothing: matchfn() and visitdir() return
    False and files() is empty. Subclasses override what they need.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        self._relativeuipath = relativeuipath
        if badfn is not None:
            # an explicit callback shadows the class-level bad() method
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relative = self.rel(f)
            # an empty relative path falls through to the root-relative one
            if relative:
                return relative
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        return False

    def prefix(self):
        '''True when the matcher is neither always(), isexact() nor
        anypats().'''
        return not (self.always() or self.isexact() or self.anypats())
325 324
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        # unlike basematcher, display paths default to root-relative here
        basematcher.__init__(self, root, cwd, badfn,
                             relativeuipath=relativeuipath)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory and all of its subdirectories are of interest
        return 'all'

    def __repr__(self):
        return '<alwaysmatcher>'
344 343
class nevermatcher(basematcher):
    '''Matches nothing.'''

    # basematcher already answers False from always(), matchfn() and
    # visitdir(), which is exactly the "match nothing" behavior, so only the
    # constructor and __repr__ are defined here. A single __init__ is kept:
    # a second definition in the same class body would silently replace the
    # first one.
    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    def __repr__(self):
        return '<nevermatcher>'
363 352
class patternmatcher(basematcher):
    '''Matcher built from a list of normalized (kind, pat, source) patterns.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._anypats = _anypats(kindpats)
        # '$' is the suffix appended to the regexp generated for globs
        regex, matchfn = _buildmatch(ctx, kindpats, '$', listsubrepos, root)
        self.patternspat = regex
        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        # parent directories of the explicit files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        explicit = self._fileset
        if self.prefix() and dir in explicit:
            return 'all'
        if '.' in explicit or dir in explicit or dir in self._dirs:
            return True
        return any(parentdir in explicit
                   for parentdir in util.finddirs(dir))

    def anypats(self):
        return self._anypats

    def __repr__(self):
        return '<patternmatcher patterns=%r>' % self.patternspat
394 383
class includematcher(basematcher):
    '''Matcher used for include/exclude (-I/-X style) pattern lists.'''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        # '(?:/|$)' is the suffix appended to the regexp generated for globs
        regex, matchfn = _buildmatch(ctx, kindpats, '(?:/|$)', listsubrepos,
                                     root)
        self.includepat = regex
        self._anypats = _anypats(kindpats)
        roots, dirs = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        self.matchfn = matchfn

    def visitdir(self, dir):
        roots = self._roots
        if not self._anypats and dir in roots:
            # The condition above is essentially self.prefix() for includes
            return 'all'
        if '.' in roots or dir in roots or dir in self._dirs:
            return True
        return any(parentdir in roots
                   for parentdir in util.finddirs(dir))

    def anypats(self):
        return True

    def __repr__(self):
        return '<includematcher includes=%r>' % self.includepat
426 415
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # a list is stored as given; any other iterable is copied into one
        self._files = files if isinstance(files, list) else list(files)

    # a file matches iff it is one of the listed files
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # parent directories of the listed files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return '<exactmatcher files=%r>' % self._files
454 443
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        # the walk callbacks are taken from the first matcher only
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        included = self._m1(f)
        if not included:
            return included
        # excluded files still match when m1 lists them explicitly
        return not self._m2(f) or self._m1.exact(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we don't
            # want to remove "dir" from the set even though it would match m2,
            # because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        if self._m2.visitdir(dir) == 'all':
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return '<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
507 496
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    If either argument is None the other one is returned as is. If one of the
    matchers is always(), a copy of the other one is returned instead of a
    real intersection.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        merged = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(merged, attr, getattr(m1, attr))
        merged._relativeuipath = merged._relativeuipath | m1._relativeuipath
        return merged

    if m2.always():
        merged = copy.copy(m1)
        merged._relativeuipath = merged._relativeuipath | m2._relativeuipath
        return merged

    return intersectionmatcher(m1, m2)
532 521
class intersectionmatcher(basematcher):
    '''Matches the files that both m1 and m2 match.

    root, cwd and the walk callbacks (bad, explicitdir, traversedir) are
    taken from m1.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not always
            # files. For example, if intersecting a matcher "-I glob:foo.txt"
            # with matcher of "path:dir2", we don't want to remove "dir2" from
            # the set.
            return self._m1.files() + self._m2.files()

        # filter the exact matcher's files through the other matcher
        if self._m1.isexact():
            exactm, otherm = self._m1, self._m2
        else:
            exactm, otherm = self._m2, self._m1
        return [f for f in exactm.files() if otherm(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if not first:
            return False
        second = self._m2.visitdir(dir)
        if first == 'all':
            return second
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return '<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
576 565
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # keep the parent's files that live under path, with the prefix cut
        prefix = path + "/"
        skip = len(path) + 1
        self._files = [f[skip:] for f in matcher._files
                       if f.startswith(prefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def _parentpath(self, f):
        '''Return f as seen from the wrapped matcher.'''
        return self._path + "/" + f

    def bad(self, f, msg):
        self._matcher.bad(self._parentpath(f), msg)

    def abs(self, f):
        return self._matcher.abs(self._parentpath(f))

    def rel(self, f):
        return self._matcher.rel(self._parentpath(f))

    def uipath(self, f):
        return self._matcher.uipath(self._parentpath(f))

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._parentpath(f))

    def visitdir(self, dir):
        # '.' is this subdirectory itself
        target = self._path if dir == '.' else self._parentpath(dir)
        return self._matcher.visitdir(target)

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()

    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
656 645
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return default.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
660 649
661 650 def _patsplit(pattern, default):
662 651 """Split a string into the optional pattern kind prefix and the actual
663 652 pattern."""
664 653 if ':' in pattern:
665 654 kind, pat = pattern.split(':', 1)
666 655 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
667 656 'listfile', 'listfile0', 'set', 'include', 'subinclude',
668 657 'rootfilesin'):
669 658 return kind, pat
670 659 return default, pattern
671 660
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    pos, end = 0, len(pat)
    pieces = []
    depth = 0  # number of currently open '{' groups
    escape = util.re.escape
    while pos < end:
        ch = pat[pos:pos + 1]
        pos += 1
        if ch == '*':
            if pat[pos:pos + 1] != '*':
                # a single star never crosses a directory separator
                pieces.append('[^/]*')
                continue
            pos += 1
            if pat[pos:pos + 1] == '/':
                # '**/' also matches zero directories
                pos += 1
                pieces.append('(?:.*/)?')
            else:
                pieces.append('.*')
        elif ch == '?':
            pieces.append('.')
        elif ch == '[':
            close = pos
            # a leading '!' or ']' belongs to the class, it doesn't close it
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat the '[' literally
                pieces.append('\\[')
            else:
                members = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if members[0:1] == '!':
                    members = '^' + members[1:]
                elif members[0:1] == '^':
                    members = '\\' + members
                pieces.append('[%s]' % members)
        elif ch == '{':
            depth += 1
            pieces.append('(?:')
        elif ch == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif ch == ',' and depth:
            pieces.append('|')
        elif ch == '\\' and pat[pos:pos + 1]:
            # a backslash quotes the character that follows it
            pieces.append(escape(pat[pos:pos + 1]))
            pos += 1
        else:
            pieces.append(escape(ch))
    return ''.join(pieces)
749 738
750 739 def _regex(kind, pat, globsuffix):
751 740 '''Convert a (normalized) pattern of any kind into a regular expression.
752 741 globsuffix is appended to the regexp of globs.'''
753 742 if not pat:
754 743 return ''
755 744 if kind == 're':
756 745 return pat
757 746 if kind == 'path':
758 747 if pat == '.':
759 748 return ''
760 749 return '^' + util.re.escape(pat) + '(?:/|$)'
761 750 if kind == 'rootfilesin':
762 751 if pat == '.':
763 752 escaped = ''
764 753 else:
765 754 # Pattern is a directory name.
766 755 escaped = util.re.escape(pat) + '/'
767 756 # Anything after the pattern must be a non-directory.
768 757 return '^' + escaped + '[^/]+$'
769 758 if kind == 'relglob':
770 759 return '(?:|.*/)' + _globre(pat) + globsuffix
771 760 if kind == 'relpath':
772 761 return util.re.escape(pat) + '(?:/|$)'
773 762 if kind == 'relre':
774 763 if pat.startswith('^'):
775 764 return pat
776 765 return '.*' + pat
777 766 return _globre(pat) + globsuffix
778 767
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    The matcher function accepts a file if any of these accepts it: a
    'subinclude' matcher, the expanded 'set' patterns, or the regexp built
    from the remaining patterns.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes are built on first use, keyed by prefix
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                mf = submatchers.get(prefix)
                if mf is None:
                    mf = match(*matcherargs)
                    submatchers[prefix] = mf

                if mf(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) != 1:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)
    return regex, matchfuncs[0]
813 802
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a pattern does not compile."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        a = _buildregexmatch(kindpats[:half], globsuffix)[1]
        b = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one by one to name the offending one
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
844 833
845 834 def _patternrootsanddirs(kindpats):
846 835 '''Returns roots and directories corresponding to each pattern.
847 836
848 837 This calculates the roots and directories exactly matching the patterns and
849 838 returns a tuple of (roots, dirs) for each. It does not return other
850 839 directories which may also need to be considered, like the parent
851 840 directories.
852 841 '''
853 842 r = []
854 843 d = []
855 844 for kind, pat, source in kindpats:
856 845 if kind == 'glob': # find the non-glob prefix
857 846 root = []
858 847 for p in pat.split('/'):
859 848 if '[' in p or '{' in p or '*' in p or '?' in p:
860 849 break
861 850 root.append(p)
862 851 r.append('/'.join(root) or '.')
863 852 elif kind in ('relpath', 'path'):
864 853 r.append(pat or '.')
865 854 elif kind in ('rootfilesin',):
866 855 d.append(pat or '.')
867 856 else: # relglob, re, relre
868 857 r.append('.')
869 858 return r, d
870 859
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # only the first half of the (roots, dirs) pair is of interest here
    return _patternrootsanddirs(kindpats)[0]
875 864
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = list(util.dirs(exactdirs))
    parents.extend(util.dirs(roots))
    exactdirs.extend(parents)
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
907 896
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    candidates = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(candidates)
920 909
921 910 def _anypats(kindpats):
922 911 for kind, pat, source in kindpats:
923 912 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
924 913 return True
925 914
# Compiled lazily by readpatternfile() the first time a line contains '#'.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. Each returned pattern is the line's text prefixed with the
    syntax that applies to it.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    an unknown "syntax:" value is skipped, with a warning if warn is set.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # "with" closes the file even when parsing or warn() raises
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s + ':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now