##// END OF EJS Templates
match: define exactmatcher.matchfn statically...
Yuya Nishihara -
r32543:cf7c8898 default
parent child Browse files
Show More
@@ -1,970 +1,971
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
21 21 propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile regex through util.re and return a bound matching function.

    The compiled object's test_match method is returned when it exists;
    otherwise the ordinary match method is used.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        fastmatch = compiled.test_match
    except AttributeError:
        return compiled.match
    return fastmatch
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
def _expandsubinclude(kindpats, root):
    '''Split the 'subinclude' entries out of kindpats.

    For every 'subinclude' pattern, the referenced pattern file is located
    relative to the directory of the file it came from (its source), and a
    (prefix, matcherargs) pair is produced: matcherargs are the positional
    arguments for match() rooted at the pattern file's directory, and prefix
    is that directory relative to root (with a trailing '/' unless empty).

    Returns (relmatchers, otherkindpats).'''
    subincludes = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != 'subinclude':
            remaining.append((kind, pat, source))
            continue

        sourcedir = pathutil.dirname(util.normpath(source))
        patfile = pathutil.join(sourcedir, util.pconvert(pat))
        subroot = pathutil.dirname(patfile)

        prefix = pathutil.canonpath(root, root, subroot)
        if prefix:
            prefix += '/'
        args = (subroot, '', [], ['include:%s' % patfile])
        subincludes.append((prefix, args))

    return subincludes, remaining
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed through to pattern normalization
    ctx - context used to evaluate filesets; its repo's dirstate is used
          when icasefs is set
    listsubrepos - also evaluate filesets in the subrepos of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                normpat = foldcase(pat)
                # Preserve the original to handle a case only rename.
                if pat != normpat and pat in dirstate:
                    folded.append((kind, pat, source))
                folded.append((kind, normpat, source))
            return folded
    else:
        normalize = _donormalize

    # keyword arguments shared by every pattern-based matcher built below
    common = dict(auditor=auditor, ctx=ctx, listsubrepos=listsubrepos,
                  warn=warn)

    if exact:
        matcher = exactmatcher(root, cwd, patterns, badfn)
    else:
        matcher = patternmatcher(root, cwd, normalize, patterns,
                                 default=default, badfn=badfn, **common)
    if include:
        included = includematcher(root, cwd, normalize, include, badfn=None,
                                  **common)
        matcher = intersectmatchers(matcher, included)
    if exclude:
        excluded = includematcher(root, cwd, normalize, exclude, badfn=None,
                                  **common)
        matcher = differencematcher(matcher, excluded)
    return matcher
162 162
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given list of file names.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
165 165
def always(root, cwd):
    '''Return the matcher that match() builds for an empty pattern list.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
168 168
def badmatch(match, badfn):
    """Return a shallow copy of the matcher 'match' whose bad attribute is
    badfn. The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    setattr(clone, 'bad', badfn)
    return clone
176 176
177 177 def _donormalize(patterns, default, root, cwd, auditor, warn):
178 178 '''Convert 'kind:pat' from the patterns list to tuples with kind and
179 179 normalized and rooted patterns and with listfiles expanded.'''
180 180 kindpats = []
181 181 for kind, pat in [_patsplit(p, default) for p in patterns]:
182 182 if kind in ('glob', 'relpath'):
183 183 pat = pathutil.canonpath(root, cwd, pat, auditor)
184 184 elif kind in ('relglob', 'path', 'rootfilesin'):
185 185 pat = util.normpath(pat)
186 186 elif kind in ('listfile', 'listfile0'):
187 187 try:
188 188 files = util.readfile(pat)
189 189 if kind == 'listfile0':
190 190 files = files.split('\0')
191 191 else:
192 192 files = files.splitlines()
193 193 files = [f for f in files if f]
194 194 except EnvironmentError:
195 195 raise error.Abort(_("unable to read file list (%s)") % pat)
196 196 for k, p, source in _donormalize(files, default, root, cwd,
197 197 auditor, warn):
198 198 kindpats.append((k, p, pat))
199 199 continue
200 200 elif kind == 'include':
201 201 try:
202 202 fullpath = os.path.join(root, util.localpath(pat))
203 203 includepats = readpatternfile(fullpath, warn)
204 204 for k, p, source in _donormalize(includepats, default,
205 205 root, cwd, auditor, warn):
206 206 kindpats.append((k, p, source or pat))
207 207 except error.Abort as inst:
208 208 raise error.Abort('%s: %s' % (pat, inst[0]))
209 209 except IOError as inst:
210 210 if warn:
211 211 warn(_("skipping unreadable pattern file '%s': %s\n") %
212 212 (pat, inst.strerror))
213 213 continue
214 214 # else: re or relre - which cannot be normalized
215 215 kindpats.append((kind, pat, ''))
216 216 return kindpats
217 217
class basematcher(object):
    '''Base class of the matchers in this module.

    On its own it matches nothing: matchfn() and visitdir() return False and
    files() is empty. Subclasses override what they need.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        self._relativeuipath = relativeuipath
        # only shadow the bad() method when a callback was actually given
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        '''Calling the matcher is the same as calling its matchfn().'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the explicitly listed files.'''
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relative = self.rel(f)
            # a false rel() result falls through to abs()
            if relative:
                return relative
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        '''Tell whether f matches; the base implementation never does.'''
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        '''Tell whether this matcher only matches an explicit file list.'''
        return False

    def prefix(self):
        '''True when the matcher is none of always(), isexact(), anypats().'''
        if self.always():
            return False
        if self.isexact():
            return False
        return not self.anypats()
313 313
class patternmatcher(basematcher):
    '''Matcher built from a list of 'kind:pat' patterns.

    With no patterns, or with patterns that match everything (see
    _kindpatsalwaysmatch), the matcher reports always() and matches every
    file. Otherwise the match function comes from _buildmatch().

    normalize is the callable used to turn patterns into (kind, pat, source)
    tuples; the remaining arguments are forwarded to it or to _buildmatch().
    '''

    def __init__(self, root, cwd, normalize, patterns, default='glob',
                 auditor=None, ctx=None, listsubrepos=False, warn=None,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn,
                                             relativeuipath=bool(patterns))

        # Defaults describe the match-everything case; they are replaced
        # below only when there are patterns that restrict the match.
        self._anypats = False
        self._always = True
        self.patternspat = None
        self.matchfn = util.always

        if not patterns:
            return
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            return

        self._always = False
        self._files = _explicitfiles(kindpats)
        self._anypats = _anypats(kindpats)
        self.patternspat, self.matchfn = _buildmatch(ctx, kindpats, '$',
                                                     listsubrepos, root)

    @propertycache
    def _dirs(self):
        # every parent directory of the listed files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Return 'all' when dir itself is listed and this is a prefix
        matcher; otherwise a boolean telling whether dir may contain
        matches.'''
        if self.prefix() and dir in self._fileset:
            return 'all'
        return (not self._fileset or
                '.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        return self._anypats

    def always(self):
        return self._always

    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % self.patternspat)
372 372
class includematcher(basematcher):
    '''Matcher for include/exclude style patterns.

    Patterns default to the 'glob' kind and glob regexps are suffixed with
    '(?:/|$)' when the match function is built.'''

    def __init__(self, root, cwd, normalize, include, auditor=None, ctx=None,
                 listsubrepos=False, warn=None, badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        regex, matchfn = _buildmatch(ctx, kindpats, '(?:/|$)',
                                     listsubrepos, root)
        self.includepat = regex
        self._anypats = _anypats(kindpats)

        recursive, nonrecursive = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(recursive)
        # dirs are directories which are non-recursively included.
        self._dirs = set(nonrecursive)
        self.matchfn = matchfn

    def visitdir(self, dir):
        '''Return 'all' for a recursively included root when no pattern
        kinds are involved; otherwise a boolean telling whether dir should
        be visited.'''
        roots = self._roots
        if dir in roots:
            # Without patterns this is essentially self.prefix() for includes
            if not self._anypats:
                return 'all'
            return True
        if '.' in roots or dir in self._dirs:
            return True
        return any(parentdir in roots for parentdir in util.finddirs(dir))

    def anypats(self):
        return True

    def __repr__(self):
        return ('<includematcher includes=%r>' % self.includepat)
405 405
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # keep a list as-is; copy any other iterable into a list
        if isinstance(files, list):
            self._files = files
        else:
            self._files = list(files)

    # Defined on the class rather than assigned per instance in __init__,
    # so instances do not hold a bound method that refers back to themselves.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # every parent directory of the listed files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Only directories leading to a listed file need to be visited.'''
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
432 433
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        # callbacks always come from the first matcher
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        first = self._m1
        return first(f) and (not self._m2(f) or first.exact(f))

    @propertycache
    def _files(self):
        listed = self._m1.files()
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return listed
        return [f for f in listed if self(f)]

    def visitdir(self, dir):
        excluded = self._m2.visitdir(dir)
        if excluded == 'all':
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
485 486
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    If either argument is None the other one is returned. If one of the
    matchers reports always(), a shallow copy of the other one is returned
    instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(combined, attr, getattr(m1, attr))
        combined._relativeuipath |= m1._relativeuipath
        return combined

    if m2.always():
        combined = copy.copy(m1)
        combined._relativeuipath |= m2._relativeuipath
        return combined

    return intersectionmatcher(m1, m2)
510 511
class intersectionmatcher(basematcher):
    '''Matches the files that both m1 and m2 match.

    root, cwd, bad, explicitdir and traversedir are taken from m1.'''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # filter the exact matcher's files through the other matcher
        if self._m1.isexact():
            exactm, otherm = self._m1, self._m2
        else:
            exactm, otherm = self._m2, self._m1
        return [f for f in exactm.files() if otherm(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
554 555
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher

        parentfiles = matcher._files
        prefix = path + "/"
        # keep the files below path, with the "path/" prefix stripped
        self._files = [f[len(prefix):] for f in parentfiles
                       if f.startswith(prefix)]

        if matcher.prefix():
            # If the parent repo had a path to this subrepo and the matcher
            # is a prefix matcher, this submatcher always matches.
            self._always = path in parentfiles
        else:
            self._always = matcher.always()

    def _parentpath(self, f):
        # the name of f as seen by the wrapped matcher
        return self._path + "/" + f

    def bad(self, f, msg):
        self._matcher.bad(self._parentpath(f), msg)

    def abs(self, f):
        return self._matcher.abs(self._parentpath(f))

    def rel(self, f):
        return self._matcher.rel(self._parentpath(f))

    def uipath(self, f):
        return self._matcher.uipath(self._parentpath(f))

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._parentpath(f))

    def visitdir(self, dir):
        # '.' is the subdirectory itself
        if dir == '.':
            return self._matcher.visitdir(self._path)
        return self._matcher.visitdir(self._parentpath(dir))

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()
630 631
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise default is returned.'''
    kind, pat = _patsplit(pattern, default)
    return kind
634 635
635 636 def _patsplit(pattern, default):
636 637 """Split a string into the optional pattern kind prefix and the actual
637 638 pattern."""
638 639 if ':' in pattern:
639 640 kind, pat = pattern.split(':', 1)
640 641 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
641 642 'listfile', 'listfile0', 'set', 'include', 'subinclude',
642 643 'rootfilesin'):
643 644 return kind, pat
644 645 return default, pattern
645 646
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    escape = util.re.escape
    pieces = []
    depth = 0  # number of currently open '{' groups
    pos, end = 0, len(pat)
    while pos < end:
        ch = pat[pos:pos + 1]
        pos += 1
        if ch == '*':
            if pat[pos:pos + 1] != '*':
                # a single star stays within one path component
                pieces.append('[^/]*')
                continue
            pos += 1
            if pat[pos:pos + 1] == '/':
                # '**/' also matches zero directories
                pos += 1
                pieces.append('(?:.*/)?')
            else:
                pieces.append('.*')
        elif ch == '?':
            pieces.append('.')
        elif ch == '[':
            # look for the closing bracket; a leading '!' or ']' belongs to
            # the class itself
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat the bracket literally
                pieces.append('\\[')
                continue
            members = pat[pos:close].replace('\\', '\\\\')
            pos = close + 1
            if members[0:1] == '!':
                members = '^' + members[1:]
            elif members[0:1] == '^':
                members = '\\' + members
            pieces.append('[%s]' % members)
        elif ch == '{':
            depth += 1
            pieces.append('(?:')
        elif ch == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif ch == ',' and depth:
            pieces.append('|')
        elif ch == '\\' and pat[pos:pos + 1]:
            # a backslash quotes the character that follows it
            pieces.append(escape(pat[pos:pos + 1]))
            pos += 1
        else:
            # ordinary characters, and special ones with no special meaning
            # here ('}' or ',' outside a group, a trailing backslash)
            pieces.append(escape(ch))
    return ''.join(pieces)
723 724
724 725 def _regex(kind, pat, globsuffix):
725 726 '''Convert a (normalized) pattern of any kind into a regular expression.
726 727 globsuffix is appended to the regexp of globs.'''
727 728 if not pat:
728 729 return ''
729 730 if kind == 're':
730 731 return pat
731 732 if kind == 'path':
732 733 if pat == '.':
733 734 return ''
734 735 return '^' + util.re.escape(pat) + '(?:/|$)'
735 736 if kind == 'rootfilesin':
736 737 if pat == '.':
737 738 escaped = ''
738 739 else:
739 740 # Pattern is a directory name.
740 741 escaped = util.re.escape(pat) + '/'
741 742 # Anything after the pattern must be a non-directory.
742 743 return '^' + escaped + '[^/]+$'
743 744 if kind == 'relglob':
744 745 return '(?:|.*/)' + _globre(pat) + globsuffix
745 746 if kind == 'relpath':
746 747 return util.re.escape(pat) + '(?:/|$)'
747 748 if kind == 'relre':
748 749 if pat.startswith('^'):
749 750 return pat
750 751 return '.*' + pat
751 752 return _globre(pat) + globsuffix
752 753
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    Up to three match functions are combined: one for 'subinclude'
    patterns, one for the files produced by 'set' patterns, and one regexp
    matcher for all remaining patterns. A file matches if any of them
    accepts it.'''
    matchers = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # sub-matchers are created on first use and cached by prefix
        cache = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = cache.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    cache[prefix] = submatch

                if submatch(f[len(prefix):]):
                    return True
            return False
        matchers.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchers.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchers.append(regexmatch)

    if len(matchers) == 1:
        return regex, matchers[0]

    def matchany(f):
        return any(mf(f) for mf in matchers)
    return regex, matchany
787 788
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a pattern does not compile, naming the first
    offending pattern (and its source, when it has one)."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        a = _buildregexmatch(kindpats[:half], globsuffix)[1]
        b = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one by one to find the one to blame
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (s, k, p))
        raise error.Abort(_("invalid pattern"))
818 819
819 820 def _patternrootsanddirs(kindpats):
820 821 '''Returns roots and directories corresponding to each pattern.
821 822
822 823 This calculates the roots and directories exactly matching the patterns and
823 824 returns a tuple of (roots, dirs) for each. It does not return other
824 825 directories which may also need to be considered, like the parent
825 826 directories.
826 827 '''
827 828 r = []
828 829 d = []
829 830 for kind, pat, source in kindpats:
830 831 if kind == 'glob': # find the non-glob prefix
831 832 root = []
832 833 for p in pat.split('/'):
833 834 if '[' in p or '{' in p or '*' in p or '?' in p:
834 835 break
835 836 root.append(p)
836 837 r.append('/'.join(root) or '.')
837 838 elif kind in ('relpath', 'path'):
838 839 r.append(pat or '.')
839 840 elif kind in ('rootfilesin',):
840 841 d.append(pat or '.')
841 842 else: # relglob, re, relre
842 843 r.append('.')
843 844 return r, d
844 845
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of what _patternrootsanddirs() returns.'''
    return _patternrootsanddirs(kindpats)[0]
849 850
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    for known in (exactdirs, roots):
        exactdirs.extend(util.dirs(known))
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
881 882
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = []
    for kindpat in kindpats:
        if kindpat[0] != 'rootfilesin':
            filable.append(kindpat)
    return _roots(filable)
894 895
895 896 def _anypats(kindpats):
896 897 for kind, pat, source in kindpats:
897 898 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
898 899 return True
899 900
# compiled lazily by readpatternfile() the first time a line contains '#'
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.

    warn, when given, is called with a message for each 'syntax:' line that
    names an unknown syntax; such lines are otherwise ignored.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # the with block closes the file even if parsing raises
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a line may override the current default with its own prefix
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now