##// END OF EJS Templates
match: move normalize() call out of matcher constructors...
Martin von Zweigbergk -
r32556:5f08eca8 default
parent child Browse files
Show More
@@ -1,986 +1,983 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
propertycache = util.propertycache

def _rematcher(regex):
    '''Compile ``regex`` with the best available regexp engine and return
    a callable that matches a string against it.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        # plain regexp objects only offer match()
        matcher = compiled.match
    return matcher
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
55 55 def _expandsubinclude(kindpats, root):
56 56 '''Returns the list of subinclude matcher args and the kindpats without the
57 57 subincludes in it.'''
58 58 relmatchers = []
59 59 other = []
60 60
61 61 for kind, pat, source in kindpats:
62 62 if kind == 'subinclude':
63 63 sourceroot = pathutil.dirname(util.normpath(source))
64 64 pat = util.pconvert(pat)
65 65 path = pathutil.join(sourceroot, pat)
66 66
67 67 newroot = pathutil.dirname(path)
68 68 matcherargs = (newroot, '', [], ['include:%s' % path])
69 69
70 70 prefix = pathutil.canonpath(root, root, newroot)
71 71 if prefix:
72 72 prefix += '/'
73 73 relmatchers.append((prefix, matcherargs))
74 74 else:
75 75 other.append((kind, pat, source))
76 76
77 77 return relmatchers, other
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed on to pathutil.canonpath() when canonicalizing patterns
    ctx - context used to evaluate 'set:' patterns; with icasefs it is also
          used to reach the dirstate
    listsubrepos - also evaluate 'set:' patterns in the subrepos of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Raises error.ProgrammingError when both exact and icasefs are set.
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    # Patterns are normalized here, so that the matcher constructors only
    # ever see ready-made kindpats.
    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        m = patternmatcher(root, cwd, kindpats, ctx=ctx,
                           listsubrepos=listsubrepos, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably adding a
        # "nevermatcher").
        m = alwaysmatcher(root, cwd, badfn)

    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = includematcher(root, cwd, kindpats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
168 168
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given list of file names.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
171 171
def always(root, cwd):
    '''Return an alwaysmatcher, i.e. a matcher that matches everything.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
174 174
def badmatch(match, badfn):
    """Return a shallow copy of the given matcher whose bad method is
    replaced with badfn. The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
182 182
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. source is the name of
    the listfile or include file a pattern came from, or '' for patterns
    given directly.

    Raises error.Abort when a listfile cannot be read or when processing
    an included pattern file aborts. An unreadable include file is only
    reported through warn (if given) and then skipped.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # the patterns read from the file are attributed to that file
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # exceptions are not indexable on Python 3; .args works
                # on both Python 2 and 3
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
223 223
class basematcher(object):
    '''Common base of all matchers.

    Used as is, it matches nothing and lists no files; subclasses
    override matchfn(), visitdir() and the predicate methods below.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        self._relativeuipath = relativeuipath
        # only shadow the class-level bad() when a callback was supplied
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        return (self._relativeuipath and self.rel(f)) or self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        return False

    def prefix(self):
        '''True when the matcher is neither always(), isexact() nor
        anypats().'''
        return not self.always() and not self.isexact() and not self.anypats()
319 319
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None):
        # no patterns were involved, so display paths are not made
        # relative to cwd
        super(alwaysmatcher, self).__init__(root, cwd, badfn,
                                            relativeuipath=False)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory, at any depth, is of interest
        return 'all'

    def __repr__(self):
        return '<alwaysmatcher>'
338 338
class patternmatcher(basematcher):
    '''Matches files against a list of already normalized kindpats.

    kindpats is a list of (kind, pat, source) tuples; callers are
    expected to have run the patterns through a normalize function
    before constructing the matcher.
    '''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        if not _kindpatsalwaysmatch(kindpats):
            self._files = _explicitfiles(kindpats)
            self._anypats = _anypats(kindpats)
            self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                               listsubrepos, root)
            self._always = False
            self.matchfn = pm
        else:
            # patterns such as 'relpath:.' match everything: skip building
            # a regexp altogether
            self._anypats = False
            self.patternspat = None
            self._always = True
            self.matchfn = lambda f: True

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        if self.always():
            return 'all'
        if self.prefix() and dir in self._fileset:
            return 'all'
        return ('.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        return self._anypats

    def always(self):
        return self._always

    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % self.patternspat)
383 381
class includematcher(basematcher):
    '''Matcher built from include/exclude style patterns.

    kindpats is a list of already normalized (kind, pat, source) tuples.
    Globs are suffixed with '(?:/|$)', so a glob naming a directory also
    matches everything below it.
    '''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                          listsubrepos, root)
        self._anypats = _anypats(kindpats)
        roots, dirs = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        self.matchfn = im

    def visitdir(self, dir):
        if not self._anypats and dir in self._roots:
            # The condition above is essentially self.prefix() for includes
            return 'all'
        return ('.' in self._roots or
                dir in self._roots or
                dir in self._dirs or
                any(parentdir in self._roots
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        return True

    def __repr__(self):
        return ('<includematcher includes=%r>' % self.includepat)
416 413
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # a list is stored as is (not copied); any other iterable is
        # turned into a list
        self._files = files if isinstance(files, list) else list(files)

    # a file matches exactly when it is one of the listed files
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
444 441
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        # callbacks are taken over from the first matcher only
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        # files explicitly listed in m1 survive the exclusion by m2
        return self._m1(f) and (not self._m2(f) or self._m1.exact(f))

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        if self._m2.visitdir(dir) == 'all':
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
497 494
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    If either matcher is None the other one is returned; if either one
    always matches, a copy of the other one is returned instead of a
    real intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        result = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(result, attr, getattr(m1, attr))
        result._relativeuipath |= m1._relativeuipath
        return result

    if m2.always():
        result = copy.copy(m1)
        result._relativeuipath |= m2._relativeuipath
        return result

    return intersectionmatcher(m1, m2)
522 519
class intersectionmatcher(basematcher):
    '''Matches the files that both of the wrapped matchers match.

    root, cwd and the bad/explicitdir/traversedir callbacks are taken
    from the first matcher.
    '''
    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if self.isexact():
            # filter the files of the exact matcher through the other one
            m1, m2 = self._m1, self._m2
            if not m1.isexact():
                m1, m2 = m2, m1
            return [f for f in m1.files() if m2(f)]
        # If neither m1 nor m2 is an exact matcher, we can't easily intersect
        # the set of files, because their files() are not always files. For
        # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
        # "path:dir2", we don't want to remove "dir2" from the set.
        return self._m1.files() + self._m2.files()

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == 'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit1 and self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
566 563
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # keep the files below path, with the leading "path/" stripped
        subprefix = path + "/"
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(subprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        return self._matcher.uipath(self._path + "/" + f)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' denotes the subdirectory itself
        if dir == '.':
            dir = self._path
        else:
            dir = self._path + "/" + dir
        return self._matcher.visitdir(dir)

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()

    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
646 643
def patkind(pattern, default=None):
    '''Return the kind of pattern when it is of the form 'kind:pat' with a
    known kind, and default otherwise.'''
    kind, _pat = _patsplit(pattern, default)
    return kind
650 647
651 648 def _patsplit(pattern, default):
652 649 """Split a string into the optional pattern kind prefix and the actual
653 650 pattern."""
654 651 if ':' in pattern:
655 652 kind, pat = pattern.split(':', 1)
656 653 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
657 654 'listfile', 'listfile0', 'set', 'include', 'subinclude',
658 655 'rootfilesin'):
659 656 return kind, pat
660 657 return default, pattern
661 658
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    i, n = 0, len(pat)
    res = ''
    group = 0  # nesting depth of currently open '{' alternations
    escape = util.re.escape
    def peek():
        # next unread character, or False at the end of the pattern
        return i < n and pat[i:i + 1]
    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches any number of leading directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    res += '.*'
            else:
                # a single '*' never crosses a directory separator
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a ']' or '!' directly after the
            # '[' belongs to the class
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat the '[' literally
                res += '\\['
            else:
                charset = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if charset[0:1] == '!':
                    # glob negation becomes regexp negation
                    charset = '^' + charset[1:]
                elif charset[0:1] == '^':
                    # a leading '^' is literal in a glob
                    charset = '\\' + charset
                res = '%s[%s]' % (res, charset)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            res += '|'
        elif c == '\\':
            # a backslash escapes the next character, if there is one
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                res += escape(c)
        else:
            # '}' or ',' outside of any group
            res += escape(c)
    return res
739 736
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pattern always yields an empty regexp.'''
    if not pat:
        return ''

    escape = util.re.escape
    if kind == 're':
        return pat
    elif kind == 'path':
        if pat == '.':
            return ''
        return '^' + escape(pat) + '(?:/|$)'
    elif kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root itself).
        dirprefix = '' if pat == '.' else escape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return '^' + dirprefix + '[^/]+$'
    elif kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    elif kind == 'relpath':
        return escape(pat) + '(?:/|$)'
    elif kind == 'relre':
        # an anchored regexp is used as is, others may match anywhere
        if pat.startswith('^'):
            return pat
        return '.*' + pat
    # anything else is handled as a glob
    return _globre(pat) + globsuffix
768 765
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    'subinclude' and 'set' patterns are handled by dedicated match
    functions; the remaining patterns are combined into one regexp. The
    returned function matches when any of these match.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for the subincludes are built lazily, on first use
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                mf = submatchers.get(prefix)
                if mf is None:
                    mf = match(*matcherargs)
                    submatchers[prefix] = mf

                if mf(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
803 800
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a pattern is not a valid regexp."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            raise
        regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
        regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one by one to report the offending one
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                     (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
834 831
835 832 def _patternrootsanddirs(kindpats):
836 833 '''Returns roots and directories corresponding to each pattern.
837 834
838 835 This calculates the roots and directories exactly matching the patterns and
839 836 returns a tuple of (roots, dirs) for each. It does not return other
840 837 directories which may also need to be considered, like the parent
841 838 directories.
842 839 '''
843 840 r = []
844 841 d = []
845 842 for kind, pat, source in kindpats:
846 843 if kind == 'glob': # find the non-glob prefix
847 844 root = []
848 845 for p in pat.split('/'):
849 846 if '[' in p or '{' in p or '*' in p or '?' in p:
850 847 break
851 848 root.append(p)
852 849 r.append('/'.join(root) or '.')
853 850 elif kind in ('relpath', 'path'):
854 851 r.append(pat or '.')
855 852 elif kind in ('rootfilesin',):
856 853 d.append(pat or '.')
857 854 else: # relglob, re, relre
858 855 r.append('.')
859 856 return r, d
860 857
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # the non-recursive directories are of no interest here
    return _patternrootsanddirs(kindpats)[0]
865 862
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    exactdirs.extend(util.dirs(exactdirs))
    exactdirs.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
897 894
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = []
    for kindpat in kindpats:
        if kindpat[0] not in ('rootfilesin',):
            filable.append(kindpat)
    return _roots(filable)
910 907
911 908 def _anypats(kindpats):
912 909 for kind, pat, source in kindpats:
913 910 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
914 911 return True
915 912
# compiled lazily by readpatternfile() the first time a '#' is seen
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.

    warn, if given, is called with a message for every 'syntax:' line
    naming an unknown syntax; such lines are otherwise ignored.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # the with block closes the file even if parsing a line raises
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now