##// END OF EJS Templates
match: handle exact matching using new exactmatcher
Martin von Zweigbergk -
r32499:a3583852 default
parent child Browse files
Show More
@@ -1,949 +1,979
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import copy
11 11 import os
12 12 import re
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 pathutil,
18 18 util,
19 19 )
20 20
21 21 propertycache = util.propertycache
22 22
def _rematcher(regex):
    '''Compile ``regex`` with ``util.re`` and return a matcher function.

    The compiled object's ``test_match`` attribute is returned when it
    exists (slightly faster, provided by facebook's re2 bindings);
    otherwise its ordinary ``match`` method is returned.'''
    compiled = util.re.compile(regex)
    try:
        matchfn = compiled.test_match
    except AttributeError:
        matchfn = compiled.match
    return matchfn
32 32
33 33 def _expandsets(kindpats, ctx, listsubrepos):
34 34 '''Returns the kindpats list with the 'set' patterns expanded.'''
35 35 fset = set()
36 36 other = []
37 37
38 38 for kind, pat, source in kindpats:
39 39 if kind == 'set':
40 40 if not ctx:
41 41 raise error.ProgrammingError("fileset expression with no "
42 42 "context")
43 43 s = ctx.getfileset(pat)
44 44 fset.update(s)
45 45
46 46 if listsubrepos:
47 47 for subpath in ctx.substate:
48 48 s = ctx.sub(subpath).getfileset(pat)
49 49 fset.update(subpath + '/' + f for f in s)
50 50
51 51 continue
52 52 other.append((kind, pat, source))
53 53 return fset, other
54 54
55 55 def _expandsubinclude(kindpats, root):
56 56 '''Returns the list of subinclude matcher args and the kindpats without the
57 57 subincludes in it.'''
58 58 relmatchers = []
59 59 other = []
60 60
61 61 for kind, pat, source in kindpats:
62 62 if kind == 'subinclude':
63 63 sourceroot = pathutil.dirname(util.normpath(source))
64 64 pat = util.pconvert(pat)
65 65 path = pathutil.join(sourceroot, pat)
66 66
67 67 newroot = pathutil.dirname(path)
68 68 matcherargs = (newroot, '', [], ['include:%s' % path])
69 69
70 70 prefix = pathutil.canonpath(root, root, newroot)
71 71 if prefix:
72 72 prefix += '/'
73 73 relmatchers.append((prefix, matcherargs))
74 74 else:
75 75 other.append((kind, pat, source))
76 76
77 77 return relmatchers, other
78 78
79 79 def _kindpatsalwaysmatch(kindpats):
80 80 """"Checks whether the kindspats match everything, as e.g.
81 81 'relpath:.' does.
82 82 """
83 83 for kind, pat, source in kindpats:
84 84 if pat != '' or kind not in ['relpath', 'glob']:
85 85 return False
86 86 return True
87 87
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed through to pattern normalization
    ctx - context used to evaluate 'set:' patterns; with icasefs it is also
          used to reach the dirstate, so it must not be None in that case
    listsubrepos - also evaluate 'set:' patterns in the subrepos of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    raises error.ProgrammingError if both exact and icasefs are requested.
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    # Build the primary matcher exactly once: exact file lists get the
    # dedicated exactmatcher, everything else goes through pattern
    # normalization in matcher.
    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    else:
        m = matcher(root, cwd, normalize, patterns, include=None,
                    default=default, exact=exact, auditor=auditor, ctx=ctx,
                    listsubrepos=listsubrepos, warn=warn, badfn=badfn)
    # Includes and excludes are separate matchers composed on top of the
    # primary one.
    if include:
        im = matcher(root, cwd, normalize, [], include=include, default=default,
                     exact=False, auditor=auditor, ctx=ctx,
                     listsubrepos=listsubrepos, warn=warn, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        em = matcher(root, cwd, normalize, [], include=exclude, default=default,
                     exact=False, auditor=auditor, ctx=ctx,
                     listsubrepos=listsubrepos, warn=warn, badfn=None)
        m = differencematcher(m, em)
    return m
159 162
def exact(root, cwd, files, badfn=None):
    '''Return a matcher for exactly the paths in ``files``.

    root and cwd are stored on the matcher, files are taken as literal
    paths (no kind prefixes), and badfn, when not None, replaces the
    matcher's default bad() callback.'''
    return exactmatcher(root, cwd, files, badfn=badfn)
162 165
def always(root, cwd):
    '''Return the matcher that match() builds for an empty pattern list.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
165 168
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad method is ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
173 176
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Turn the 'kind:pat' strings in ``patterns`` into kindpats.

    Returns a list of (kind, pattern, source) tuples. Patterns without a
    known kind prefix get ``default`` as their kind. 'glob' and 'relpath'
    patterns are made canonical relative to root/cwd, 'relglob', 'path' and
    'rootfilesin' patterns are path-normalized, and 're'/'relre' patterns
    are passed through. 'listfile', 'listfile0' and 'include' entries are
    replaced by the normalized patterns read from the file they name.

    Raises error.Abort if a list file cannot be read or if an included
    pattern file aborts; an unreadable include file only triggers ``warn``
    (when given) and is skipped.'''
    kindpats = []
    splitpats = [_patsplit(p, default) for p in patterns]
    for kind, pat in splitpats:
        if kind in ('listfile', 'listfile0'):
            try:
                data = util.readfile(pat)
                if kind == 'listfile0':
                    names = data.split('\0')
                else:
                    names = data.splitlines()
                names = [name for name in names if name]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            nested = _donormalize(names, default, root, cwd, auditor, warn)
            # Entries coming from a list file report that file as source.
            for k, p, source in nested:
                kindpats.append((k, p, pat))
            continue

        if kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                nested = _donormalize(includepats, default, root, cwd,
                                      auditor, warn)
                # Keep a nested source if there is one, else name this file.
                for k, p, source in nested:
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                raise error.Abort('%s: %s' % (pat, inst[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue

        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
214 217
class basematcher(object):
    '''Base class for matchers.

    On its own it matches nothing: files() is empty, matchfn() and
    visitdir() return False. Subclasses override the relevant pieces.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        self._relativeuipath = relativeuipath
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relative = self.rel(f)
            # An empty relative path falls through to abs(), too.
            if relative:
                return relative
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        return False

    def prefix(self):
        '''True when the matcher is neither always(), isexact() nor
        anypats().'''
        return not (self.always() or self.isexact() or self.anypats())
310 313
class matcher(basematcher):
    '''Matcher built from a list of patterns and/or a list of includes.

    A file matches when it satisfies every match function that was set up
    in the constructor (the include regex/fileset matcher, plus either the
    exact file list or the pattern matcher). With no match function at all
    the matcher matches everything and always() is True.'''

    def __init__(self, root, cwd, normalize, patterns, include=None,
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None, badfn=None):
        '''Set up the match functions.

        normalize is called as normalize(pats, default, root, cwd, auditor,
        warn) and must return a list of (kind, pat, source) tuples. Includes
        are always normalized with 'glob' as the default kind; patterns use
        ``default``. When exact is true, patterns is taken as the literal
        list of files and is not normalized.'''
        super(matcher, self).__init__(root, cwd, badfn,
                                      relativeuipath=bool(include or patterns))
        if include is None:
            include = []

        self._anypats = bool(include)
        self._anyincludepats = False
        self._always = False
        # Regex strings built for the patterns and the includes; they are
        # only used by __repr__ in this class.
        self.patternspat = None
        self.includepat = None

        # roots are directories which are recursively included.
        self._includeroots = set()
        # dirs are directories which are non-recursively included.
        self._includedirs = set()

        matchfns = []
        if include:
            kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
            # '(?:/|$)' lets an include glob match a directory prefix as
            # well as a complete file name.
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            self._anyincludepats = _anypats(kindpats)
            roots, dirs = _rootsanddirs(kindpats)
            self._includeroots.update(roots)
            self._includedirs.update(dirs)
            matchfns.append(im)
        if exact:
            # Reuse the caller's list when given one; copy other iterables.
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = normalize(patterns, default, root, cwd, auditor, warn)
            # Patterns that match everything add no match function, so the
            # matcher may still end up as always().
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                # '$' makes pattern globs match complete names only.
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos, root)
                matchfns.append(pm)

        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            # A file must satisfy every collected match function.
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m

    @propertycache
    def _dirs(self):
        # Parent directories of the explicit files, plus the root '.'.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Return 'all', True or False for ``dir``; see
        basematcher.visitdir for the meaning of the result.'''
        if self.prefix() and dir in self._fileset:
            return 'all'
        if self._includeroots or self._includedirs:
            if (not self._anyincludepats and
                dir in self._includeroots):
                # The condition above is essentially self.prefix() for includes
                return 'all'
            # Skip directories that are neither an include root or dir nor
            # located below an include root.
            if ('.' not in self._includeroots and
                dir not in self._includeroots and
                dir not in self._includedirs and
                not any(parent in self._includeroots
                        for parent in util.finddirs(dir))):
                return False
        # Visit when there are no explicit files, or when dir is one of
        # them, one of their parents, or below one of them.
        return (not self._fileset or
                '.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        return self._anypats

    def always(self):
        return self._always

    def isexact(self):
        # True only when the exact file list is the sole match function.
        return self.matchfn == self.exact

    def __repr__(self):
        return ('<matcher files=%r, patterns=%r, includes=%r>' %
                (self._files, self.patternspat, self.includepat))
408 411
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # Reuse the caller's list when given one; copy any other iterable.
        self._files = files if isinstance(files, list) else list(files)
        self.matchfn = self.exact

    @propertycache
    def _dirs(self):
        # Parent directories of the listed files, plus the root '.'.
        parents = set(util.dirs(self._fileset))
        parents.add('.')
        return parents

    def visitdir(self, dir):
        '''Visit only directories that lead to one of the listed files.'''
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return '<exactmatcher files=%r>' % self._files
438
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        # Callbacks are inherited from the first matcher only.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        included = self._m1(f)
        if not included:
            return included
        # Files listed explicitly in m1 survive the exclusion by m2.
        return not self._m2(f) or self._m1.exact(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        excluded = self._m2.visitdir(dir) == 'all'
        if excluded:
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def prefix(self):
        return not (self.always() or self.isexact() or self.anypats())

    def __repr__(self):
        return '<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
464 494
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    If either argument is None the other one is returned as is. If one of
    the matchers matches everything, a copy of the other is returned instead
    of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        merged = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for name in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(merged, name, getattr(m1, name))
        merged._relativeuipath |= m1._relativeuipath
        return merged

    if m2.always():
        merged = copy.copy(m1)
        merged._relativeuipath |= m2._relativeuipath
        return merged

    return intersectionmatcher(m1, m2)
489 519
class intersectionmatcher(basematcher):
    '''Matches the files that both wrapped matchers match.

    root, cwd and the bad/explicitdir/traversedir callbacks are taken from
    the first matcher.'''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # Filter the exact matcher's file list through the other matcher.
        if self._m1.isexact():
            exactm, otherm = self._m1, self._m2
        else:
            exactm, otherm = self._m2, self._m1
        return [f for f in exactm.files() if otherm(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return '<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
533 563
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the parent's files below path, with the prefix removed.
        prefix = path + "/"
        self._files = [f[len(prefix):] for f in matcher._files
                       if f.startswith(prefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def _parentpath(self, f):
        '''Return f as a path understood by the wrapped matcher.'''
        return self._path + "/" + f

    def bad(self, f, msg):
        self._matcher.bad(self._parentpath(f), msg)

    def abs(self, f):
        return self._matcher.abs(self._parentpath(f))

    def rel(self, f):
        return self._matcher.rel(self._parentpath(f))

    def uipath(self, f):
        return self._matcher.uipath(self._parentpath(f))

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._parentpath(f))

    def visitdir(self, dir):
        # '.' is the subdirectory itself from the parent's point of view.
        if dir == '.':
            return self._matcher.visitdir(self._path)
        return self._matcher.visitdir(self._parentpath(dir))

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()
609 639
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return ``default``.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
613 643
614 644 def _patsplit(pattern, default):
615 645 """Split a string into the optional pattern kind prefix and the actual
616 646 pattern."""
617 647 if ':' in pattern:
618 648 kind, pat = pattern.split(':', 1)
619 649 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
620 650 'listfile', 'listfile0', 'set', 'include', 'subinclude',
621 651 'rootfilesin'):
622 652 return kind, pat
623 653 return default, pattern
624 654
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned one character at a time; characters without a
    special meaning are regexp-escaped.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    # i is the index of the next unread character, n the pattern length.
    i, n = 0, len(pat)
    res = ''
    # Number of '{' groups currently open.
    group = 0
    escape = util.re.escape
    def peek():
        # Next unread character, or a false value at the end of the pattern.
        return i < n and pat[i:i + 1]
    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' also matches zero directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    res += '.*'
            else:
                # a single '*' does not cross a '/'
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # Look for the closing ']'. A '!' or ']' right after the '['
            # belongs to the class and cannot close it.
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # no closing bracket: treat '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation becomes regexp negation
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in a glob class
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            res += '|'
        elif c == '\\':
            # A backslash makes the following character literal; a trailing
            # backslash stands for itself.
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                res += escape(c)
        else:
            # '}' or ',' outside of any group
            res += escape(c)
    return res
702 732
703 733 def _regex(kind, pat, globsuffix):
704 734 '''Convert a (normalized) pattern of any kind into a regular expression.
705 735 globsuffix is appended to the regexp of globs.'''
706 736 if not pat:
707 737 return ''
708 738 if kind == 're':
709 739 return pat
710 740 if kind == 'path':
711 741 if pat == '.':
712 742 return ''
713 743 return '^' + util.re.escape(pat) + '(?:/|$)'
714 744 if kind == 'rootfilesin':
715 745 if pat == '.':
716 746 escaped = ''
717 747 else:
718 748 # Pattern is a directory name.
719 749 escaped = util.re.escape(pat) + '/'
720 750 # Anything after the pattern must be a non-directory.
721 751 return '^' + escaped + '[^/]+$'
722 752 if kind == 'relglob':
723 753 return '(?:|.*/)' + _globre(pat) + globsuffix
724 754 if kind == 'relpath':
725 755 return util.re.escape(pat) + '(?:/|$)'
726 756 if kind == 'relre':
727 757 if pat.startswith('^'):
728 758 return pat
729 759 return '.*' + pat
730 760 return _globre(pat) + globsuffix
731 761
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs. The returned function
    matches a file if any of the subinclude, fileset or regexp parts of
    kindpats matches it; the returned regexp covers only the patterns left
    after subincludes and filesets were taken out (it is '' if none are).'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for subincluded files are built on first use.
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                if prefix not in submatchers:
                    submatchers[prefix] = match(*matcherargs)
                if submatchers[prefix](f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]

    def matchany(f):
        return any(fn(f) for fn in matchfuncs)
    return regex, matchany
766 796
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when one of the patterns is not a valid regexp."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        a = _buildregexmatch(kindpats[:half], globsuffix)[1]
        b = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # Compile the patterns one by one to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                     (s, k, p))
                raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
797 827
798 828 def _patternrootsanddirs(kindpats):
799 829 '''Returns roots and directories corresponding to each pattern.
800 830
801 831 This calculates the roots and directories exactly matching the patterns and
802 832 returns a tuple of (roots, dirs) for each. It does not return other
803 833 directories which may also need to be considered, like the parent
804 834 directories.
805 835 '''
806 836 r = []
807 837 d = []
808 838 for kind, pat, source in kindpats:
809 839 if kind == 'glob': # find the non-glob prefix
810 840 root = []
811 841 for p in pat.split('/'):
812 842 if '[' in p or '{' in p or '*' in p or '?' in p:
813 843 break
814 844 root.append(p)
815 845 r.append('/'.join(root) or '.')
816 846 elif kind in ('relpath', 'path'):
817 847 r.append(pat or '.')
818 848 elif kind in ('rootfilesin',):
819 849 d.append(pat or '.')
820 850 else: # relglob, re, relre
821 851 r.append('.')
822 852 return r, d
823 853
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of what _patternrootsanddirs() returns.'''
    return _patternrootsanddirs(kindpats)[0]
828 858
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    exactdirs.extend(util.dirs(exactdirs))
    exactdirs.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
860 890
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = [entry for entry in kindpats if entry[0] != 'rootfilesin']
    return _roots(filable)
873 903
874 904 def _anypats(kindpats):
875 905 for kind, pat, source in kindpats:
876 906 if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'):
877 907 return True
878 908
879 909 _commentre = None
880 910
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.

    warn, when not None, is called with a message for every 'syntax:' line
    naming an unknown syntax; such lines are otherwise ignored.

    the file is closed before returning, including when an error is raised
    while it is being read.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # The with block closes the file even if reading a line, the comment
    # regexp or the warn callback raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                # compiled on first use and cached at module level
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line prefix overrides the current default syntax.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now