##// END OF EJS Templates
match: improve documentation - docstrings and more descriptive variable naming...
Mads Kiilerich -
r21111:9d28fd79 default
parent child Browse files
Show More
@@ -9,27 +9,29 b' import re'
9 9 import util, pathutil
10 10 from i18n import _
11 11
12 def _rematcher(pat):
13 m = util.compilere(pat)
12 def _rematcher(regex):
13 '''compile the regexp with the best available regexp engine and return a
14 matcher function'''
15 m = util.compilere(regex)
14 16 try:
15 17 # slightly faster, provided by facebook's re2 bindings
16 18 return m.test_match
17 19 except AttributeError:
18 20 return m.match
19 21
20 def _expandsets(pats, ctx):
21 '''convert set: patterns into a list of files in the given context'''
22 def _expandsets(kindpats, ctx):
23 '''Returns the kindpats list with the 'set' patterns expanded.'''
22 24 fset = set()
23 25 other = []
24 26
25 for kind, expr in pats:
27 for kind, pat in kindpats:
26 28 if kind == 'set':
27 29 if not ctx:
28 30 raise util.Abort("fileset expression with no context")
29 s = ctx.getfileset(expr)
31 s = ctx.getfileset(pat)
30 32 fset.update(s)
31 33 continue
32 other.append((kind, expr))
34 other.append((kind, pat))
33 35 return fset, other
34 36
35 37 class match(object):
@@ -41,10 +43,10 b' class match(object):'
41 43 root - the canonical root of the tree you're matching against
42 44 cwd - the current working directory, if relevant
43 45 patterns - patterns to find
44 include - patterns to include
45 exclude - patterns to exclude
46 default - if a pattern in names has no explicit type, assume this one
47 exact - patterns are actually literals
46 include - patterns to include (unless they are excluded)
47 exclude - patterns to exclude (even if they are included)
48 default - if a pattern in patterns has no explicit type, assume this one
49 exact - patterns are actually filenames (include/exclude still apply)
48 50
49 51 a pattern is one of:
50 52 'glob:<glob>' - a glob relative to cwd
@@ -65,11 +67,11 b' class match(object):'
65 67 self._always = False
66 68
67 69 if include:
68 pats = _normalize(include, 'glob', root, cwd, auditor)
69 self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
70 kindpats = _normalize(include, 'glob', root, cwd, auditor)
71 self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
70 72 if exclude:
71 pats = _normalize(exclude, 'glob', root, cwd, auditor)
72 self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
73 kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
74 self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
73 75 if exact:
74 76 if isinstance(patterns, list):
75 77 self._files = patterns
@@ -77,10 +79,10 b' class match(object):'
77 79 self._files = list(patterns)
78 80 pm = self.exact
79 81 elif patterns:
80 pats = _normalize(patterns, default, root, cwd, auditor)
81 self._files = _roots(pats)
82 self._anypats = self._anypats or _anypats(pats)
83 self.patternspat, pm = _buildmatch(ctx, pats, '$')
82 kindpats = _normalize(patterns, default, root, cwd, auditor)
83 self._files = _roots(kindpats)
84 self._anypats = self._anypats or _anypats(kindpats)
85 self.patternspat, pm = _buildmatch(ctx, kindpats, '$')
84 86
85 87 if patterns or exact:
86 88 if include:
@@ -114,28 +116,48 b' class match(object):'
114 116 def __iter__(self):
115 117 for f in self._files:
116 118 yield f
119
120 # Callbacks related to how the matcher is used by dirstate.walk.
121 # Subscribers to these events must monkeypatch the matcher object.
117 122 def bad(self, f, msg):
118 '''callback for each explicit file that can't be
119 found/accessed, with an error message
120 '''
123 '''Callback from dirstate.walk for each explicit file that can't be
124 found/accessed, with an error message.'''
121 125 pass
122 # If this is set, it will be called when an explicitly listed directory is
123 # visited.
126
127 # If an explicitdir is set, it will be called when an explicitly listed
128 # directory is visited.
124 129 explicitdir = None
125 # If this is set, it will be called when a directory discovered by recursive
126 # traversal is visited.
130
131 # If a traversedir is set, it will be called when a directory discovered
132 # by recursive traversal is visited.
127 133 traversedir = None
134
128 135 def missing(self, f):
129 136 pass
130 def exact(self, f):
131 return f in self._fmap
137
132 138 def rel(self, f):
139 '''Convert repo path back to path that is relative to cwd of matcher.'''
133 140 return util.pathto(self._root, self._cwd, f)
141
134 142 def files(self):
143 '''Explicitly listed files or patterns or roots:
144 if no patterns or .always(): empty list,
145 if exact: list exact files,
146 if not .anypats(): list all files and dirs,
147 else: optimal roots'''
135 148 return self._files
149
150 def exact(self, f):
151 '''Returns True if f is in .files().'''
152 return f in self._fmap
153
136 154 def anypats(self):
155 '''Matcher uses patterns or include/exclude.'''
137 156 return self._anypats
157
138 158 def always(self):
159 '''Matcher will match everything and .files() will be empty
160 - optimization might be possible and necessary.'''
139 161 return self._always
140 162
141 163 class exact(match):
@@ -191,21 +213,22 b' class narrowmatcher(match):'
191 213 def bad(self, f, msg):
192 214 self._matcher.bad(self._path + "/" + f, msg)
193 215
194 def patkind(pat):
195 return _patsplit(pat, None)[0]
216 def patkind(pattern, default=None):
217 '''If pattern is 'kind:pat' with a known kind, return kind.'''
218 return _patsplit(pattern, default)[0]
196 219
197 def _patsplit(pat, default):
198 """Split a string into an optional pattern kind prefix and the
199 actual pattern."""
200 if ':' in pat:
201 kind, val = pat.split(':', 1)
220 def _patsplit(pattern, default):
221 """Split a string into the optional pattern kind prefix and the actual
222 pattern."""
223 if ':' in pattern:
224 kind, pat = pattern.split(':', 1)
202 225 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
203 226 'listfile', 'listfile0', 'set'):
204 return kind, val
205 return default, pat
227 return kind, pat
228 return default, pattern
206 229
207 230 def _globre(pat):
208 "convert a glob pattern into a regexp"
231 '''Convert an extended glob string to a regexp string.'''
209 232 i, n = 0, len(pat)
210 233 res = ''
211 234 group = 0
@@ -260,83 +283,90 b' def _globre(pat):'
260 283 res += escape(c)
261 284 return res
262 285
263 def _regex(kind, name, tail):
264 '''convert a pattern into a regular expression'''
265 if not name:
286 def _regex(kind, pat, globsuffix):
287 '''Convert a (normalized) pattern of any kind into a regular expression.
288 globsuffix is appended to the regexp of globs.'''
289 if not pat:
266 290 return ''
267 291 if kind == 're':
268 return name
269 elif kind == 'path':
270 return '^' + re.escape(name) + '(?:/|$)'
271 elif kind == 'relglob':
272 return '(?:|.*/)' + _globre(name) + tail
273 elif kind == 'relpath':
274 return re.escape(name) + '(?:/|$)'
275 elif kind == 'relre':
276 if name.startswith('^'):
277 return name
278 return '.*' + name
279 return _globre(name) + tail
292 return pat
293 if kind == 'path':
294 return '^' + re.escape(pat) + '(?:/|$)'
295 if kind == 'relglob':
296 return '(?:|.*/)' + _globre(pat) + globsuffix
297 if kind == 'relpath':
298 return re.escape(pat) + '(?:/|$)'
299 if kind == 'relre':
300 if pat.startswith('^'):
301 return pat
302 return '.*' + pat
303 return _globre(pat) + globsuffix
280 304
281 def _buildmatch(ctx, pats, tail):
282 fset, pats = _expandsets(pats, ctx)
283 if not pats:
305 def _buildmatch(ctx, kindpats, globsuffix):
306 '''Return regexp string and a matcher function for kindpats.
307 globsuffix is appended to the regexp of globs.'''
308 fset, kindpats = _expandsets(kindpats, ctx)
309 if not kindpats:
284 310 return "", fset.__contains__
285 311
286 pat, mf = _buildregexmatch(pats, tail)
312 regex, mf = _buildregexmatch(kindpats, globsuffix)
287 313 if fset:
288 return pat, lambda f: f in fset or mf(f)
289 return pat, mf
314 return regex, lambda f: f in fset or mf(f)
315 return regex, mf
290 316
291 def _buildregexmatch(pats, tail):
292 """build a matching function from a set of patterns"""
317 def _buildregexmatch(kindpats, globsuffix):
318 """Build a match function from a list of (kind, pat) tuples,
319 return regexp string and a matcher function."""
293 320 try:
294 pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
295 if len(pat) > 20000:
321 regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
322 for (k, p) in kindpats])
323 if len(regex) > 20000:
296 324 raise OverflowError
297 return pat, _rematcher(pat)
325 return regex, _rematcher(regex)
298 326 except OverflowError:
299 327 # We're using a Python with a tiny regex engine and we
300 328 # made it explode, so we'll divide the pattern list in two
301 329 # until it works
302 l = len(pats)
330 l = len(kindpats)
303 331 if l < 2:
304 332 raise
305 pata, a = _buildregexmatch(pats[:l//2], tail)
306 patb, b = _buildregexmatch(pats[l//2:], tail)
333 regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
334 regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
307 return pat, lambda s: a(s) or b(s)
335 return regex, lambda s: a(s) or b(s)
308 336 except re.error:
309 for k, p in pats:
337 for k, p in kindpats:
310 338 try:
311 _rematcher('(?:%s)' % _regex(k, p, tail))
339 _rematcher('(?:%s)' % _regex(k, p, globsuffix))
312 340 except re.error:
313 341 raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
314 342 raise util.Abort(_("invalid pattern"))
315 343
316 def _normalize(names, default, root, cwd, auditor):
317 pats = []
318 for kind, name in [_patsplit(p, default) for p in names]:
344 def _normalize(patterns, default, root, cwd, auditor):
345 '''Convert 'kind:pat' from the patterns list to tuples with kind and
346 normalized and rooted patterns and with listfiles expanded.'''
347 kindpats = []
348 for kind, pat in [_patsplit(p, default) for p in patterns]:
319 349 if kind in ('glob', 'relpath'):
320 name = pathutil.canonpath(root, cwd, name, auditor)
350 pat = pathutil.canonpath(root, cwd, pat, auditor)
321 351 elif kind in ('relglob', 'path'):
322 name = util.normpath(name)
352 pat = util.normpath(pat)
323 353 elif kind in ('listfile', 'listfile0'):
324 354 try:
325 files = util.readfile(name)
355 files = util.readfile(pat)
326 356 if kind == 'listfile0':
327 357 files = files.split('\0')
328 358 else:
329 359 files = files.splitlines()
330 360 files = [f for f in files if f]
331 361 except EnvironmentError:
332 raise util.Abort(_("unable to read file list (%s)") % name)
333 pats += _normalize(files, default, root, cwd, auditor)
362 raise util.Abort(_("unable to read file list (%s)") % pat)
363 kindpats += _normalize(files, default, root, cwd, auditor)
334 364 continue
365 # else: re or relre - which cannot be normalized
366 kindpats.append((kind, pat))
367 return kindpats
335 368
336 pats.append((kind, name))
337 return pats
338
339 def _roots(patterns):
369 def _roots(kindpats):
340 370 '''return roots and exact explicitly listed files from patterns
341 371
342 372 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
@@ -347,21 +377,21 b' def _roots(patterns):'
347 377 ['.', '.', '.']
348 378 '''
349 379 r = []
350 for kind, name in patterns:
380 for kind, pat in kindpats:
351 381 if kind == 'glob': # find the non-glob prefix
352 382 root = []
353 for p in name.split('/'):
383 for p in pat.split('/'):
354 384 if '[' in p or '{' in p or '*' in p or '?' in p:
355 385 break
356 386 root.append(p)
357 387 r.append('/'.join(root) or '.')
358 388 elif kind in ('relpath', 'path'):
359 r.append(name or '.')
389 r.append(pat or '.')
360 390 else: # relglob, re, relre
361 391 r.append('.')
362 392 return r
363 393
364 def _anypats(patterns):
365 for kind, name in patterns:
394 def _anypats(kindpats):
395 for kind, pat in kindpats:
366 396 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
367 397 return True
@@ -564,23 +564,27 b' def revrange(repo, revs):'
564 564 return l
565 565
566 566 def expandpats(pats):
567 '''Expand bare globs when running on windows.
568 On posix we assume it already has already been done by sh.'''
567 569 if not util.expandglobs:
568 570 return list(pats)
569 571 ret = []
570 for p in pats:
571 kind, name = matchmod._patsplit(p, None)
572 for kindpat in pats:
573 kind, pat = matchmod._patsplit(kindpat, None)
572 574 if kind is None:
573 575 try:
574 globbed = glob.glob(name)
576 globbed = glob.glob(pat)
575 577 except re.error:
576 globbed = [name]
578 globbed = [pat]
577 579 if globbed:
578 580 ret.extend(globbed)
579 581 continue
580 ret.append(p)
582 ret.append(kindpat)
581 583 return ret
582 584
583 585 def matchandpats(ctx, pats=[], opts={}, globbed=False, default='relpath'):
586 '''Return a matcher and the patterns that were used.
587 The matcher will warn about bad matches.'''
584 588 if pats == ("",):
585 589 pats = []
586 590 if not globbed and default == 'relpath':
@@ -594,12 +598,15 b' def matchandpats(ctx, pats=[], opts={}, '
594 598 return m, pats
595 599
596 600 def match(ctx, pats=[], opts={}, globbed=False, default='relpath'):
601 '''Return a matcher that will warn about bad matches.'''
597 602 return matchandpats(ctx, pats, opts, globbed, default)[0]
598 603
599 604 def matchall(repo):
605 '''Return a matcher that will efficiently match everything.'''
600 606 return matchmod.always(repo.root, repo.getcwd())
601 607
602 608 def matchfiles(repo, files):
609 '''Return a matcher that will efficiently match exactly these files.'''
603 610 return matchmod.exact(repo.root, repo.getcwd(), files)
604 611
605 612 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
General Comments 0
You need to be logged in to leave comments. Login now