##// END OF EJS Templates
match: improve documentation - docstrings and more descriptive variable naming...
Mads Kiilerich -
r21111:9d28fd79 default
parent child Browse files
Show More
@@ -9,27 +9,29 b' import re'
9 import util, pathutil
9 import util, pathutil
10 from i18n import _
10 from i18n import _
11
11
12 def _rematcher(pat):
12 def _rematcher(regex):
13 m = util.compilere(pat)
13 '''compile the regexp with the best available regexp engine and return a
14 matcher function'''
15 m = util.compilere(regex)
14 try:
16 try:
15 # slightly faster, provided by facebook's re2 bindings
17 # slightly faster, provided by facebook's re2 bindings
16 return m.test_match
18 return m.test_match
17 except AttributeError:
19 except AttributeError:
18 return m.match
20 return m.match
19
21
20 def _expandsets(pats, ctx):
22 def _expandsets(kindpats, ctx):
21 '''convert set: patterns into a list of files in the given context'''
23 '''Returns the kindpats list with the 'set' patterns expanded.'''
22 fset = set()
24 fset = set()
23 other = []
25 other = []
24
26
25 for kind, expr in pats:
27 for kind, pat in kindpats:
26 if kind == 'set':
28 if kind == 'set':
27 if not ctx:
29 if not ctx:
28 raise util.Abort("fileset expression with no context")
30 raise util.Abort("fileset expression with no context")
29 s = ctx.getfileset(expr)
31 s = ctx.getfileset(pat)
30 fset.update(s)
32 fset.update(s)
31 continue
33 continue
32 other.append((kind, expr))
34 other.append((kind, pat))
33 return fset, other
35 return fset, other
34
36
35 class match(object):
37 class match(object):
@@ -41,10 +43,10 b' class match(object):'
41 root - the canonical root of the tree you're matching against
43 root - the canonical root of the tree you're matching against
42 cwd - the current working directory, if relevant
44 cwd - the current working directory, if relevant
43 patterns - patterns to find
45 patterns - patterns to find
44 include - patterns to include
46 include - patterns to include (unless they are excluded)
45 exclude - patterns to exclude
47 exclude - patterns to exclude (even if they are included)
46 default - if a pattern in names has no explicit type, assume this one
48 default - if a pattern in patterns has no explicit type, assume this one
47 exact - patterns are actually literals
49 exact - patterns are actually filenames (include/exclude still apply)
48
50
49 a pattern is one of:
51 a pattern is one of:
50 'glob:<glob>' - a glob relative to cwd
52 'glob:<glob>' - a glob relative to cwd
@@ -65,11 +67,11 b' class match(object):'
65 self._always = False
67 self._always = False
66
68
67 if include:
69 if include:
68 pats = _normalize(include, 'glob', root, cwd, auditor)
70 kindpats = _normalize(include, 'glob', root, cwd, auditor)
69 self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
71 self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
70 if exclude:
72 if exclude:
71 pats = _normalize(exclude, 'glob', root, cwd, auditor)
73 kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
72 self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
74 self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
73 if exact:
75 if exact:
74 if isinstance(patterns, list):
76 if isinstance(patterns, list):
75 self._files = patterns
77 self._files = patterns
@@ -77,10 +79,10 b' class match(object):'
77 self._files = list(patterns)
79 self._files = list(patterns)
78 pm = self.exact
80 pm = self.exact
79 elif patterns:
81 elif patterns:
80 pats = _normalize(patterns, default, root, cwd, auditor)
82 kindpats = _normalize(patterns, default, root, cwd, auditor)
81 self._files = _roots(pats)
83 self._files = _roots(kindpats)
82 self._anypats = self._anypats or _anypats(pats)
84 self._anypats = self._anypats or _anypats(kindpats)
83 self.patternspat, pm = _buildmatch(ctx, pats, '$')
85 self.patternspat, pm = _buildmatch(ctx, kindpats, '$')
84
86
85 if patterns or exact:
87 if patterns or exact:
86 if include:
88 if include:
@@ -114,28 +116,48 b' class match(object):'
114 def __iter__(self):
116 def __iter__(self):
115 for f in self._files:
117 for f in self._files:
116 yield f
118 yield f
119
120 # Callbacks related to how the matcher is used by dirstate.walk.
121 # Subscribers to these events must monkeypatch the matcher object.
117 def bad(self, f, msg):
122 def bad(self, f, msg):
118 '''callback for each explicit file that can't be
123 '''Callback from dirstate.walk for each explicit file that can't be
119 found/accessed, with an error message
124 found/accessed, with an error message.'''
120 '''
121 pass
125 pass
122 # If this is set, it will be called when an explicitly listed directory is
126
123 # visited.
127 # If an explicitdir is set, it will be called when an explicitly listed
128 # directory is visited.
124 explicitdir = None
129 explicitdir = None
125 # If this is set, it will be called when a directory discovered by recursive
130
126 # traversal is visited.
131 # If a traversedir is set, it will be called when a directory discovered
132 # by recursive traversal is visited.
127 traversedir = None
133 traversedir = None
134
128 def missing(self, f):
135 def missing(self, f):
129 pass
136 pass
130 def exact(self, f):
137
131 return f in self._fmap
132 def rel(self, f):
138 def rel(self, f):
139 '''Convert repo path back to path that is relative to cwd of matcher.'''
133 return util.pathto(self._root, self._cwd, f)
140 return util.pathto(self._root, self._cwd, f)
141
134 def files(self):
142 def files(self):
143 '''Explicitly listed files or patterns or roots:
144 if no patterns or .always(): empty list,
145 if exact: list exact files,
146 if not .anypats(): list all files and dirs,
147 else: optimal roots'''
135 return self._files
148 return self._files
149
150 def exact(self, f):
151 '''Returns True if f is in .files().'''
152 return f in self._fmap
153
136 def anypats(self):
154 def anypats(self):
155 '''Matcher uses patterns or include/exclude.'''
137 return self._anypats
156 return self._anypats
157
138 def always(self):
158 def always(self):
159 '''Matcher will match everything and .files() will be empty
160 - optimization might be possible and necessary.'''
139 return self._always
161 return self._always
140
162
141 class exact(match):
163 class exact(match):
@@ -191,21 +213,22 b' class narrowmatcher(match):'
191 def bad(self, f, msg):
213 def bad(self, f, msg):
192 self._matcher.bad(self._path + "/" + f, msg)
214 self._matcher.bad(self._path + "/" + f, msg)
193
215
194 def patkind(pat):
216 def patkind(pattern, default=None):
195 return _patsplit(pat, None)[0]
217 '''If pattern is 'kind:pat' with a known kind, return kind.'''
218 return _patsplit(pattern, default)[0]
196
219
197 def _patsplit(pat, default):
220 def _patsplit(pattern, default):
198 """Split a string into an optional pattern kind prefix and the
221 """Split a string into the optional pattern kind prefix and the actual
199 actual pattern."""
222 pattern."""
200 if ':' in pat:
223 if ':' in pattern:
201 kind, val = pat.split(':', 1)
224 kind, pat = pattern.split(':', 1)
202 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
225 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
203 'listfile', 'listfile0', 'set'):
226 'listfile', 'listfile0', 'set'):
204 return kind, val
227 return kind, pat
205 return default, pat
228 return default, pattern
206
229
207 def _globre(pat):
230 def _globre(pat):
208 "convert a glob pattern into a regexp"
231 '''Convert an extended glob string to a regexp string.'''
209 i, n = 0, len(pat)
232 i, n = 0, len(pat)
210 res = ''
233 res = ''
211 group = 0
234 group = 0
@@ -260,83 +283,90 b' def _globre(pat):'
260 res += escape(c)
283 res += escape(c)
261 return res
284 return res
262
285
263 def _regex(kind, name, tail):
286 def _regex(kind, pat, globsuffix):
264 '''convert a pattern into a regular expression'''
287 '''Convert a (normalized) pattern of any kind into a regular expression.
265 if not name:
288 globsuffix is appended to the regexp of globs.'''
289 if not pat:
266 return ''
290 return ''
267 if kind == 're':
291 if kind == 're':
268 return name
292 return pat
269 elif kind == 'path':
293 if kind == 'path':
270 return '^' + re.escape(name) + '(?:/|$)'
294 return '^' + re.escape(pat) + '(?:/|$)'
271 elif kind == 'relglob':
295 if kind == 'relglob':
272 return '(?:|.*/)' + _globre(name) + tail
296 return '(?:|.*/)' + _globre(pat) + globsuffix
273 elif kind == 'relpath':
297 if kind == 'relpath':
274 return re.escape(name) + '(?:/|$)'
298 return re.escape(pat) + '(?:/|$)'
275 elif kind == 'relre':
299 if kind == 'relre':
276 if name.startswith('^'):
300 if pat.startswith('^'):
277 return name
301 return pat
278 return '.*' + name
302 return '.*' + pat
279 return _globre(name) + tail
303 return _globre(pat) + globsuffix
280
304
281 def _buildmatch(ctx, pats, tail):
305 def _buildmatch(ctx, kindpats, globsuffix):
282 fset, pats = _expandsets(pats, ctx)
306 '''Return regexp string and a matcher function for kindpats.
283 if not pats:
307 globsuffix is appended to the regexp of globs.'''
308 fset, kindpats = _expandsets(kindpats, ctx)
309 if not kindpats:
284 return "", fset.__contains__
310 return "", fset.__contains__
285
311
286 pat, mf = _buildregexmatch(pats, tail)
312 regex, mf = _buildregexmatch(kindpats, globsuffix)
287 if fset:
313 if fset:
288 return pat, lambda f: f in fset or mf(f)
314 return regex, lambda f: f in fset or mf(f)
289 return pat, mf
315 return regex, mf
290
316
291 def _buildregexmatch(pats, tail):
317 def _buildregexmatch(kindpats, globsuffix):
292 """build a matching function from a set of patterns"""
318 """Build a match function from a list of kinds and kindpats,
319 return regexp string and a matcher function."""
293 try:
320 try:
294 pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
321 regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
295 if len(pat) > 20000:
322 for (k, p) in kindpats])
323 if len(regex) > 20000:
296 raise OverflowError
324 raise OverflowError
297 return pat, _rematcher(pat)
325 return regex, _rematcher(regex)
298 except OverflowError:
326 except OverflowError:
299 # We're using a Python with a tiny regex engine and we
327 # We're using a Python with a tiny regex engine and we
300 # made it explode, so we'll divide the pattern list in two
328 # made it explode, so we'll divide the pattern list in two
301 # until it works
329 # until it works
302 l = len(pats)
330 l = len(kindpats)
303 if l < 2:
331 if l < 2:
304 raise
332 raise
305 pata, a = _buildregexmatch(pats[:l//2], tail)
333 regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
306 patb, b = _buildregexmatch(pats[l//2:], tail)
334 regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
307 return pat, lambda s: a(s) or b(s)
335 return pat, lambda s: a(s) or b(s)
308 except re.error:
336 except re.error:
309 for k, p in pats:
337 for k, p in kindpats:
310 try:
338 try:
311 _rematcher('(?:%s)' % _regex(k, p, tail))
339 _rematcher('(?:%s)' % _regex(k, p, globsuffix))
312 except re.error:
340 except re.error:
313 raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
341 raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
314 raise util.Abort(_("invalid pattern"))
342 raise util.Abort(_("invalid pattern"))
315
343
316 def _normalize(names, default, root, cwd, auditor):
344 def _normalize(patterns, default, root, cwd, auditor):
317 pats = []
345 '''Convert 'kind:pat' from the patterns list to tuples with kind and
318 for kind, name in [_patsplit(p, default) for p in names]:
346 normalized and rooted patterns and with listfiles expanded.'''
347 kindpats = []
348 for kind, pat in [_patsplit(p, default) for p in patterns]:
319 if kind in ('glob', 'relpath'):
349 if kind in ('glob', 'relpath'):
320 name = pathutil.canonpath(root, cwd, name, auditor)
350 pat = pathutil.canonpath(root, cwd, pat, auditor)
321 elif kind in ('relglob', 'path'):
351 elif kind in ('relglob', 'path'):
322 name = util.normpath(name)
352 pat = util.normpath(pat)
323 elif kind in ('listfile', 'listfile0'):
353 elif kind in ('listfile', 'listfile0'):
324 try:
354 try:
325 files = util.readfile(name)
355 files = util.readfile(pat)
326 if kind == 'listfile0':
356 if kind == 'listfile0':
327 files = files.split('\0')
357 files = files.split('\0')
328 else:
358 else:
329 files = files.splitlines()
359 files = files.splitlines()
330 files = [f for f in files if f]
360 files = [f for f in files if f]
331 except EnvironmentError:
361 except EnvironmentError:
332 raise util.Abort(_("unable to read file list (%s)") % name)
362 raise util.Abort(_("unable to read file list (%s)") % pat)
333 pats += _normalize(files, default, root, cwd, auditor)
363 kindpats += _normalize(files, default, root, cwd, auditor)
334 continue
364 continue
365 # else: re or relre - which cannot be normalized
366 kindpats.append((kind, pat))
367 return kindpats
335
368
336 pats.append((kind, name))
369 def _roots(kindpats):
337 return pats
338
339 def _roots(patterns):
340 '''return roots and exact explicitly listed files from patterns
370 '''return roots and exact explicitly listed files from patterns
341
371
342 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
372 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
@@ -347,21 +377,21 b' def _roots(patterns):'
347 ['.', '.', '.']
377 ['.', '.', '.']
348 '''
378 '''
349 r = []
379 r = []
350 for kind, name in patterns:
380 for kind, pat in kindpats:
351 if kind == 'glob': # find the non-glob prefix
381 if kind == 'glob': # find the non-glob prefix
352 root = []
382 root = []
353 for p in name.split('/'):
383 for p in pat.split('/'):
354 if '[' in p or '{' in p or '*' in p or '?' in p:
384 if '[' in p or '{' in p or '*' in p or '?' in p:
355 break
385 break
356 root.append(p)
386 root.append(p)
357 r.append('/'.join(root) or '.')
387 r.append('/'.join(root) or '.')
358 elif kind in ('relpath', 'path'):
388 elif kind in ('relpath', 'path'):
359 r.append(name or '.')
389 r.append(pat or '.')
360 else: # relglob, re, relre
390 else: # relglob, re, relre
361 r.append('.')
391 r.append('.')
362 return r
392 return r
363
393
364 def _anypats(patterns):
394 def _anypats(kindpats):
365 for kind, name in patterns:
395 for kind, pat in kindpats:
366 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
396 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
367 return True
397 return True
@@ -564,23 +564,27 b' def revrange(repo, revs):'
564 return l
564 return l
565
565
566 def expandpats(pats):
566 def expandpats(pats):
567 '''Expand bare globs when running on windows.
568 On posix we assume it has already been done by sh.'''
567 if not util.expandglobs:
569 if not util.expandglobs:
568 return list(pats)
570 return list(pats)
569 ret = []
571 ret = []
570 for p in pats:
572 for kindpat in pats:
571 kind, name = matchmod._patsplit(p, None)
573 kind, pat = matchmod._patsplit(kindpat, None)
572 if kind is None:
574 if kind is None:
573 try:
575 try:
574 globbed = glob.glob(name)
576 globbed = glob.glob(pat)
575 except re.error:
577 except re.error:
576 globbed = [name]
578 globbed = [pat]
577 if globbed:
579 if globbed:
578 ret.extend(globbed)
580 ret.extend(globbed)
579 continue
581 continue
580 ret.append(p)
582 ret.append(kindpat)
581 return ret
583 return ret
582
584
583 def matchandpats(ctx, pats=[], opts={}, globbed=False, default='relpath'):
585 def matchandpats(ctx, pats=[], opts={}, globbed=False, default='relpath'):
586 '''Return a matcher and the patterns that were used.
587 The matcher will warn about bad matches.'''
584 if pats == ("",):
588 if pats == ("",):
585 pats = []
589 pats = []
586 if not globbed and default == 'relpath':
590 if not globbed and default == 'relpath':
@@ -594,12 +598,15 b' def matchandpats(ctx, pats=[], opts={}, '
594 return m, pats
598 return m, pats
595
599
596 def match(ctx, pats=[], opts={}, globbed=False, default='relpath'):
600 def match(ctx, pats=[], opts={}, globbed=False, default='relpath'):
601 '''Return a matcher that will warn about bad matches.'''
597 return matchandpats(ctx, pats, opts, globbed, default)[0]
602 return matchandpats(ctx, pats, opts, globbed, default)[0]
598
603
599 def matchall(repo):
604 def matchall(repo):
605 '''Return a matcher that will efficiently match everything.'''
600 return matchmod.always(repo.root, repo.getcwd())
606 return matchmod.always(repo.root, repo.getcwd())
601
607
602 def matchfiles(repo, files):
608 def matchfiles(repo, files):
609 '''Return a matcher that will efficiently match exactly these files.'''
603 return matchmod.exact(repo.root, repo.getcwd(), files)
610 return matchmod.exact(repo.root, repo.getcwd(), files)
604
611
605 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
612 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
General Comments 0
You need to be logged in to leave comments. Login now