##// END OF EJS Templates
match: add comments to explain explicitdir and traversedir
Siddharth Agarwal -
r19144:facd906c default
parent child Browse files
Show More
@@ -1,354 +1,358 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import re
8 import re
9 import scmutil, util, fileset
9 import scmutil, util, fileset
10 from i18n import _
10 from i18n import _
11
11
def _rematcher(pat):
    '''compile pat and return a callable that matches strings against it

    The compiled object's test_match method is preferred when it exists
    (facebook's re2 bindings provide it and it is slightly faster); the
    plain match method is used otherwise.
    '''
    compiled = util.compilere(pat)
    try:
        matchfn = compiled.test_match
    except AttributeError:
        matchfn = compiled.match
    return matchfn
19
19
20 def _expandsets(pats, ctx):
20 def _expandsets(pats, ctx):
21 '''convert set: patterns into a list of files in the given context'''
21 '''convert set: patterns into a list of files in the given context'''
22 fset = set()
22 fset = set()
23 other = []
23 other = []
24
24
25 for kind, expr in pats:
25 for kind, expr in pats:
26 if kind == 'set':
26 if kind == 'set':
27 if not ctx:
27 if not ctx:
28 raise util.Abort("fileset expression with no context")
28 raise util.Abort("fileset expression with no context")
29 s = fileset.getfileset(ctx, expr)
29 s = fileset.getfileset(ctx, expr)
30 fset.update(s)
30 fset.update(s)
31 continue
31 continue
32 other.append((kind, expr))
32 other.append((kind, expr))
33 return fset, other
33 return fset, other
34
34
class match(object):
    """Callable object that tests filenames against a set of patterns."""

    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include
        exclude - patterns to exclude
        default - if a pattern in names has no explicit type, assume this one
        exact - patterns are actually literals
        auditor - handed to the pattern normalization step
        ctx - context used to evaluate 'set:' patterns

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._ctx = ctx
        self._files = []
        self._always = False
        self._anypats = bool(include or exclude)

        # include and exclude patterns default to the 'glob' kind and are
        # built with a tail accepting either a '/' or the end of the name
        if include:
            incpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, incfn = _buildmatch(ctx, incpats, '(?:/|$)')
        if exclude:
            excpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, excfn = _buildmatch(ctx, excpats, '(?:/|$)')

        if exact:
            # the patterns are literal names: a list is used as-is, any
            # other iterable is copied into a new list
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            patfn = self.exact
        elif patterns:
            kindpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(kindpats)
            self._anypats = self._anypats or _anypats(kindpats)
            self.patternspat, patfn = _buildmatch(ctx, kindpats, '$')

        # combine whichever of the include, exclude and pattern matchers
        # exist into the single function used by __call__
        haspats = bool(patterns or exact)
        if include and exclude:
            if haspats:
                matchfn = lambda f: incfn(f) and not excfn(f) and patfn(f)
            else:
                matchfn = lambda f: incfn(f) and not excfn(f)
        elif include:
            if haspats:
                matchfn = lambda f: incfn(f) and patfn(f)
            else:
                matchfn = incfn
        elif exclude:
            if haspats:
                matchfn = lambda f: not excfn(f) and patfn(f)
            else:
                matchfn = lambda f: not excfn(f)
        elif haspats:
            matchfn = patfn
        else:
            # nothing restricts the match at all
            matchfn = lambda f: True
            self._always = True

        self.matchfn = matchfn
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''return the result of the combined match function for fn'''
        return self.matchfn(fn)

    def __iter__(self):
        '''iterate over the entries of files()'''
        for name in self._files:
            yield name

    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''
        pass

    # If this is set, it will be called when an explicitly listed directory is
    # visited.
    explicitdir = None

    # If this is set, it will be called when a directory discovered by recursive
    # traversal is visited.
    traversedir = None

    def missing(self, f):
        '''hook taking a filename; does nothing by default'''
        pass

    def exact(self, f):
        '''report whether f is one of the entries of files()'''
        return f in self._fmap

    def rel(self, f):
        '''return util.pathto() applied to the root, the cwd and f'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''return the list of files (or pattern roots) of this matcher'''
        return self._files

    def anypats(self):
        '''return the flag recording whether non-literal patterns or
        include/exclude patterns were given'''
        return self._anypats

    def always(self):
        '''return the flag recording that this matcher matches everything'''
        return self._always
136
140
class exact(match):
    '''matcher whose files are literal names rather than patterns'''

    def __init__(self, root, cwd, files):
        # delegate to the generic matcher with its exact flag switched on
        match.__init__(self, root, cwd, files, exact=True)
140
144
class always(match):
    '''matcher built without any patterns and flagged as matching always'''

    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, [])
        # set the flag explicitly rather than relying on the base class
        self._always = True
145
149
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        # match.__init__ is deliberately not called: every attribute is
        # derived from the wrapped matcher instead
        prefix = path + "/"

        self._path = path
        self._matcher = matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._always = matcher._always
        self._anypats = matcher._anypats

        # keep only the wrapped matcher's files below path, with the
        # leading "path/" stripped
        self._files = [f[len(prefix):] for f in matcher._files
                       if f.startswith(prefix)]
        self._fmap = set(self._files)

        # put the subdirectory back in front before asking the wrapped
        # matcher
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def bad(self, f, msg):
        '''forward to the wrapped matcher's bad() with path prepended to f'''
        self._matcher.bad(self._path + "/" + f, msg)
189
193
def patkind(pat):
    '''return the kind prefix of pat, or None if it has no recognised one'''
    kind, name = _patsplit(pat, None)
    return kind
192
196
193 def _patsplit(pat, default):
197 def _patsplit(pat, default):
194 """Split a string into an optional pattern kind prefix and the
198 """Split a string into an optional pattern kind prefix and the
195 actual pattern."""
199 actual pattern."""
196 if ':' in pat:
200 if ':' in pat:
197 kind, val = pat.split(':', 1)
201 kind, val = pat.split(':', 1)
198 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
202 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
199 'listfile', 'listfile0', 'set'):
203 'listfile', 'listfile0', 'set'):
200 return kind, val
204 return kind, val
201 return default, pat
205 return default, pat
202
206
203 def _globre(pat):
207 def _globre(pat):
204 "convert a glob pattern into a regexp"
208 "convert a glob pattern into a regexp"
205 i, n = 0, len(pat)
209 i, n = 0, len(pat)
206 res = ''
210 res = ''
207 group = 0
211 group = 0
208 escape = re.escape
212 escape = re.escape
209 def peek():
213 def peek():
210 return i < n and pat[i]
214 return i < n and pat[i]
211 while i < n:
215 while i < n:
212 c = pat[i]
216 c = pat[i]
213 i += 1
217 i += 1
214 if c not in '*?[{},\\':
218 if c not in '*?[{},\\':
215 res += escape(c)
219 res += escape(c)
216 elif c == '*':
220 elif c == '*':
217 if peek() == '*':
221 if peek() == '*':
218 i += 1
222 i += 1
219 res += '.*'
223 res += '.*'
220 else:
224 else:
221 res += '[^/]*'
225 res += '[^/]*'
222 elif c == '?':
226 elif c == '?':
223 res += '.'
227 res += '.'
224 elif c == '[':
228 elif c == '[':
225 j = i
229 j = i
226 if j < n and pat[j] in '!]':
230 if j < n and pat[j] in '!]':
227 j += 1
231 j += 1
228 while j < n and pat[j] != ']':
232 while j < n and pat[j] != ']':
229 j += 1
233 j += 1
230 if j >= n:
234 if j >= n:
231 res += '\\['
235 res += '\\['
232 else:
236 else:
233 stuff = pat[i:j].replace('\\','\\\\')
237 stuff = pat[i:j].replace('\\','\\\\')
234 i = j + 1
238 i = j + 1
235 if stuff[0] == '!':
239 if stuff[0] == '!':
236 stuff = '^' + stuff[1:]
240 stuff = '^' + stuff[1:]
237 elif stuff[0] == '^':
241 elif stuff[0] == '^':
238 stuff = '\\' + stuff
242 stuff = '\\' + stuff
239 res = '%s[%s]' % (res, stuff)
243 res = '%s[%s]' % (res, stuff)
240 elif c == '{':
244 elif c == '{':
241 group += 1
245 group += 1
242 res += '(?:'
246 res += '(?:'
243 elif c == '}' and group:
247 elif c == '}' and group:
244 res += ')'
248 res += ')'
245 group -= 1
249 group -= 1
246 elif c == ',' and group:
250 elif c == ',' and group:
247 res += '|'
251 res += '|'
248 elif c == '\\':
252 elif c == '\\':
249 p = peek()
253 p = peek()
250 if p:
254 if p:
251 i += 1
255 i += 1
252 res += escape(p)
256 res += escape(p)
253 else:
257 else:
254 res += escape(c)
258 res += escape(c)
255 else:
259 else:
256 res += escape(c)
260 res += escape(c)
257 return res
261 return res
258
262
259 def _regex(kind, name, tail):
263 def _regex(kind, name, tail):
260 '''convert a pattern into a regular expression'''
264 '''convert a pattern into a regular expression'''
261 if not name:
265 if not name:
262 return ''
266 return ''
263 if kind == 're':
267 if kind == 're':
264 return name
268 return name
265 elif kind == 'path':
269 elif kind == 'path':
266 return '^' + re.escape(name) + '(?:/|$)'
270 return '^' + re.escape(name) + '(?:/|$)'
267 elif kind == 'relglob':
271 elif kind == 'relglob':
268 return '(?:|.*/)' + _globre(name) + tail
272 return '(?:|.*/)' + _globre(name) + tail
269 elif kind == 'relpath':
273 elif kind == 'relpath':
270 return re.escape(name) + '(?:/|$)'
274 return re.escape(name) + '(?:/|$)'
271 elif kind == 'relre':
275 elif kind == 'relre':
272 if name.startswith('^'):
276 if name.startswith('^'):
273 return name
277 return name
274 return '.*' + name
278 return '.*' + name
275 return _globre(name) + tail
279 return _globre(name) + tail
276
280
def _buildmatch(ctx, pats, tail):
    '''build a match function from patterns that may include filesets

    'set' patterns are expanded against ctx into a set of files and the
    remaining patterns are compiled into one regular expression.

    Returns (regexp text, match function). The text is empty when only
    fileset patterns (or no patterns at all) were given.
    '''
    files, rest = _expandsets(pats, ctx)
    if rest:
        regex, regexfn = _buildregexmatch(rest, tail)
        if not files:
            return regex, regexfn
        # a name matches if the filesets contain it or the regexp accepts it
        return regex, lambda f: f in files or regexfn(f)
    return "", files.__contains__
286
290
def _buildregexmatch(pats, tail):
    """build a matching function from a set of patterns

    Returns (regexp text, match function). When the combined expression is
    longer than 20000 characters or compiling it raises OverflowError, the
    pattern list is halved recursively and the halves are or-ed together;
    the returned text is still the full combined expression. A pattern
    that fails to compile aborts with a message naming it.
    """
    try:
        pieces = [_regex(kind, name, tail) for (kind, name) in pats]
        pat = '(?:%s)' % '|'.join(pieces)
        if len(pat) > 20000:
            raise OverflowError
        return pat, _rematcher(pat)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(pats)
        if count < 2:
            # a single pattern cannot be divided any further
            raise
        half = count // 2
        first = _buildregexmatch(pats[:half], tail)[1]
        second = _buildregexmatch(pats[half:], tail)[1]
        return pat, lambda s: first(s) or second(s)
    except re.error:
        # compile the patterns one at a time to name the offending one
        for kind, name in pats:
            try:
                _rematcher('(?:%s)' % _regex(kind, name, tail))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (kind, name))
        raise util.Abort(_("invalid pattern"))
311
315
def _normalize(names, default, root, cwd, auditor):
    '''turn pattern strings into a list of normalized (kind, name) pairs

    Each entry of names is split into kind and name, with default used
    when no kind prefix is present. 'glob' and 'relpath' names are passed
    through scmutil.canonpath(), 'relglob' and 'path' names through
    util.normpath(). 'listfile' and 'listfile0' name a file whose non-empty
    entries (separated by line breaks or NUL bytes respectively) are
    normalized recursively in its place. Aborts if such a file can't be
    read.
    '''
    result = []
    kindnames = [_patsplit(p, default) for p in names]
    for kind, name in kindnames:
        if kind in ('listfile', 'listfile0'):
            try:
                data = util.readfile(name)
                if kind == 'listfile0':
                    entries = data.split('\0')
                else:
                    entries = data.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % name)
            result.extend(_normalize(entries, default, root, cwd, auditor))
            continue

        if kind in ('glob', 'relpath'):
            name = scmutil.canonpath(root, cwd, name, auditor)
        elif kind in ('relglob', 'path'):
            name = util.normpath(name)
        result.append((kind, name))
    return result
334
338
335 def _roots(patterns):
339 def _roots(patterns):
336 r = []
340 r = []
337 for kind, name in patterns:
341 for kind, name in patterns:
338 if kind == 'glob': # find the non-glob prefix
342 if kind == 'glob': # find the non-glob prefix
339 root = []
343 root = []
340 for p in name.split('/'):
344 for p in name.split('/'):
341 if '[' in p or '{' in p or '*' in p or '?' in p:
345 if '[' in p or '{' in p or '*' in p or '?' in p:
342 break
346 break
343 root.append(p)
347 root.append(p)
344 r.append('/'.join(root) or '.')
348 r.append('/'.join(root) or '.')
345 elif kind in ('relpath', 'path'):
349 elif kind in ('relpath', 'path'):
346 r.append(name or '.')
350 r.append(name or '.')
347 else: # relglob, re, relre
351 else: # relglob, re, relre
348 r.append('.')
352 r.append('.')
349 return r
353 return r
350
354
351 def _anypats(patterns):
355 def _anypats(patterns):
352 for kind, name in patterns:
356 for kind, name in patterns:
353 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
357 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
354 return True
358 return True
General Comments 0
You need to be logged in to leave comments. Login now