##// END OF EJS Templates
match: introduce basic fileset support
Matt Mackall -
r14675:cfc89398 default
parent child Browse files
Show More
@@ -1,311 +1,338
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import re
8 import re
9 import scmutil, util
9 import scmutil, util, fileset
10 from i18n import _
10 from i18n import _
11
11
def _expandsets(pats, ctx):
    '''split set: patterns out of pats and expand them against ctx

    pats is an iterable of (kind, expression) pairs.  Each pair whose kind
    is 'set' is evaluated with fileset.getfileset(ctx, expr) and whatever
    that yields is merged into a single set.  Every other pair is passed
    through unchanged, in its original order.

    Returns a (fset, other) tuple: the merged set of fileset results and
    the list of remaining (kind, expression) pairs.

    Raises util.Abort when a 'set' pattern is seen but ctx is false.
    '''
    fset = set()
    other = []

    for kind, expr in pats:
        if kind == 'set':
            if not ctx:
                # user-visible message, so it goes through _() like the
                # other Abort messages in this module
                raise util.Abort(_("fileset expression with no context"))
            fset.update(fileset.getfileset(ctx, expr))
            continue
        other.append((kind, expr))
    return fset, other
26
class match(object):
    def __init__(self, root, cwd, patterns, include=None, exclude=None,
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include
        exclude - patterns to exclude
        default - if a pattern in names has no explicit type, assume this one
        exact - patterns are actually literals
        auditor - handed to _normalize along with root and cwd
        ctx - context that 'set:' patterns are evaluated against

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to canonroot
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = []
        self._anypats = bool(include or exclude)
        self._ctx = ctx

        # im, em and pm are the include, exclude and pattern predicates;
        # each one is only bound when the matching argument was given
        if include:
            pats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
        if exclude:
            pats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
        if exact:
            # literal names: no normalization, plain set membership
            self._files = patterns
            pm = self.exact
        elif patterns:
            pats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(pats)
            self._anypats = self._anypats or _anypats(pats)
            self.patternspat, pm = _buildmatch(ctx, pats, '$')

        # combine only the predicates that exist, so the common cases
        # avoid an extra layer of lambda
        if patterns or exact:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f) and pm(f)
                else:
                    m = lambda f: im(f) and pm(f)
            else:
                if exclude:
                    m = lambda f: not em(f) and pm(f)
                else:
                    m = pm
        else:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f)
                else:
                    m = im
            else:
                if exclude:
                    m = lambda f: not em(f)
                else:
                    m = lambda f: True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''apply the combined match function to fn'''
        return self.matchfn(fn)
    def __iter__(self):
        '''iterate over the entries of the explicit file list'''
        for f in self._files:
            yield f
    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''
        pass
    def dir(self, f):
        '''no-op hook; does nothing in this class'''
        pass
    def missing(self, f):
        '''no-op hook; does nothing in this class'''
        pass
    def exact(self, f):
        '''tell whether f is one of the entries in the file list'''
        return f in self._fmap
    def rel(self, f):
        '''return util.pathto(root, cwd, f) for this matcher's root and cwd'''
        return util.pathto(self._root, self._cwd, f)
    def files(self):
        '''return the file list computed at construction time'''
        return self._files
    def anypats(self):
        '''return the flag recorded for include/exclude or non-literal
        patterns'''
        return self._anypats
105
122
class exact(match):
    '''matcher whose file arguments are treated as literal names'''
    def __init__(self, root, cwd, files):
        # exact=True makes the base class skip pattern normalization
        match.__init__(self, root, cwd, patterns=files, exact=True)
109
126
class always(match):
    '''matcher constructed with an empty pattern list'''
    def __init__(self, root, cwd):
        # no patterns, includes or excludes: the base class installs a
        # match function that is true for every name
        match.__init__(self, root, cwd, patterns=[])
113
130
class narrowmatcher(match):
    """Restrict an existing matcher to a single subdirectory.

    Names given to this matcher are relative to the subdirectory: the
    directory prefix is added before asking the wrapped matcher, and it
    is stripped from the wrapped matcher's file list:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        prefix = path + "/"
        skip = len(path) + 1

        # state copied straight from the wrapped matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._anypats = matcher._anypats

        self._path = path
        self._matcher = matcher

        # keep only the files below path, expressed relative to it
        self._files = [name[skip:] for name in matcher._files
                       if name.startswith(prefix)]
        self._fmap = set(self._files)
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def bad(self, f, msg):
        '''report f to the wrapped matcher under its full name'''
        full = self._path + "/" + f
        self._matcher.bad(full, msg)
156
173
def patkind(pat):
    '''return the recognized kind prefix of pat, or None without one'''
    kind, name = _patsplit(pat, None)
    return kind
159
176
def _patsplit(pat, default):
    """Separate pat into (kind, pattern).

    The text before the first ':' is used as the kind only when it is
    one of the known pattern kinds; otherwise the whole string is the
    pattern and the kind is default."""
    known = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
             'listfile', 'listfile0', 'set')
    colon = pat.find(':')
    if colon >= 0:
        prefix = pat[:colon]
        if prefix in known:
            return prefix, pat[colon + 1:]
    return default, pat
169
186
def _globre(pat):
    "translate a glob pattern into regular expression source"
    quote = re.escape
    end = len(pat)
    out = []       # regexp fragments, joined at the end
    depth = 0      # number of currently open {...} groups
    pos = 0
    while pos < end:
        ch = pat[pos]
        pos += 1
        if ch == '*':
            # '**' crosses directory separators, a single '*' does not
            if pat[pos:pos + 1] == '*':
                pos += 1
                out.append('.*')
            else:
                out.append('[^/]*')
        elif ch == '?':
            out.append('.')
        elif ch == '[':
            # look for the closing bracket; a leading '!' or ']' is part
            # of the class and cannot close it
            close = pos
            if close < end and pat[close] in '!]':
                close += 1
            while close < end and pat[close] != ']':
                close += 1
            if close >= end:
                # unterminated class: the '[' is a literal
                out.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if stuff[0] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    stuff = '\\' + stuff
                out.append('[%s]' % stuff)
        elif ch == '{':
            depth += 1
            out.append('(?:')
        elif ch == '}' and depth:
            out.append(')')
            depth -= 1
        elif ch == ',' and depth:
            out.append('|')
        elif ch == '\\' and pos < end:
            # a backslash makes the following character literal
            out.append(quote(pat[pos]))
            pos += 1
        else:
            # ordinary characters, plus '}', ',' and a trailing backslash
            # that have no special meaning at this point
            out.append(quote(ch))
    return ''.join(out)
225
242
def _regex(kind, name, tail):
    '''return regular expression source for one (kind, name) pattern

    tail is appended to patterns that are translated from globs; an
    empty name always gives an empty expression.
    '''
    if not name:
        return ''
    if kind == 're':
        return name
    if kind == 'relre':
        # unanchored unless the expression anchors itself
        if name.startswith('^'):
            return name
        return '.*' + name
    if kind == 'path':
        return '^' + re.escape(name) + '(?:/|$)'
    if kind == 'relpath':
        return re.escape(name) + '(?:/|$)'
    # everything else is handled as a glob
    globbed = _globre(name) + tail
    if kind == 'relglob':
        return '(?:|.*/)' + globbed
    return globbed
243
260
def _buildmatch(ctx, pats, tail):
    '''build a (pattern string, match function) pair for pats

    'set' patterns are expanded against ctx into a set of names; the
    remaining patterns are compiled into a regexp matcher.  The returned
    function accepts a name found by either of the two.
    '''
    fset, pats = _expandsets(pats, ctx)
    if pats:
        pat, regexmatch = _buildregexmatch(pats, tail)
        if not fset:
            return pat, regexmatch
        return pat, lambda f: f in fset or regexmatch(f)
    # only fileset patterns were given: plain set membership
    return "", fset.__contains__
270
def _buildregexmatch(pats, tail):
    """build a matching function from a set of patterns

    pats is a list of (kind, pattern) pairs and tail is handed to _regex
    for every pattern.  Returns (pat, matchfn), where pat is the combined
    regular expression source and matchfn tests a name against it.

    Raises util.Abort if a pattern is not a valid regular expression.
    """
    try:
        pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
        if len(pat) > 20000:
            raise OverflowError()
        return pat, re.compile(pat).match
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(pats)
        if l < 2:
            raise
        # Recurse into this function: _buildmatch takes (ctx, pats, tail)
        # and would fail when called with two arguments.  Only the match
        # functions of the halves are needed.
        a = _buildregexmatch(pats[:l//2], tail)[1]
        b = _buildregexmatch(pats[l//2:], tail)[1]
        return pat, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one at a time to name the offending one
        for k, p in pats:
            try:
                re.compile('(?:%s)' % _regex(k, p, tail))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))
268
295
def _normalize(names, default, root, cwd, auditor):
    '''turn pattern strings into a list of (kind, name) pairs

    Every entry of names is split with _patsplit, using default as the
    kind of unprefixed entries.  glob and relpath names go through
    scmutil.canonpath, relglob and path names through util.normpath, and
    listfile/listfile0 entries are replaced by the normalized patterns
    read from the named file.  Other kinds are kept as they are.

    Raises util.Abort if a pattern list file cannot be read.
    '''
    result = []
    split = [_patsplit(p, default) for p in names]
    for kind, name in split:
        if kind in ('listfile', 'listfile0'):
            try:
                data = util.readfile(name)
                # listfile0 is NUL separated, listfile line separated
                if kind == 'listfile0':
                    entries = data.split('\0')
                else:
                    entries = data.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % name)
            result.extend(_normalize(entries, default, root, cwd, auditor))
            continue

        if kind in ('glob', 'relpath'):
            name = scmutil.canonpath(root, cwd, name, auditor)
        elif kind in ('relglob', 'path'):
            name = util.normpath(name)
        result.append((kind, name))
    return result
291
318
def _roots(patterns):
    '''list the literal starting point of each rooted pattern

    glob patterns contribute their leading path components up to the
    first one holding a glob character, path and relpath patterns their
    name, relglob patterns '.'.  An empty result for one pattern is
    reported as '.'; other kinds contribute nothing.
    '''
    roots = []
    for kind, name in patterns:
        if kind == 'relglob':
            roots.append('.')
        elif kind in ('relpath', 'path'):
            roots.append(name or '.')
        elif kind == 'glob':
            # collect components until the first one with a glob character
            literal = []
            for part in name.split('/'):
                if [c for c in '[{*?' if c in part]:
                    break
                literal.append(part)
            roots.append('/'.join(literal) or '.')
    return roots
307
334
def _anypats(patterns):
    '''tell whether any pattern is a glob or regexp kind

    patterns is an iterable of (kind, name) pairs.  Returns True when at
    least one kind is glob, re, relglob or relre, and False otherwise, so
    callers always receive a boolean.
    '''
    for kind, name in patterns:
        if kind in ('glob', 're', 'relglob', 'relre'):
            return True
    return False
General Comments 0
You need to be logged in to leave comments. Login now