##// END OF EJS Templates
match: more accurately report when we're always going to match...
Bryan O'Sullivan -
r18713:8728579f default
parent child Browse files
Show More
@@ -1,352 +1,354 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import re
8 import re
9 import scmutil, util, fileset
9 import scmutil, util, fileset
10 from i18n import _
10 from i18n import _
11
11
12 def _rematcher(pat):
12 def _rematcher(pat):
13 m = util.compilere(pat)
13 m = util.compilere(pat)
14 try:
14 try:
15 # slightly faster, provided by facebook's re2 bindings
15 # slightly faster, provided by facebook's re2 bindings
16 return m.test_match
16 return m.test_match
17 except AttributeError:
17 except AttributeError:
18 return m.match
18 return m.match
19
19
20 def _expandsets(pats, ctx):
20 def _expandsets(pats, ctx):
21 '''convert set: patterns into a list of files in the given context'''
21 '''convert set: patterns into a list of files in the given context'''
22 fset = set()
22 fset = set()
23 other = []
23 other = []
24
24
25 for kind, expr in pats:
25 for kind, expr in pats:
26 if kind == 'set':
26 if kind == 'set':
27 if not ctx:
27 if not ctx:
28 raise util.Abort("fileset expression with no context")
28 raise util.Abort("fileset expression with no context")
29 s = fileset.getfileset(ctx, expr)
29 s = fileset.getfileset(ctx, expr)
30 fset.update(s)
30 fset.update(s)
31 continue
31 continue
32 other.append((kind, expr))
32 other.append((kind, expr))
33 return fset, other
33 return fset, other
34
34
35 class match(object):
35 class match(object):
36 def __init__(self, root, cwd, patterns, include=[], exclude=[],
36 def __init__(self, root, cwd, patterns, include=[], exclude=[],
37 default='glob', exact=False, auditor=None, ctx=None):
37 default='glob', exact=False, auditor=None, ctx=None):
38 """build an object to match a set of file patterns
38 """build an object to match a set of file patterns
39
39
40 arguments:
40 arguments:
41 root - the canonical root of the tree you're matching against
41 root - the canonical root of the tree you're matching against
42 cwd - the current working directory, if relevant
42 cwd - the current working directory, if relevant
43 patterns - patterns to find
43 patterns - patterns to find
44 include - patterns to include
44 include - patterns to include
45 exclude - patterns to exclude
45 exclude - patterns to exclude
46 default - if a pattern in names has no explicit type, assume this one
46 default - if a pattern in names has no explicit type, assume this one
47 exact - patterns are actually literals
47 exact - patterns are actually literals
48
48
49 a pattern is one of:
49 a pattern is one of:
50 'glob:<glob>' - a glob relative to cwd
50 'glob:<glob>' - a glob relative to cwd
51 're:<regexp>' - a regular expression
51 're:<regexp>' - a regular expression
52 'path:<path>' - a path relative to repository root
52 'path:<path>' - a path relative to repository root
53 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
53 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
54 'relpath:<path>' - a path relative to cwd
54 'relpath:<path>' - a path relative to cwd
55 'relre:<regexp>' - a regexp that needn't match the start of a name
55 'relre:<regexp>' - a regexp that needn't match the start of a name
56 'set:<fileset>' - a fileset expression
56 'set:<fileset>' - a fileset expression
57 '<something>' - a pattern of the specified default type
57 '<something>' - a pattern of the specified default type
58 """
58 """
59
59
60 self._root = root
60 self._root = root
61 self._cwd = cwd
61 self._cwd = cwd
62 self._files = []
62 self._files = []
63 self._anypats = bool(include or exclude)
63 self._anypats = bool(include or exclude)
64 self._ctx = ctx
64 self._ctx = ctx
65 self._always = False
65
66
66 if include:
67 if include:
67 pats = _normalize(include, 'glob', root, cwd, auditor)
68 pats = _normalize(include, 'glob', root, cwd, auditor)
68 self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
69 self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
69 if exclude:
70 if exclude:
70 pats = _normalize(exclude, 'glob', root, cwd, auditor)
71 pats = _normalize(exclude, 'glob', root, cwd, auditor)
71 self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
72 self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
72 if exact:
73 if exact:
73 if isinstance(patterns, list):
74 if isinstance(patterns, list):
74 self._files = patterns
75 self._files = patterns
75 else:
76 else:
76 self._files = list(patterns)
77 self._files = list(patterns)
77 pm = self.exact
78 pm = self.exact
78 elif patterns:
79 elif patterns:
79 pats = _normalize(patterns, default, root, cwd, auditor)
80 pats = _normalize(patterns, default, root, cwd, auditor)
80 self._files = _roots(pats)
81 self._files = _roots(pats)
81 self._anypats = self._anypats or _anypats(pats)
82 self._anypats = self._anypats or _anypats(pats)
82 self.patternspat, pm = _buildmatch(ctx, pats, '$')
83 self.patternspat, pm = _buildmatch(ctx, pats, '$')
83
84
84 if patterns or exact:
85 if patterns or exact:
85 if include:
86 if include:
86 if exclude:
87 if exclude:
87 m = lambda f: im(f) and not em(f) and pm(f)
88 m = lambda f: im(f) and not em(f) and pm(f)
88 else:
89 else:
89 m = lambda f: im(f) and pm(f)
90 m = lambda f: im(f) and pm(f)
90 else:
91 else:
91 if exclude:
92 if exclude:
92 m = lambda f: not em(f) and pm(f)
93 m = lambda f: not em(f) and pm(f)
93 else:
94 else:
94 m = pm
95 m = pm
95 else:
96 else:
96 if include:
97 if include:
97 if exclude:
98 if exclude:
98 m = lambda f: im(f) and not em(f)
99 m = lambda f: im(f) and not em(f)
99 else:
100 else:
100 m = im
101 m = im
101 else:
102 else:
102 if exclude:
103 if exclude:
103 m = lambda f: not em(f)
104 m = lambda f: not em(f)
104 else:
105 else:
105 m = lambda f: True
106 m = lambda f: True
107 self._always = True
106
108
107 self.matchfn = m
109 self.matchfn = m
108 self._fmap = set(self._files)
110 self._fmap = set(self._files)
109
111
110 def __call__(self, fn):
112 def __call__(self, fn):
111 return self.matchfn(fn)
113 return self.matchfn(fn)
112 def __iter__(self):
114 def __iter__(self):
113 for f in self._files:
115 for f in self._files:
114 yield f
116 yield f
115 def bad(self, f, msg):
117 def bad(self, f, msg):
116 '''callback for each explicit file that can't be
118 '''callback for each explicit file that can't be
117 found/accessed, with an error message
119 found/accessed, with an error message
118 '''
120 '''
119 pass
121 pass
120 def dir(self, f):
122 def dir(self, f):
121 pass
123 pass
122 def missing(self, f):
124 def missing(self, f):
123 pass
125 pass
124 def exact(self, f):
126 def exact(self, f):
125 return f in self._fmap
127 return f in self._fmap
126 def rel(self, f):
128 def rel(self, f):
127 return util.pathto(self._root, self._cwd, f)
129 return util.pathto(self._root, self._cwd, f)
128 def files(self):
130 def files(self):
129 return self._files
131 return self._files
130 def anypats(self):
132 def anypats(self):
131 return self._anypats
133 return self._anypats
132 def always(self):
134 def always(self):
133 return False
135 return self._always
134
136
135 class exact(match):
137 class exact(match):
136 def __init__(self, root, cwd, files):
138 def __init__(self, root, cwd, files):
137 match.__init__(self, root, cwd, files, exact = True)
139 match.__init__(self, root, cwd, files, exact = True)
138
140
139 class always(match):
141 class always(match):
140 def __init__(self, root, cwd):
142 def __init__(self, root, cwd):
141 match.__init__(self, root, cwd, [])
143 match.__init__(self, root, cwd, [])
142 def always(self):
144 self._always = True
143 return True
144
145
145 class narrowmatcher(match):
146 class narrowmatcher(match):
146 """Adapt a matcher to work on a subdirectory only.
147 """Adapt a matcher to work on a subdirectory only.
147
148
148 The paths are remapped to remove/insert the path as needed:
149 The paths are remapped to remove/insert the path as needed:
149
150
150 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
151 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
151 >>> m2 = narrowmatcher('sub', m1)
152 >>> m2 = narrowmatcher('sub', m1)
152 >>> bool(m2('a.txt'))
153 >>> bool(m2('a.txt'))
153 False
154 False
154 >>> bool(m2('b.txt'))
155 >>> bool(m2('b.txt'))
155 True
156 True
156 >>> bool(m2.matchfn('a.txt'))
157 >>> bool(m2.matchfn('a.txt'))
157 False
158 False
158 >>> bool(m2.matchfn('b.txt'))
159 >>> bool(m2.matchfn('b.txt'))
159 True
160 True
160 >>> m2.files()
161 >>> m2.files()
161 ['b.txt']
162 ['b.txt']
162 >>> m2.exact('b.txt')
163 >>> m2.exact('b.txt')
163 True
164 True
164 >>> m2.rel('b.txt')
165 >>> m2.rel('b.txt')
165 'b.txt'
166 'b.txt'
166 >>> def bad(f, msg):
167 >>> def bad(f, msg):
167 ... print "%s: %s" % (f, msg)
168 ... print "%s: %s" % (f, msg)
168 >>> m1.bad = bad
169 >>> m1.bad = bad
169 >>> m2.bad('x.txt', 'No such file')
170 >>> m2.bad('x.txt', 'No such file')
170 sub/x.txt: No such file
171 sub/x.txt: No such file
171 """
172 """
172
173
173 def __init__(self, path, matcher):
174 def __init__(self, path, matcher):
174 self._root = matcher._root
175 self._root = matcher._root
175 self._cwd = matcher._cwd
176 self._cwd = matcher._cwd
176 self._path = path
177 self._path = path
177 self._matcher = matcher
178 self._matcher = matcher
179 self._always = matcher._always
178
180
179 self._files = [f[len(path) + 1:] for f in matcher._files
181 self._files = [f[len(path) + 1:] for f in matcher._files
180 if f.startswith(path + "/")]
182 if f.startswith(path + "/")]
181 self._anypats = matcher._anypats
183 self._anypats = matcher._anypats
182 self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
184 self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
183 self._fmap = set(self._files)
185 self._fmap = set(self._files)
184
186
185 def bad(self, f, msg):
187 def bad(self, f, msg):
186 self._matcher.bad(self._path + "/" + f, msg)
188 self._matcher.bad(self._path + "/" + f, msg)
187
189
188 def patkind(pat):
190 def patkind(pat):
189 return _patsplit(pat, None)[0]
191 return _patsplit(pat, None)[0]
190
192
191 def _patsplit(pat, default):
193 def _patsplit(pat, default):
192 """Split a string into an optional pattern kind prefix and the
194 """Split a string into an optional pattern kind prefix and the
193 actual pattern."""
195 actual pattern."""
194 if ':' in pat:
196 if ':' in pat:
195 kind, val = pat.split(':', 1)
197 kind, val = pat.split(':', 1)
196 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
198 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
197 'listfile', 'listfile0', 'set'):
199 'listfile', 'listfile0', 'set'):
198 return kind, val
200 return kind, val
199 return default, pat
201 return default, pat
200
202
201 def _globre(pat):
203 def _globre(pat):
202 "convert a glob pattern into a regexp"
204 "convert a glob pattern into a regexp"
203 i, n = 0, len(pat)
205 i, n = 0, len(pat)
204 res = ''
206 res = ''
205 group = 0
207 group = 0
206 escape = re.escape
208 escape = re.escape
207 def peek():
209 def peek():
208 return i < n and pat[i]
210 return i < n and pat[i]
209 while i < n:
211 while i < n:
210 c = pat[i]
212 c = pat[i]
211 i += 1
213 i += 1
212 if c not in '*?[{},\\':
214 if c not in '*?[{},\\':
213 res += escape(c)
215 res += escape(c)
214 elif c == '*':
216 elif c == '*':
215 if peek() == '*':
217 if peek() == '*':
216 i += 1
218 i += 1
217 res += '.*'
219 res += '.*'
218 else:
220 else:
219 res += '[^/]*'
221 res += '[^/]*'
220 elif c == '?':
222 elif c == '?':
221 res += '.'
223 res += '.'
222 elif c == '[':
224 elif c == '[':
223 j = i
225 j = i
224 if j < n and pat[j] in '!]':
226 if j < n and pat[j] in '!]':
225 j += 1
227 j += 1
226 while j < n and pat[j] != ']':
228 while j < n and pat[j] != ']':
227 j += 1
229 j += 1
228 if j >= n:
230 if j >= n:
229 res += '\\['
231 res += '\\['
230 else:
232 else:
231 stuff = pat[i:j].replace('\\','\\\\')
233 stuff = pat[i:j].replace('\\','\\\\')
232 i = j + 1
234 i = j + 1
233 if stuff[0] == '!':
235 if stuff[0] == '!':
234 stuff = '^' + stuff[1:]
236 stuff = '^' + stuff[1:]
235 elif stuff[0] == '^':
237 elif stuff[0] == '^':
236 stuff = '\\' + stuff
238 stuff = '\\' + stuff
237 res = '%s[%s]' % (res, stuff)
239 res = '%s[%s]' % (res, stuff)
238 elif c == '{':
240 elif c == '{':
239 group += 1
241 group += 1
240 res += '(?:'
242 res += '(?:'
241 elif c == '}' and group:
243 elif c == '}' and group:
242 res += ')'
244 res += ')'
243 group -= 1
245 group -= 1
244 elif c == ',' and group:
246 elif c == ',' and group:
245 res += '|'
247 res += '|'
246 elif c == '\\':
248 elif c == '\\':
247 p = peek()
249 p = peek()
248 if p:
250 if p:
249 i += 1
251 i += 1
250 res += escape(p)
252 res += escape(p)
251 else:
253 else:
252 res += escape(c)
254 res += escape(c)
253 else:
255 else:
254 res += escape(c)
256 res += escape(c)
255 return res
257 return res
256
258
257 def _regex(kind, name, tail):
259 def _regex(kind, name, tail):
258 '''convert a pattern into a regular expression'''
260 '''convert a pattern into a regular expression'''
259 if not name:
261 if not name:
260 return ''
262 return ''
261 if kind == 're':
263 if kind == 're':
262 return name
264 return name
263 elif kind == 'path':
265 elif kind == 'path':
264 return '^' + re.escape(name) + '(?:/|$)'
266 return '^' + re.escape(name) + '(?:/|$)'
265 elif kind == 'relglob':
267 elif kind == 'relglob':
266 return '(?:|.*/)' + _globre(name) + tail
268 return '(?:|.*/)' + _globre(name) + tail
267 elif kind == 'relpath':
269 elif kind == 'relpath':
268 return re.escape(name) + '(?:/|$)'
270 return re.escape(name) + '(?:/|$)'
269 elif kind == 'relre':
271 elif kind == 'relre':
270 if name.startswith('^'):
272 if name.startswith('^'):
271 return name
273 return name
272 return '.*' + name
274 return '.*' + name
273 return _globre(name) + tail
275 return _globre(name) + tail
274
276
275 def _buildmatch(ctx, pats, tail):
277 def _buildmatch(ctx, pats, tail):
276 fset, pats = _expandsets(pats, ctx)
278 fset, pats = _expandsets(pats, ctx)
277 if not pats:
279 if not pats:
278 return "", fset.__contains__
280 return "", fset.__contains__
279
281
280 pat, mf = _buildregexmatch(pats, tail)
282 pat, mf = _buildregexmatch(pats, tail)
281 if fset:
283 if fset:
282 return pat, lambda f: f in fset or mf(f)
284 return pat, lambda f: f in fset or mf(f)
283 return pat, mf
285 return pat, mf
284
286
285 def _buildregexmatch(pats, tail):
287 def _buildregexmatch(pats, tail):
286 """build a matching function from a set of patterns"""
288 """build a matching function from a set of patterns"""
287 try:
289 try:
288 pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
290 pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
289 if len(pat) > 20000:
291 if len(pat) > 20000:
290 raise OverflowError
292 raise OverflowError
291 return pat, _rematcher(pat)
293 return pat, _rematcher(pat)
292 except OverflowError:
294 except OverflowError:
293 # We're using a Python with a tiny regex engine and we
295 # We're using a Python with a tiny regex engine and we
294 # made it explode, so we'll divide the pattern list in two
296 # made it explode, so we'll divide the pattern list in two
295 # until it works
297 # until it works
296 l = len(pats)
298 l = len(pats)
297 if l < 2:
299 if l < 2:
298 raise
300 raise
299 pata, a = _buildregexmatch(pats[:l//2], tail)
301 pata, a = _buildregexmatch(pats[:l//2], tail)
300 patb, b = _buildregexmatch(pats[l//2:], tail)
302 patb, b = _buildregexmatch(pats[l//2:], tail)
301 return pat, lambda s: a(s) or b(s)
303 return pat, lambda s: a(s) or b(s)
302 except re.error:
304 except re.error:
303 for k, p in pats:
305 for k, p in pats:
304 try:
306 try:
305 _rematcher('(?:%s)' % _regex(k, p, tail))
307 _rematcher('(?:%s)' % _regex(k, p, tail))
306 except re.error:
308 except re.error:
307 raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
309 raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
308 raise util.Abort(_("invalid pattern"))
310 raise util.Abort(_("invalid pattern"))
309
311
310 def _normalize(names, default, root, cwd, auditor):
312 def _normalize(names, default, root, cwd, auditor):
311 pats = []
313 pats = []
312 for kind, name in [_patsplit(p, default) for p in names]:
314 for kind, name in [_patsplit(p, default) for p in names]:
313 if kind in ('glob', 'relpath'):
315 if kind in ('glob', 'relpath'):
314 name = scmutil.canonpath(root, cwd, name, auditor)
316 name = scmutil.canonpath(root, cwd, name, auditor)
315 elif kind in ('relglob', 'path'):
317 elif kind in ('relglob', 'path'):
316 name = util.normpath(name)
318 name = util.normpath(name)
317 elif kind in ('listfile', 'listfile0'):
319 elif kind in ('listfile', 'listfile0'):
318 try:
320 try:
319 files = util.readfile(name)
321 files = util.readfile(name)
320 if kind == 'listfile0':
322 if kind == 'listfile0':
321 files = files.split('\0')
323 files = files.split('\0')
322 else:
324 else:
323 files = files.splitlines()
325 files = files.splitlines()
324 files = [f for f in files if f]
326 files = [f for f in files if f]
325 except EnvironmentError:
327 except EnvironmentError:
326 raise util.Abort(_("unable to read file list (%s)") % name)
328 raise util.Abort(_("unable to read file list (%s)") % name)
327 pats += _normalize(files, default, root, cwd, auditor)
329 pats += _normalize(files, default, root, cwd, auditor)
328 continue
330 continue
329
331
330 pats.append((kind, name))
332 pats.append((kind, name))
331 return pats
333 return pats
332
334
333 def _roots(patterns):
335 def _roots(patterns):
334 r = []
336 r = []
335 for kind, name in patterns:
337 for kind, name in patterns:
336 if kind == 'glob': # find the non-glob prefix
338 if kind == 'glob': # find the non-glob prefix
337 root = []
339 root = []
338 for p in name.split('/'):
340 for p in name.split('/'):
339 if '[' in p or '{' in p or '*' in p or '?' in p:
341 if '[' in p or '{' in p or '*' in p or '?' in p:
340 break
342 break
341 root.append(p)
343 root.append(p)
342 r.append('/'.join(root) or '.')
344 r.append('/'.join(root) or '.')
343 elif kind in ('relpath', 'path'):
345 elif kind in ('relpath', 'path'):
344 r.append(name or '.')
346 r.append(name or '.')
345 elif kind == 'relglob':
347 elif kind == 'relglob':
346 r.append('.')
348 r.append('.')
347 return r
349 return r
348
350
349 def _anypats(patterns):
351 def _anypats(patterns):
350 for kind, name in patterns:
352 for kind, name in patterns:
351 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
353 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
352 return True
354 return True
General Comments 0
You need to be logged in to leave comments. Login now