##// END OF EJS Templates
match: more accurately report when we're always going to match...
Bryan O'Sullivan -
r18713:8728579f default
parent child Browse files
Show More
@@ -1,352 +1,354 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import scmutil, util, fileset
10 10 from i18n import _
11 11
def _rematcher(pat):
    """Compile pat with util.compilere and return its matching callable.

    The compiled object's test_match attribute is preferred when it exists
    (slightly faster, provided by facebook's re2 bindings); otherwise the
    ordinary match method is returned.
    """
    compiled = util.compilere(pat)
    try:
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
19 19
20 20 def _expandsets(pats, ctx):
21 21 '''convert set: patterns into a list of files in the given context'''
22 22 fset = set()
23 23 other = []
24 24
25 25 for kind, expr in pats:
26 26 if kind == 'set':
27 27 if not ctx:
28 28 raise util.Abort("fileset expression with no context")
29 29 s = fileset.getfileset(ctx, expr)
30 30 fset.update(s)
31 31 continue
32 32 other.append((kind, expr))
33 33 return fset, other
34 34
class match(object):
    """Callable object deciding whether a file name matches a pattern set.

    Calling an instance with a file name delegates to self.matchfn, which is
    assembled once in __init__ from the patterns, include and exclude lists.
    """

    # NOTE: the list defaults for include/exclude are only read, never
    # mutated, so sharing them between calls is harmless here.
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include
        exclude - patterns to exclude
        default - if a pattern in names has no explicit type, assume this one
        exact - patterns are actually literals
        auditor - passed through to _normalize for every pattern list
        ctx - passed through to _buildmatch (needed by 'set:' patterns)

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = []
        self._anypats = bool(include or exclude)
        self._ctx = ctx
        # Becomes True only when no patterns, includes or excludes restrict
        # the match (see the final branch below).
        self._always = False

        # im / em / pm are the include, exclude and pattern predicates; each
        # is only bound when the corresponding input is present.
        if include:
            pats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
        if exclude:
            pats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
        if exact:
            # literal file names: membership test instead of a regexp
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            pm = self.exact
        elif patterns:
            pats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(pats)
            self._anypats = self._anypats or _anypats(pats)
            self.patternspat, pm = _buildmatch(ctx, pats, '$')

        # Pick the cheapest predicate for the combination actually in use,
        # so the common cases avoid calling predicates that are not needed.
        if patterns or exact:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f) and pm(f)
                else:
                    m = lambda f: im(f) and pm(f)
            else:
                if exclude:
                    m = lambda f: not em(f) and pm(f)
                else:
                    m = pm
        else:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f)
                else:
                    m = im
            else:
                if exclude:
                    m = lambda f: not em(f)
                else:
                    # nothing restricts the match: every file matches
                    m = lambda f: True
                    self._always = True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''return the result of the match function for file name fn'''
        return self.matchfn(fn)

    def __iter__(self):
        '''iterate over the entries of files()'''
        for f in self._files:
            yield f

    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''
        pass

    def dir(self, f):
        '''no-op hook taking a name; presumably meant to be overridden'''
        pass

    def missing(self, f):
        '''no-op hook taking a name; presumably meant to be overridden'''
        pass

    def exact(self, f):
        '''return True if f is one of the entries of files()'''
        return f in self._fmap

    def rel(self, f):
        '''return util.pathto(root, cwd, f) for this matcher's root and cwd'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''return the list of explicit files or pattern roots'''
        return self._files

    def anypats(self):
        '''return the flag recording whether any non-literal pattern,
        include or exclude was given'''
        return self._anypats

    def always(self):
        '''return True if this matcher is known to match every file'''
        return self._always
134 136
class exact(match):
    """Matcher whose file arguments are treated as literal names."""

    def __init__(self, root, cwd, files):
        # Delegate to match with exact mode switched on.
        match.__init__(self, root, cwd, files, exact=True)
138 140
class always(match):
    """Matcher built with an empty pattern list, flagged as always matching."""

    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, [])
        # Record the fact on the instance so always() reports it; the flag
        # replaces the former always() override on this class.
        self._always = True
144 145
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        # State is copied from the wrapped matcher; match.__init__ is
        # deliberately not called.
        self._matcher = matcher
        self._path = path
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._always = matcher._always
        self._anypats = matcher._anypats

        # Keep only the files below path, with the "path/" prefix removed.
        prefix = path + "/"
        narrowed = []
        for name in matcher._files:
            if name.startswith(prefix):
                narrowed.append(name[len(prefix):])
        self._files = narrowed
        self._fmap = set(narrowed)

        # Re-insert the prefix before asking the wrapped matcher.
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def bad(self, f, msg):
        # Report through the wrapped matcher using the full path.
        self._matcher.bad(self._path + "/" + f, msg)
187 189
def patkind(pat):
    """Return the explicit kind prefix of pat, or None if it has none."""
    kind, _name = _patsplit(pat, None)
    return kind
190 192
191 193 def _patsplit(pat, default):
192 194 """Split a string into an optional pattern kind prefix and the
193 195 actual pattern."""
194 196 if ':' in pat:
195 197 kind, val = pat.split(':', 1)
196 198 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
197 199 'listfile', 'listfile0', 'set'):
198 200 return kind, val
199 201 return default, pat
200 202
201 203 def _globre(pat):
202 204 "convert a glob pattern into a regexp"
203 205 i, n = 0, len(pat)
204 206 res = ''
205 207 group = 0
206 208 escape = re.escape
207 209 def peek():
208 210 return i < n and pat[i]
209 211 while i < n:
210 212 c = pat[i]
211 213 i += 1
212 214 if c not in '*?[{},\\':
213 215 res += escape(c)
214 216 elif c == '*':
215 217 if peek() == '*':
216 218 i += 1
217 219 res += '.*'
218 220 else:
219 221 res += '[^/]*'
220 222 elif c == '?':
221 223 res += '.'
222 224 elif c == '[':
223 225 j = i
224 226 if j < n and pat[j] in '!]':
225 227 j += 1
226 228 while j < n and pat[j] != ']':
227 229 j += 1
228 230 if j >= n:
229 231 res += '\\['
230 232 else:
231 233 stuff = pat[i:j].replace('\\','\\\\')
232 234 i = j + 1
233 235 if stuff[0] == '!':
234 236 stuff = '^' + stuff[1:]
235 237 elif stuff[0] == '^':
236 238 stuff = '\\' + stuff
237 239 res = '%s[%s]' % (res, stuff)
238 240 elif c == '{':
239 241 group += 1
240 242 res += '(?:'
241 243 elif c == '}' and group:
242 244 res += ')'
243 245 group -= 1
244 246 elif c == ',' and group:
245 247 res += '|'
246 248 elif c == '\\':
247 249 p = peek()
248 250 if p:
249 251 i += 1
250 252 res += escape(p)
251 253 else:
252 254 res += escape(c)
253 255 else:
254 256 res += escape(c)
255 257 return res
256 258
257 259 def _regex(kind, name, tail):
258 260 '''convert a pattern into a regular expression'''
259 261 if not name:
260 262 return ''
261 263 if kind == 're':
262 264 return name
263 265 elif kind == 'path':
264 266 return '^' + re.escape(name) + '(?:/|$)'
265 267 elif kind == 'relglob':
266 268 return '(?:|.*/)' + _globre(name) + tail
267 269 elif kind == 'relpath':
268 270 return re.escape(name) + '(?:/|$)'
269 271 elif kind == 'relre':
270 272 if name.startswith('^'):
271 273 return name
272 274 return '.*' + name
273 275 return _globre(name) + tail
274 276
def _buildmatch(ctx, pats, tail):
    """Return (regexp text, match function) for a list of (kind, name) pairs.

    'set' patterns are expanded into a set of files first. When only those
    are present the regexp text is empty and the match function is a plain
    membership test; otherwise the membership test (if any) is combined with
    the regexp-based matcher.
    """
    fset, remaining = _expandsets(pats, ctx)
    if not remaining:
        return "", fset.__contains__

    pat, regexmatch = _buildregexmatch(remaining, tail)
    if not fset:
        return pat, regexmatch
    return pat, lambda f: f in fset or regexmatch(f)
284 286
def _buildregexmatch(pats, tail):
    """build a matching function from a set of patterns

    Returns (regexp text, match function). Raises util.Abort when a pattern
    does not compile, naming the offending pattern when it can be isolated.
    """
    try:
        alternatives = [_regex(kind, name, tail) for (kind, name) in pats]
        pat = '(?:%s)' % '|'.join(alternatives)
        if len(pat) > 20000:
            raise OverflowError
        return pat, _rematcher(pat)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(pats)
        if count < 2:
            raise
        half = count // 2
        # only the match functions of the halves are needed
        first = _buildregexmatch(pats[:half], tail)[1]
        second = _buildregexmatch(pats[half:], tail)[1]
        return pat, lambda s: first(s) or second(s)
    except re.error:
        # compile each pattern on its own to find the one to blame
        for kind, name in pats:
            try:
                _rematcher('(?:%s)' % _regex(kind, name, tail))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (kind, name))
        raise util.Abort(_("invalid pattern"))
309 311
def _normalize(names, default, root, cwd, auditor):
    """Turn pattern strings into a list of normalized (kind, name) pairs.

    'glob' and 'relpath' names go through scmutil.canonpath, 'relglob' and
    'path' names through util.normpath. 'listfile' / 'listfile0' patterns
    are replaced by the normalized patterns read from the named file (split
    on lines or NUL bytes respectively, empty entries dropped). Other kinds
    are kept unchanged.

    Raises util.Abort when a list file cannot be read.
    """
    normalized = []
    split = [_patsplit(p, default) for p in names]
    for kind, name in split:
        if kind in ('listfile', 'listfile0'):
            try:
                content = util.readfile(name)
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % name)
            if kind == 'listfile0':
                entries = content.split('\0')
            else:
                entries = content.splitlines()
            entries = [e for e in entries if e]
            normalized += _normalize(entries, default, root, cwd, auditor)
            continue

        if kind in ('glob', 'relpath'):
            name = scmutil.canonpath(root, cwd, name, auditor)
        elif kind in ('relglob', 'path'):
            name = util.normpath(name)
        normalized.append((kind, name))
    return normalized
332 334
333 335 def _roots(patterns):
334 336 r = []
335 337 for kind, name in patterns:
336 338 if kind == 'glob': # find the non-glob prefix
337 339 root = []
338 340 for p in name.split('/'):
339 341 if '[' in p or '{' in p or '*' in p or '?' in p:
340 342 break
341 343 root.append(p)
342 344 r.append('/'.join(root) or '.')
343 345 elif kind in ('relpath', 'path'):
344 346 r.append(name or '.')
345 347 elif kind == 'relglob':
346 348 r.append('.')
347 349 return r
348 350
349 351 def _anypats(patterns):
350 352 for kind, name in patterns:
351 353 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
352 354 return True
General Comments 0
You need to be logged in to leave comments. Login now