##// END OF EJS Templates
match: remove last traces of unused .missing callback
Mads Kiilerich -
r21113:00cae8a2 default
parent child Browse files
Show More
@@ -1,411 +1,408
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import util, pathutil
10 10 from i18n import _
11 11
def _rematcher(regex):
    '''Compile regex via util.compilere and return a matching callable.

    The compiled object's test_match attribute is preferred when it exists
    (slightly faster, provided by facebook's re2 bindings); otherwise the
    regular match method is returned.
    '''
    compiled = util.compilere(regex)
    try:
        matcher = compiled.test_match
    except AttributeError:
        # plain 're' pattern objects have no test_match
        matcher = compiled.match
    return matcher
21 21
22 22 def _expandsets(kindpats, ctx):
23 23 '''Returns the kindpats list with the 'set' patterns expanded.'''
24 24 fset = set()
25 25 other = []
26 26
27 27 for kind, pat in kindpats:
28 28 if kind == 'set':
29 29 if not ctx:
30 30 raise util.Abort("fileset expression with no context")
31 31 s = ctx.getfileset(pat)
32 32 fset.update(s)
33 33 continue
34 34 other.append((kind, pat))
35 35 return fset, other
36 36
class match(object):
    '''Callable file-name matcher built from patterns and include/exclude
    lists. Calling the object with a repository path delegates to
    self.matchfn.'''

    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - handed through to the pattern normalization step
        ctx - context used to expand 'set:' patterns

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._ctx = ctx
        self._always = False

        # include/exclude patterns always default to 'glob' and match a
        # whole directory prefix as well as an exact name
        if include:
            incpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, incmatch = _buildmatch(ctx, incpats, '(?:/|$)')
        if exclude:
            excpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, excmatch = _buildmatch(ctx, excpats, '(?:/|$)')

        if exact:
            # reuse the caller's list when it already is one
            files = patterns
            if not isinstance(files, list):
                files = list(files)
            self._files = files
            patmatch = self.exact
        elif patterns:
            kindpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(kindpats)
            self._anypats = self._anypats or _anypats(kindpats)
            self.patternspat, patmatch = _buildmatch(ctx, kindpats, '$')

        # Pick the cheapest predicate for the combination of inputs that is
        # actually present; the sub-matchers are only bound when used.
        usepats = patterns or exact
        if usepats and include and exclude:
            fn = lambda f: incmatch(f) and not excmatch(f) and patmatch(f)
        elif usepats and include:
            fn = lambda f: incmatch(f) and patmatch(f)
        elif usepats and exclude:
            fn = lambda f: not excmatch(f) and patmatch(f)
        elif usepats:
            fn = patmatch
        elif include and exclude:
            fn = lambda f: incmatch(f) and not excmatch(f)
        elif include:
            fn = incmatch
        elif exclude:
            fn = lambda f: not excmatch(f)
        else:
            # nothing restricts the match at all
            fn = lambda f: True
            self._always = True

        self.matchfn = fn
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''Return the result of matchfn for the repository path fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fmap

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always
162 159
class exact(match):
    '''Matcher whose patterns are taken literally as file names.'''

    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, patterns=files, exact=True)
166 163
class always(match):
    '''Matcher built with an empty pattern list, flagged as matching
    everything.'''

    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, patterns=[])
        self._always = True
171 168
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        # match.__init__ is deliberately not called: all state is copied or
        # derived from the wrapped matcher.
        self._path = path
        self._matcher = matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._always = matcher._always
        self._anypats = matcher._anypats

        # keep only the wrapped matcher's files below path, with the
        # "path/" prefix stripped
        prefix = path + "/"
        skip = len(prefix)
        self._files = [f[skip:] for f in matcher._files
                       if f.startswith(prefix)]
        self._fmap = set(self._files)

        def narrowed(fn):
            # re-insert the subdirectory before asking the wrapped matcher
            return matcher.matchfn(self._path + "/" + fn)
        self.matchfn = narrowed

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad() with the subdirectory
        prepended to f.'''
        self._matcher.bad(self._path + "/" + f, msg)
215 212
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern when the kind is a known one,
    otherwise return default.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
219 216
220 217 def _patsplit(pattern, default):
221 218 """Split a string into the optional pattern kind prefix and the actual
222 219 pattern."""
223 220 if ':' in pattern:
224 221 kind, pat = pattern.split(':', 1)
225 222 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
226 223 'listfile', 'listfile0', 'set'):
227 224 return kind, pat
228 225 return default, pattern
229 226
230 227 def _globre(pat):
231 228 r'''Convert an extended glob string to a regexp string.
232 229
233 230 >>> print _globre(r'?')
234 231 .
235 232 >>> print _globre(r'*')
236 233 [^/]*
237 234 >>> print _globre(r'**')
238 235 .*
239 236 >>> print _globre(r'[a*?!^][^b][!c]')
240 237 [a*?!^][\^b][^c]
241 238 >>> print _globre(r'{a,b}')
242 239 (?:a|b)
243 240 >>> print _globre(r'.\*\?')
244 241 \.\*\?
245 242 '''
246 243 i, n = 0, len(pat)
247 244 res = ''
248 245 group = 0
249 246 escape = re.escape
250 247 def peek():
251 248 return i < n and pat[i]
252 249 while i < n:
253 250 c = pat[i]
254 251 i += 1
255 252 if c not in '*?[{},\\':
256 253 res += escape(c)
257 254 elif c == '*':
258 255 if peek() == '*':
259 256 i += 1
260 257 res += '.*'
261 258 else:
262 259 res += '[^/]*'
263 260 elif c == '?':
264 261 res += '.'
265 262 elif c == '[':
266 263 j = i
267 264 if j < n and pat[j] in '!]':
268 265 j += 1
269 266 while j < n and pat[j] != ']':
270 267 j += 1
271 268 if j >= n:
272 269 res += '\\['
273 270 else:
274 271 stuff = pat[i:j].replace('\\','\\\\')
275 272 i = j + 1
276 273 if stuff[0] == '!':
277 274 stuff = '^' + stuff[1:]
278 275 elif stuff[0] == '^':
279 276 stuff = '\\' + stuff
280 277 res = '%s[%s]' % (res, stuff)
281 278 elif c == '{':
282 279 group += 1
283 280 res += '(?:'
284 281 elif c == '}' and group:
285 282 res += ')'
286 283 group -= 1
287 284 elif c == ',' and group:
288 285 res += '|'
289 286 elif c == '\\':
290 287 p = peek()
291 288 if p:
292 289 i += 1
293 290 res += escape(p)
294 291 else:
295 292 res += escape(c)
296 293 else:
297 294 res += escape(c)
298 295 return res
299 296
300 297 def _regex(kind, pat, globsuffix):
301 298 '''Convert a (normalized) pattern of any kind into a regular expression.
302 299 globsuffix is appended to the regexp of globs.'''
303 300 if not pat:
304 301 return ''
305 302 if kind == 're':
306 303 return pat
307 304 if kind == 'path':
308 305 return '^' + re.escape(pat) + '(?:/|$)'
309 306 if kind == 'relglob':
310 307 return '(?:|.*/)' + _globre(pat) + globsuffix
311 308 if kind == 'relpath':
312 309 return re.escape(pat) + '(?:/|$)'
313 310 if kind == 'relre':
314 311 if pat.startswith('^'):
315 312 return pat
316 313 return '.*' + pat
317 314 return _globre(pat) + globsuffix
318 315
def _buildmatch(ctx, kindpats, globsuffix):
    '''Return a (regexp string, matcher function) pair for kindpats.

    'set' patterns are expanded through ctx first and matched by set
    membership; the remaining patterns are compiled into a regexp, with
    globsuffix appended to the regexp of globs. When only filesets are
    given the regexp string is empty.'''
    fileset, remaining = _expandsets(kindpats, ctx)
    if not remaining:
        return "", fileset.__contains__

    regex, rematch = _buildregexmatch(remaining, globsuffix)
    if not fileset:
        return regex, rematch
    return regex, lambda f: f in fileset or rematch(f)
330 327
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    All patterns are joined into a single alternation. If that regexp is
    longer than 20000 characters, or compiling it raises OverflowError, the
    pattern list is halved recursively and the resulting matchers are or-ed
    together; the regexp string returned in that case is still the full,
    unsplit one.

    Raises util.Abort when a pattern is not a valid regexp, naming the
    offending pattern when it can be identified, and re-raises
    OverflowError when a single pattern is already too large to split."""
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p) in kindpats])
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            raise
        a = _buildregexmatch(kindpats[:l//2], globsuffix)[1]
        b = _buildregexmatch(kindpats[l//2:], globsuffix)[1]
        # report the complete regexp (this used to reference an undefined
        # name and died with NameError instead of returning the matcher)
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile each pattern on its own to find the one to blame
        for k, p in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))
357 354
def _normalize(patterns, default, root, cwd, auditor):
    '''Turn a list of 'kind:pat' strings into (kind, pattern) tuples.

    glob and relpath patterns are canonicalized with pathutil.canonpath,
    relglob and path patterns are run through util.normpath, and listfile /
    listfile0 patterns are replaced by the normalized patterns read from
    the named file (split on newlines or NUL bytes respectively, empty
    entries dropped). re and relre patterns are passed through untouched.

    Raises util.Abort when a list file cannot be read.'''
    result = []
    split = [_patsplit(p, default) for p in patterns]
    for kind, pat in split:
        if kind in ('listfile', 'listfile0'):
            try:
                data = util.readfile(pat)
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % pat)
            if kind == 'listfile0':
                entries = data.split('\0')
            else:
                entries = data.splitlines()
            entries = [f for f in entries if f]
            # entries of a list file are patterns themselves
            result.extend(_normalize(entries, default, root, cwd, auditor))
            continue
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        result.append((kind, pat))
    return result
382 379
383 380 def _roots(kindpats):
384 381 '''return roots and exact explicitly listed files from patterns
385 382
386 383 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
387 384 ['g', 'g', '.']
388 385 >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
389 386 ['r', 'p/p', '.']
390 387 >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
391 388 ['.', '.', '.']
392 389 '''
393 390 r = []
394 391 for kind, pat in kindpats:
395 392 if kind == 'glob': # find the non-glob prefix
396 393 root = []
397 394 for p in pat.split('/'):
398 395 if '[' in p or '{' in p or '*' in p or '?' in p:
399 396 break
400 397 root.append(p)
401 398 r.append('/'.join(root) or '.')
402 399 elif kind in ('relpath', 'path'):
403 400 r.append(pat or '.')
404 401 else: # relglob, re, relre
405 402 r.append('.')
406 403 return r
407 404
408 405 def _anypats(kindpats):
409 406 for kind, pat in kindpats:
410 407 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
411 408 return True
General Comments 0
You need to be logged in to leave comments. Login now