##// END OF EJS Templates
match: add the abs() method...
Matt Harbison -
r23685:5b1eac34 default
parent child Browse files
Show More
@@ -1,413 +1,423 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import util, pathutil
10 10 from i18n import _
11 11
def _rematcher(regex):
    '''Compile regex through util.re and return the compiled object's
    matching function.

    The compiled object's test_match attribute is preferred when it exists;
    otherwise its regular match method is returned.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
21 21
22 22 def _expandsets(kindpats, ctx):
23 23 '''Returns the kindpats list with the 'set' patterns expanded.'''
24 24 fset = set()
25 25 other = []
26 26
27 27 for kind, pat in kindpats:
28 28 if kind == 'set':
29 29 if not ctx:
30 30 raise util.Abort("fileset expression with no context")
31 31 s = ctx.getfileset(pat)
32 32 fset.update(s)
33 33 continue
34 34 other.append((kind, pat))
35 35 return fset, other
36 36
class match(object):
    '''Callable file matcher built from patterns plus include/exclude lists.

    Calling the object with a file name returns the result of self.matchfn
    for that name; iterating over it yields the entries of .files().'''

    def __init__(self, root, cwd, patterns, include=None, exclude=None,
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - passed through to the pattern normalization
        ctx - context used to evaluate 'set:' patterns

        include and exclude default to None, which behaves exactly like an
        empty list: they are only tested for truth and handed to the
        pattern normalization.

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._ctx = ctx
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)

        # every function collected here must accept a file for it to match
        matchfns = []
        if include:
            kindpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
            matchfns.append(im)
        if exclude:
            kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
            matchfns.append(lambda f: not em(f))
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(kindpats)
            self._anypats = self._anypats or _anypats(kindpats)
            self.patternspat, pm = _buildmatch(ctx, kindpats, '$')
            matchfns.append(pm)

        if not matchfns:
            # nothing restricts the match: everything is accepted
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m
        # set form of _files, used by exact() for membership tests
        self._fmap = set(self._files)

    def __call__(self, fn):
        return self.matchfn(fn)
    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        return (self._pathrestricted and self.rel(f)) or f

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fmap

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always
158 163
def exact(root, cwd, files):
    '''Return a matcher for root/cwd that treats files as exact file names
    rather than patterns.'''
    matcher = match(root, cwd, files, exact=True)
    return matcher
161 166
def always(root, cwd):
    '''Return a matcher for root/cwd that is built without any patterns.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
164 169
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    Names handed to this matcher are relative to the subdirectory; the
    subdirectory path is inserted or stripped as needed when delegating to
    the wrapped matcher:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        # The state is copied from the wrapped matcher instead of being
        # rebuilt from patterns.
        self._path = path
        self._matcher = matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._always = matcher._always
        self._pathrestricted = matcher._pathrestricted
        self._anypats = matcher._anypats

        # keep only the files below path, with the "path/" prefix removed
        prefix = path + "/"
        narrowed = []
        for f in matcher._files:
            if f.startswith(prefix):
                narrowed.append(f[len(prefix):])
        self._files = narrowed
        self._fmap = set(narrowed)

        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def abs(self, f):
        '''Prefix f with the subdirectory and let the wrapped matcher
        convert the result.'''
        widened = self._path + "/" + f
        return self._matcher.abs(widened)

    def bad(self, f, msg):
        '''Forward the callback to the wrapped matcher, with f prefixed by
        the subdirectory.'''
        widened = self._path + "/" + f
        self._matcher.bad(widened, msg)
209 219
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern when the kind is a known one,
    default otherwise.'''
    kind, pat = _patsplit(pattern, default)
    return kind
213 223
214 224 def _patsplit(pattern, default):
215 225 """Split a string into the optional pattern kind prefix and the actual
216 226 pattern."""
217 227 if ':' in pattern:
218 228 kind, pat = pattern.split(':', 1)
219 229 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
220 230 'listfile', 'listfile0', 'set'):
221 231 return kind, pat
222 232 return default, pattern
223 233
def _globre(pat):
    r'''Translate an extended glob into the text of a regular expression.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    quote = util.re.escape
    pos, end = 0, len(pat)
    depth = 0 # number of '{' groups that are still open
    pieces = []
    while pos < end:
        ch = pat[pos]
        pos += 1
        if ch == '*':
            if pat[pos:pos + 1] != '*':
                # a single star stays within one path component
                pieces.append('[^/]*')
                continue
            pos += 1
            if pat[pos:pos + 1] == '/':
                pos += 1
                pieces.append('(?:.*/)?')
            else:
                pieces.append('.*')
        elif ch == '?':
            pieces.append('.')
        elif ch == '[':
            # look for the closing bracket; a leading '!' or ']' belongs to
            # the class itself
            close = pos
            if close < end and pat[close] in '!]':
                close += 1
            while close < end and pat[close] != ']':
                close += 1
            if close >= end:
                # unterminated class: the bracket is a literal
                pieces.append('\\[')
                continue
            chars = pat[pos:close].replace('\\', '\\\\')
            pos = close + 1
            if chars[0] == '!':
                chars = '^' + chars[1:]
            elif chars[0] == '^':
                chars = '\\' + chars
            pieces.append('[%s]' % chars)
        elif ch == '{':
            depth += 1
            pieces.append('(?:')
        elif ch == '}' and depth:
            depth -= 1
            pieces.append(')')
        elif ch == ',' and depth:
            pieces.append('|')
        elif ch == '\\' and pos < end:
            # a backslash quotes the character that follows it
            pieces.append(quote(pat[pos]))
            pos += 1
        else:
            # ordinary characters, plus '}' and ',' outside of a group and
            # a backslash at the very end of the pattern
            pieces.append(quote(ch))
    return ''.join(pieces)
301 311
def _regex(kind, pat, globsuffix):
    '''Turn one (normalized) pattern of the given kind into regexp text.

    globsuffix is appended to the regexps produced for the glob kinds.  An
    empty pattern yields an empty string.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'relre':
        if pat.startswith('^'):
            return pat
        return '.*' + pat
    if kind in ('path', 'relpath'):
        literal = util.re.escape(pat) + '(?:/|$)'
        if kind == 'path':
            return '^' + literal
        return literal
    # whatever is left is handled as a glob
    globbed = _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + globbed
    return globbed
320 330
def _buildmatch(ctx, kindpats, globsuffix):
    '''Build a matcher for kindpats; return (regexp string, match function).

    'set' patterns are expanded through ctx and matched by membership; the
    remaining patterns are matched by regexp.  globsuffix is appended to the
    regexp of globs.'''
    fileset, remaining = _expandsets(kindpats, ctx)
    if not remaining:
        # no regexp needed: membership in the expanded filesets decides
        return "", fileset.__contains__

    regex, regexmatch = _buildregexmatch(remaining, globsuffix)
    if not fileset:
        return regex, regexmatch
    return regex, lambda f: f in fileset or regexmatch(f)
332 342
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    All patterns are joined into one alternation.  When that regexp is
    longer than 20000 characters, or compiling it raises OverflowError, the
    pattern list is split in two halves that are built recursively and
    or-ed together; the returned regexp string is still the full
    alternation.  The OverflowError is re-raised when fewer than two
    patterns are left to split.

    Aborts when a pattern is not a valid regexp, naming the first offending
    pattern that can be identified."""
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p) in kindpats])
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        # only the match functions of the halves are needed
        a = _buildregexmatch(kindpats[:half], globsuffix)[1]
        b = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one by one to find the one to blame
        for k, p in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))
359 369
def _normalize(patterns, default, root, cwd, auditor):
    '''Turn the 'kind:pat' strings of patterns into (kind, pat) tuples.

    glob and relpath patterns go through pathutil.canonpath, relglob and
    path patterns through util.normpath, and listfile/listfile0 patterns are
    replaced by the normalized patterns read from the file they name.'''
    split = [_patsplit(p, default) for p in patterns]
    normalized = []
    for kind, pat in split:
        if kind in ('listfile', 'listfile0'):
            try:
                content = util.readfile(pat)
                if kind == 'listfile0':
                    names = content.split('\0')
                else:
                    names = content.splitlines()
                names = [name for name in names if name]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % pat)
            normalized.extend(_normalize(names, default, root, cwd, auditor))
            continue
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        normalized.append((kind, pat))
    return normalized
384 394
385 395 def _roots(kindpats):
386 396 '''return roots and exact explicitly listed files from patterns
387 397
388 398 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
389 399 ['g', 'g', '.']
390 400 >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
391 401 ['r', 'p/p', '.']
392 402 >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
393 403 ['.', '.', '.']
394 404 '''
395 405 r = []
396 406 for kind, pat in kindpats:
397 407 if kind == 'glob': # find the non-glob prefix
398 408 root = []
399 409 for p in pat.split('/'):
400 410 if '[' in p or '{' in p or '*' in p or '?' in p:
401 411 break
402 412 root.append(p)
403 413 r.append('/'.join(root) or '.')
404 414 elif kind in ('relpath', 'path'):
405 415 r.append(pat or '.')
406 416 else: # relglob, re, relre
407 417 r.append('.')
408 418 return r
409 419
410 420 def _anypats(kindpats):
411 421 for kind, pat in kindpats:
412 422 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
413 423 return True
General Comments 0
You need to be logged in to leave comments. Login now