##// END OF EJS Templates
match: introduce uipath() to properly style a file path...
Matt Harbison -
r23480:88d2d77e default
parent child Browse files
Show More
@@ -1,407 +1,415
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import util, pathutil
10 10 from i18n import _
11 11
def _rematcher(regex):
    '''Compile regex through util.re and return a callable that matches a
    string against it.

    The compiled object's test_match attribute is preferred when present
    (it is provided by facebook's re2 bindings and is slightly faster);
    otherwise the ordinary match method is returned.'''
    compiled = util.re.compile(regex)
    try:
        matcher = compiled.test_match
    except AttributeError:
        # no re2-style fast path on this compiled object
        matcher = compiled.match
    return matcher
21 21
22 22 def _expandsets(kindpats, ctx):
23 23 '''Returns the kindpats list with the 'set' patterns expanded.'''
24 24 fset = set()
25 25 other = []
26 26
27 27 for kind, pat in kindpats:
28 28 if kind == 'set':
29 29 if not ctx:
30 30 raise util.Abort("fileset expression with no context")
31 31 s = ctx.getfileset(pat)
32 32 fset.update(s)
33 33 continue
34 34 other.append((kind, pat))
35 35 return fset, other
36 36
class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """create a matcher for a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - handed to the pattern normalizer together with root and cwd
        ctx - context used to evaluate 'set:' patterns

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._ctx = ctx
        self._files = [] # exact files and roots of patterns
        self._always = False
        self._anypats = bool(include or exclude)
        self._pathrestricted = bool(include or exclude or patterns)

        # every predicate collected here must accept a file for it to match
        checks = []
        if include:
            incpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, incfn = _buildmatch(ctx, incpats, '(?:/|$)')
            checks.append(incfn)
        if exclude:
            excpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, excfn = _buildmatch(ctx, excpats, '(?:/|$)')
            checks.append(lambda f: not excfn(f))
        if exact:
            # a list is used as-is; any other iterable is copied into one
            if not isinstance(patterns, list):
                patterns = list(patterns)
            self._files = patterns
            checks.append(self.exact)
        elif patterns:
            patpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(patpats)
            if not self._anypats:
                self._anypats = _anypats(patpats)
            self.patternspat, patfn = _buildmatch(ctx, patpats, '$')
            checks.append(patfn)

        if not checks:
            # nothing restricts the match: everything is accepted
            self._always = True
            combined = util.always
        elif len(checks) == 1:
            combined = checks[0]
        else:
            def combined(f):
                for check in checks:
                    if not check(f):
                        return False
                return True

        self.matchfn = combined
        self._fmap = set(self._files)

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._pathrestricted:
            relative = self.rel(f)
            if relative:
                return relative
        return f

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fmap

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always
151 158
class exact(match):
    '''Matcher that treats the given files as literal file names.'''

    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, patterns=files, exact=True)
155 162
class always(match):
    '''Matcher built without any patterns, includes or excludes.'''

    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, patterns=[])
159 166
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        self._path = path
        self._matcher = matcher

        # state mirrored verbatim from the wrapped matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._always = matcher._always
        self._anypats = matcher._anypats
        self._pathrestricted = matcher._pathrestricted

        # keep only the files below path, with the "path/" prefix removed
        prefix = path + "/"
        skip = len(prefix)
        self._files = [name[skip:] for name in matcher._files
                       if name.startswith(prefix)]
        self._fmap = set(self._files)

        def narrowed(fn):
            # re-add the prefix and defer to the wrapped matcher
            return matcher.matchfn(self._path + "/" + fn)
        self.matchfn = narrowed

    def bad(self, f, msg):
        '''Forward the callback to the wrapped matcher with the subdirectory
        prefix put back in front of f.'''
        fullname = self._path + "/" + f
        self._matcher.bad(fullname, msg)
203 211
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern when the kind is a known
    one, otherwise return default.'''
    kind, _pat = _patsplit(pattern, default)
    return kind
207 215
208 216 def _patsplit(pattern, default):
209 217 """Split a string into the optional pattern kind prefix and the actual
210 218 pattern."""
211 219 if ':' in pattern:
212 220 kind, pat = pattern.split(':', 1)
213 221 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
214 222 'listfile', 'listfile0', 'set'):
215 223 return kind, pat
216 224 return default, pattern
217 225
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    # i is the read cursor into pat, n its length
    i, n = 0, len(pat)
    res = ''
    # number of '{' groups currently open
    group = 0
    escape = util.re.escape
    def peek():
        # next unread character, or False once the end of pat is reached
        return i < n and pat[i]
    while i < n:
        c = pat[i]
        i += 1
        if c not in '*?[{},\\':
            # ordinary character: match it literally
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches any number of leading directories,
                    # including none
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' matches anything, '/' included
                    res += '.*'
            else:
                # a single '*' does not cross a '/'
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a '!' or ']' directly after the
            # '[' is skipped first so it cannot end the class
            j = i
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # unterminated class: the '[' is a literal character
                res += '\\['
            else:
                # double the backslashes inside the class
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0] == '!':
                    # glob negation '!' becomes regexp negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    # a leading '^' is literal in a glob, so escape it
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside a '{' group
            res += '|'
        elif c == '\\':
            # a backslash escapes the following character; a trailing
            # backslash is escaped itself
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                res += escape(c)
        else:
            # '}' or ',' outside of any group: literal character
            res += escape(c)
    return res
295 303
def _regex(kind, pat, globsuffix):
    '''Translate one (normalized) pattern of the given kind into a regular
    expression string.

    An empty pattern always yields ''. globsuffix is appended to the
    regexp produced for glob and relglob patterns only.'''
    if not pat:
        return ''

    # regexp kinds: used (almost) verbatim
    if kind == 're':
        return pat
    if kind == 'relre':
        if pat.startswith('^'):
            return pat
        return '.*' + pat

    # literal path kinds: match the path itself or anything below it
    if kind in ('path', 'relpath'):
        literal = util.re.escape(pat) + '(?:/|$)'
        if kind == 'path':
            return '^' + literal
        return literal

    # everything else is handled as a glob
    globbed = _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + globbed
    return globbed
314 322
def _buildmatch(ctx, kindpats, globsuffix):
    '''Build a matcher for kindpats and return (regexp string, matcher).

    'set' patterns are expanded with ctx into a set of file names; the
    remaining patterns are compiled into one regexp. globsuffix is
    appended to the regexp of globs.'''
    fileset, regexpats = _expandsets(kindpats, ctx)
    if not regexpats:
        # nothing but filesets (or nothing at all): plain membership test
        return "", fileset.__contains__

    regex, regexmatch = _buildregexmatch(regexpats, globsuffix)
    if not fileset:
        return regex, regexmatch

    def either(f):
        return f in fileset or regexmatch(f)
    return regex, either
326 334
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    All patterns are joined into a single alternation. When that regexp
    is longer than 20000 characters, or compiling it raises
    OverflowError, the pattern list is halved recursively and the two
    resulting matchers are or'ed together. An invalid pattern aborts
    with util.Abort."""
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p) in kindpats])
        # treat an oversized regexp like an engine overflow so that it
        # takes the splitting path below
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            # a single pattern cannot be split further: re-raise
            raise
        # only the matchers of the halves are used; the regexp string
        # returned below is still the complete, unsplit one
        regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
        regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile each pattern on its own to report the offending one
        for k, p in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        # every pattern compiles alone; only the combination is invalid
        raise util.Abort(_("invalid pattern"))
353 361
def _normalize(patterns, default, root, cwd, auditor):
    '''Turn a list of 'kind:pat' strings into a list of (kind, pat) tuples.

    glob and relpath patterns are canonicalized with pathutil.canonpath,
    relglob and path patterns go through util.normpath, and listfile /
    listfile0 patterns are replaced by the (recursively normalized)
    patterns read from the named file. re and relre patterns are kept
    as they are.'''
    normalized = []
    # split every pattern up front, before any of them is processed
    split = [_patsplit(p, default) for p in patterns]
    for kind, pat in split:
        if kind in ('listfile', 'listfile0'):
            try:
                data = util.readfile(pat)
                if kind == 'listfile0':
                    names = data.split('\0')
                else:
                    names = data.splitlines()
                # drop empty entries
                names = [name for name in names if name]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % pat)
            normalized.extend(_normalize(names, default, root, cwd, auditor))
            continue

        if kind in ('relglob', 'path'):
            pat = util.normpath(pat)
        elif kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        # else: re or relre - which cannot be normalized
        normalized.append((kind, pat))
    return normalized
378 386
379 387 def _roots(kindpats):
380 388 '''return roots and exact explicitly listed files from patterns
381 389
382 390 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
383 391 ['g', 'g', '.']
384 392 >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
385 393 ['r', 'p/p', '.']
386 394 >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
387 395 ['.', '.', '.']
388 396 '''
389 397 r = []
390 398 for kind, pat in kindpats:
391 399 if kind == 'glob': # find the non-glob prefix
392 400 root = []
393 401 for p in pat.split('/'):
394 402 if '[' in p or '{' in p or '*' in p or '?' in p:
395 403 break
396 404 root.append(p)
397 405 r.append('/'.join(root) or '.')
398 406 elif kind in ('relpath', 'path'):
399 407 r.append(pat or '.')
400 408 else: # relglob, re, relre
401 409 r.append('.')
402 410 return r
403 411
404 412 def _anypats(kindpats):
405 413 for kind, pat in kindpats:
406 414 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
407 415 return True
General Comments 0
You need to be logged in to leave comments. Login now