##// END OF EJS Templates
match: make it more clear what _roots do and that it ends up in match()._files
Mads Kiilerich -
r21079:b02ab648 default
parent child Browse files
Show More
@@ -1,358 +1,367 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import util, pathutil
10 10 from i18n import _
11 11
def _rematcher(pat):
    '''compile pat and return the function used to match against it'''
    compiled = util.compilere(pat)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matchfn = compiled.test_match
    except AttributeError:
        # the compiled object has no test_match: use the regular match method
        matchfn = compiled.match
    return matchfn
19 19
def _expandsets(pats, ctx):
    '''split pats into files matched by set: patterns and everything else

    Returns an (fset, other) pair. fset is the set of files obtained by
    evaluating every 'set' expression with ctx.getfileset(); other is the
    list of remaining (kind, expr) pairs, in their original order.

    Aborts when a 'set' pattern is seen and ctx is false.
    '''
    matched = set()
    remaining = []

    for kind, expr in pats:
        if kind != 'set':
            remaining.append((kind, expr))
            continue
        if not ctx:
            raise util.Abort("fileset expression with no context")
        matched.update(ctx.getfileset(expr))
    return matched, remaining
34 34
class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include
        exclude - patterns to exclude
        default - if a pattern in patterns has no explicit type, assume
                  this one
        exact - patterns are actually literals
        auditor - handed to the pattern normalization step
        ctx - context used to expand 'set:' patterns

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._ctx = ctx
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._always = False

        # im / em / pm are the include, exclude and pattern match functions;
        # each one only exists when the corresponding argument was given
        if include:
            incpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, incpats, '(?:/|$)')
        if exclude:
            excpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, excpats, '(?:/|$)')
        if exact:
            # a list is stored as is, any other iterable is copied into one
            if not isinstance(patterns, list):
                patterns = list(patterns)
            self._files = patterns
            pm = self.exact
        elif patterns:
            normpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(normpats)
            self._anypats = self._anypats or _anypats(normpats)
            self.patternspat, pm = _buildmatch(ctx, normpats, '$')

        # combine the available match functions into a single predicate
        if patterns or exact:
            if include and exclude:
                m = lambda f: im(f) and not em(f) and pm(f)
            elif include:
                m = lambda f: im(f) and pm(f)
            elif exclude:
                m = lambda f: not em(f) and pm(f)
            else:
                m = pm
        elif include and exclude:
            m = lambda f: im(f) and not em(f)
        elif include:
            m = im
        elif exclude:
            m = lambda f: not em(f)
        else:
            # nothing to filter on: every file matches
            m = lambda f: True
            self._always = True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''return the result of the match function for fn'''
        return self.matchfn(fn)

    def __iter__(self):
        '''iterate over the entries of files()'''
        for f in self._files:
            yield f

    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''

    # If this is set, it will be called when an explicitly listed directory is
    # visited.
    explicitdir = None

    # If this is set, it will be called when a directory discovered by recursive
    # traversal is visited.
    traversedir = None

    def missing(self, f):
        # does nothing by default
        pass

    def exact(self, f):
        '''return True if f is one of the entries of files()'''
        return f in self._fmap

    def rel(self, f):
        '''return f converted by util.pathto() using root and cwd'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''return the list of exact files and roots of patterns'''
        return self._files

    def anypats(self):
        '''return the "any patterns" flag computed when building the matcher'''
        return self._anypats

    def always(self):
        '''return the flag telling whether this matcher matches everything'''
        return self._always
140 140
class exact(match):
    '''matcher built from a list of literal file names'''
    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, patterns=files, exact=True)
144 144
class always(match):
    '''matcher built without any pattern, flagged as always matching'''
    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, patterns=[])
        self._always = True
149 149
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        prefix = path + "/"

        self._path = path
        self._matcher = matcher

        # settings copied from the wrapped matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._always = matcher._always
        self._anypats = matcher._anypats

        # keep only the files below path, with the leading "path/" removed
        self._files = [f[len(prefix):] for f in matcher._files
                       if f.startswith(prefix)]
        self._fmap = set(self._files)

        # names are given relative to path; the wrapped matcher expects
        # them with the path in front
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def bad(self, f, msg):
        '''forward to the wrapped matcher's bad(), with path put back on f'''
        self._matcher.bad(self._path + "/" + f, msg)
193 193
def patkind(pat):
    '''return the explicit kind prefix of pat, or None if it has none'''
    kind, _rest = _patsplit(pat, None)
    return kind
196 196
def _patsplit(pat, default):
    """Split a string into an optional pattern kind prefix and the
    actual pattern.

    Returns (kind, pattern) when pat starts with a known 'kind:' prefix,
    and (default, pat) otherwise."""
    known = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
             'listfile', 'listfile0', 'set')
    colon = pat.find(':')
    if colon >= 0:
        prefix = pat[:colon]
        if prefix in known:
            return prefix, pat[colon + 1:]
    return default, pat
206 206
def _globre(pat):
    "convert a glob pattern into a regexp"
    escape = re.escape
    end = len(pat)
    pieces = []
    depth = 0 # number of {...} groups currently open
    pos = 0
    while pos < end:
        c = pat[pos]
        pos += 1
        if c == '*':
            # '**' may cross '/' while a single '*' stops at it
            if pos < end and pat[pos] == '*':
                pos += 1
                pieces.append('.*')
            else:
                pieces.append('[^/]*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # a '!' or ']' right after '[' is part of the class body
            start = pos
            if start < end and pat[start] in '!]':
                start += 1
            close = pat.find(']', start)
            if close < 0:
                # no closing bracket: the '[' is a literal character
                pieces.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if stuff[0] == '!':
                    # glob negation becomes regexp negation
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    # a leading '^' is literal in a glob class
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        elif c == '{':
            depth += 1
            pieces.append('(?:')
        elif c == '}' and depth:
            depth -= 1
            pieces.append(')')
        elif c == ',' and depth:
            pieces.append('|')
        elif c == '\\' and pos < end:
            # backslash quotes the next character
            pieces.append(escape(pat[pos]))
            pos += 1
        else:
            # ordinary character, '}' or ',' outside a group, or a
            # trailing backslash
            pieces.append(escape(c))
    return ''.join(pieces)
262 262
def _regex(kind, name, tail):
    '''convert a pattern into a regular expression

    kind selects how name is interpreted; tail is appended to the
    expressions generated from globs. An empty name gives an empty
    expression.
    '''
    if not name:
        return ''
    if kind == 're':
        return name
    if kind == 'relre':
        # an expression anchored by the user is used as is, otherwise it
        # may match anywhere in the name
        if name.startswith('^'):
            return name
        return '.*' + name
    if kind == 'path':
        return '^' + re.escape(name) + '(?:/|$)'
    if kind == 'relpath':
        return re.escape(name) + '(?:/|$)'
    globbed = _globre(name) + tail
    if kind == 'relglob':
        return '(?:|.*/)' + globbed
    return globbed
280 280
def _buildmatch(ctx, pats, tail):
    '''build a (pattern string, match function) pair for pats

    'set' patterns are expanded into a set of files through ctx; the other
    patterns are compiled into a regular expression. The returned function
    accepts a file matched by either of them.
    '''
    fset, regexpats = _expandsets(pats, ctx)
    if not regexpats:
        # only filesets (or nothing at all): plain set membership
        return "", fset.__contains__

    pat, mf = _buildregexmatch(regexpats, tail)
    if not fset:
        return pat, mf
    return pat, lambda f: f in fset or mf(f)
290 290
def _buildregexmatch(pats, tail):
    """build a matching function from a set of patterns

    Returns a (pattern string, match function) pair and aborts when one of
    the patterns is not a valid regular expression."""
    try:
        alternatives = [_regex(k, p, tail) for (k, p) in pats]
        pat = '(?:%s)' % '|'.join(alternatives)
        if len(pat) > 20000:
            raise OverflowError
        return pat, _rematcher(pat)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(pats)
        if count < 2:
            # a single pattern cannot be divided any further
            raise
        half = count // 2
        # only the match functions of the halves are needed, the full
        # pattern string is still the one returned
        a = _buildregexmatch(pats[:half], tail)[1]
        b = _buildregexmatch(pats[half:], tail)[1]
        return pat, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one at a time to report the invalid one
        for k, p in pats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, tail))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))
315 315
def _normalize(names, default, root, cwd, auditor):
    '''convert pattern strings into a list of normalized (kind, name) pairs

    Patterns without an explicit kind get the default one. 'listfile' and
    'listfile0' patterns are replaced by the normalized patterns read from
    the file they name. Aborts if such a file cannot be read.
    '''
    pats = []
    split = [_patsplit(p, default) for p in names]
    for kind, name in split:
        if kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(name)
                # listfile0 entries are separated by NUL, listfile entries
                # by line breaks
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % name)
            pats.extend(_normalize(files, default, root, cwd, auditor))
            continue

        if kind in ('glob', 'relpath'):
            name = pathutil.canonpath(root, cwd, name, auditor)
        elif kind in ('relglob', 'path'):
            name = util.normpath(name)

        pats.append((kind, name))
    return pats
338 338
def _roots(patterns):
    '''return roots and exact explicitly listed files from patterns

    >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
    ['g', 'g', '.']
    >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
    ['r', 'p/p', '.']
    >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
    ['.', '.', '.']
    '''
    roots = []
    for kind, name in patterns:
        if kind in ('relpath', 'path'):
            roots.append(name or '.')
        elif kind == 'glob':
            # keep the leading components that contain no glob character
            prefix = []
            for part in name.split('/'):
                if [c for c in '[{*?' if c in part]:
                    break
                prefix.append(part)
            roots.append('/'.join(prefix) or '.')
        else:
            # relglob, re, relre
            roots.append('.')
    return roots
354 363
def _anypats(patterns):
    '''tell whether any of the (kind, name) pairs is a real pattern

    Returns True as soon as a pair of kind glob, re, relglob, relre or set
    is found, and False when there is none.
    '''
    for kind, name in patterns:
        if kind in ('glob', 're', 'relglob', 'relre', 'set'):
            return True
    # be explicit instead of falling off the end and returning None
    return False
General Comments 0
You need to be logged in to leave comments. Login now