##// END OF EJS Templates
match: introduce basic fileset support
Matt Mackall -
r14675:cfc89398 default
parent child Browse files
Show More
@@ -1,311 +1,338
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 import scmutil, util
9 import scmutil, util, fileset
10 10 from i18n import _
11 11
12 def _expandsets(pats, ctx):
13 '''convert set: patterns into a list of files in the given context'''
14 fset = set()
15 other = []
16
17 for kind, expr in pats:
18 if kind == 'set':
19 if not ctx:
20 raise util.Abort("fileset expression with no context")
21 s = fileset.getfileset(ctx, expr)
22 fset.update(s)
23 continue
24 other.append((kind, expr))
25 return fset, other
26
class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include
        exclude - patterns to exclude
        default - if a pattern in names has no explicit type, assume this one
        exact - patterns are actually literals
        auditor - handed to _normalize() together with root and cwd
        ctx - context handed to _buildmatch() for expanding 'set:' patterns

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to canonroot
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        # NOTE: the include/exclude list defaults are shared between calls,
        # which is only safe because they are never mutated here.
        self._root = root
        self._cwd = cwd
        self._files = []
        self._anypats = bool(include or exclude)
        self._ctx = ctx

        # im, em and pm are the include, exclude and pattern match functions;
        # each is only bound when the corresponding argument was supplied
        if include:
            pats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
        if exclude:
            pats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
        if exact:
            self._files = patterns
            pm = self.exact
        elif patterns:
            pats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(pats)
            self._anypats = self._anypats or _anypats(pats)
            self.patternspat, pm = _buildmatch(ctx, pats, '$')

        # combine only the functions that exist so that the common cases
        # avoid an extra level of lambda indirection
        if patterns or exact:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f) and pm(f)
                else:
                    m = lambda f: im(f) and pm(f)
            else:
                if exclude:
                    m = lambda f: not em(f) and pm(f)
                else:
                    m = pm
        else:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f)
                else:
                    m = im
            else:
                if exclude:
                    m = lambda f: not em(f)
                else:
                    m = lambda f: True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''return the result of the match function for fn'''
        return self.matchfn(fn)
    def __iter__(self):
        '''iterate over the entries of files()'''
        for f in self._files:
            yield f
    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''
        pass
    def dir(self, f):
        pass
    def missing(self, f):
        pass
    def exact(self, f):
        '''return True if f is one of the entries of files()'''
        return f in self._fmap
    def rel(self, f):
        '''return util.pathto(root, cwd, f)'''
        return util.pathto(self._root, self._cwd, f)
    def files(self):
        return self._files
    def anypats(self):
        return self._anypats
105 122
class exact(match):
    '''matcher that treats the given files as literal names'''
    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, files, exact=True)
109 126
class always(match):
    '''matcher built with no patterns at all, so every name matches'''
    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, [])
113 130
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        # match.__init__ is deliberately not called: all state is derived
        # from the wrapped matcher instead of from patterns
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher

        # keep only the files below path, with the "path/" prefix removed
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]
        self._anypats = matcher._anypats
        # put the prefix back before asking the wrapped matcher
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fmap = set(self._files)

    def bad(self, f, msg):
        '''forward to the wrapped matcher's bad() with path prepended to f'''
        self._matcher.bad(self._path + "/" + f, msg)
156 173
def patkind(pat):
    '''return the kind prefix of pat, or None if it has no recognized one'''
    return _patsplit(pat, None)[0]
159 176
160 177 def _patsplit(pat, default):
161 178 """Split a string into an optional pattern kind prefix and the
162 179 actual pattern."""
163 180 if ':' in pat:
164 181 kind, val = pat.split(':', 1)
165 182 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
166 'listfile', 'listfile0'):
183 'listfile', 'listfile0', 'set'):
167 184 return kind, val
168 185 return default, pat
169 186
170 187 def _globre(pat):
171 188 "convert a glob pattern into a regexp"
172 189 i, n = 0, len(pat)
173 190 res = ''
174 191 group = 0
175 192 escape = re.escape
176 193 def peek():
177 194 return i < n and pat[i]
178 195 while i < n:
179 196 c = pat[i]
180 197 i += 1
181 198 if c not in '*?[{},\\':
182 199 res += escape(c)
183 200 elif c == '*':
184 201 if peek() == '*':
185 202 i += 1
186 203 res += '.*'
187 204 else:
188 205 res += '[^/]*'
189 206 elif c == '?':
190 207 res += '.'
191 208 elif c == '[':
192 209 j = i
193 210 if j < n and pat[j] in '!]':
194 211 j += 1
195 212 while j < n and pat[j] != ']':
196 213 j += 1
197 214 if j >= n:
198 215 res += '\\['
199 216 else:
200 217 stuff = pat[i:j].replace('\\','\\\\')
201 218 i = j + 1
202 219 if stuff[0] == '!':
203 220 stuff = '^' + stuff[1:]
204 221 elif stuff[0] == '^':
205 222 stuff = '\\' + stuff
206 223 res = '%s[%s]' % (res, stuff)
207 224 elif c == '{':
208 225 group += 1
209 226 res += '(?:'
210 227 elif c == '}' and group:
211 228 res += ')'
212 229 group -= 1
213 230 elif c == ',' and group:
214 231 res += '|'
215 232 elif c == '\\':
216 233 p = peek()
217 234 if p:
218 235 i += 1
219 236 res += escape(p)
220 237 else:
221 238 res += escape(c)
222 239 else:
223 240 res += escape(c)
224 241 return res
225 242
226 243 def _regex(kind, name, tail):
227 244 '''convert a pattern into a regular expression'''
228 245 if not name:
229 246 return ''
230 247 if kind == 're':
231 248 return name
232 249 elif kind == 'path':
233 250 return '^' + re.escape(name) + '(?:/|$)'
234 251 elif kind == 'relglob':
235 252 return '(?:|.*/)' + _globre(name) + tail
236 253 elif kind == 'relpath':
237 254 return re.escape(name) + '(?:/|$)'
238 255 elif kind == 'relre':
239 256 if name.startswith('^'):
240 257 return name
241 258 return '.*' + name
242 259 return _globre(name) + tail
243 260
def _buildmatch(ctx, pats, tail):
    '''build a matching function from (kind, pattern) pairs

    'set' patterns are expanded with _expandsets() using ctx; everything
    else is turned into a regular expression by _buildregexmatch().
    Returns a (regex string, match function) pair; the regex string is
    empty when only 'set' patterns (or no patterns) were given.
    '''
    fset, pats = _expandsets(pats, ctx)
    if not pats:
        # nothing needs a regexp: set membership is the whole test
        return "", fset.__contains__

    pat, mf = _buildregexmatch(pats, tail)
    if fset:
        return pat, lambda f: f in fset or mf(f)
    return pat, mf
270
def _buildregexmatch(pats, tail):
    """build a matching function from a set of patterns

    pats is a list of (kind, pattern) pairs.  Each pair is converted
    with _regex() and the results are joined into a single alternation.
    Returns a (regex string, match function) pair.

    Raises util.Abort when a pattern is not a valid regular expression.
    """
    try:
        pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
        if len(pat) > 20000:
            raise OverflowError()
        return pat, re.compile(pat).match
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(pats)
        if l < 2:
            raise
        # Recurse into this function rather than _buildmatch(), which
        # takes (ctx, pats, tail) and would fail with these two arguments.
        a = _buildregexmatch(pats[:l//2], tail)[1]
        b = _buildregexmatch(pats[l//2:], tail)[1]
        return pat, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one at a time to name the offending one
        for k, p in pats:
            try:
                re.compile('(?:%s)' % _regex(k, p, tail))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))
268 295
def _normalize(names, default, root, cwd, auditor):
    '''split the patterns in names into a list of (kind, name) pairs

    Each entry is split with _patsplit(), using default as the kind for
    unprefixed entries.  'glob' and 'relpath' names go through
    scmutil.canonpath(), 'relglob' and 'path' names through
    util.normpath().  'listfile' and 'listfile0' entries name a file
    whose non-empty lines (or NUL-separated entries for 'listfile0')
    are normalized recursively in place of the entry itself.

    Raises util.Abort if such a list file cannot be read.
    '''
    pats = []
    for kind, name in [_patsplit(p, default) for p in names]:
        if kind in ('glob', 'relpath'):
            name = scmutil.canonpath(root, cwd, name, auditor)
        elif kind in ('relglob', 'path'):
            name = util.normpath(name)
        elif kind in ('listfile', 'listfile0'):
            # only the read itself is guarded, so that errors raised while
            # processing the contents are not reported as read failures
            try:
                files = util.readfile(name)
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % name)
            if kind == 'listfile0':
                files = files.split('\0')
            else:
                files = files.splitlines()
            files = [f for f in files if f]
            pats += _normalize(files, default, root, cwd, auditor)
            continue

        pats.append((kind, name))
    return pats
291 318
292 319 def _roots(patterns):
293 320 r = []
294 321 for kind, name in patterns:
295 322 if kind == 'glob': # find the non-glob prefix
296 323 root = []
297 324 for p in name.split('/'):
298 325 if '[' in p or '{' in p or '*' in p or '?' in p:
299 326 break
300 327 root.append(p)
301 328 r.append('/'.join(root) or '.')
302 329 elif kind in ('relpath', 'path'):
303 330 r.append(name or '.')
304 331 elif kind == 'relglob':
305 332 r.append('.')
306 333 return r
307 334
308 335 def _anypats(patterns):
309 336 for kind, name in patterns:
310 337 if kind in ('glob', 're', 'relglob', 'relre'):
311 338 return True
General Comments 0
You need to be logged in to leave comments. Login now