##// END OF EJS Templates
match: make glob '**/' match the empty string...
Siddharth Agarwal -
r21815:a4b67bf1 stable
parent child Browse files
Show More
@@ -1,408 +1,416 b''
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import util, pathutil
10 10 from i18n import _
11 11
def _rematcher(regex):
    '''Compile regex with the best available engine and return its matcher.

    The pattern is compiled through util.compilere. When the compiled
    object exposes a test_match attribute (provided by facebook's re2
    bindings and slightly faster), that attribute is returned; otherwise
    the object's regular match method is returned.
    '''
    compiled = util.compilere(regex)
    try:
        fastmatch = compiled.test_match
    except AttributeError:
        # no re2-style fast path on this compiled object
        return compiled.match
    return fastmatch
21 21
22 22 def _expandsets(kindpats, ctx):
23 23 '''Returns the kindpats list with the 'set' patterns expanded.'''
24 24 fset = set()
25 25 other = []
26 26
27 27 for kind, pat in kindpats:
28 28 if kind == 'set':
29 29 if not ctx:
30 30 raise util.Abort("fileset expression with no context")
31 31 s = ctx.getfileset(pat)
32 32 fset.update(s)
33 33 continue
34 34 other.append((kind, pat))
35 35 return fset, other
36 36
class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """create a matcher for a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - kind assumed for a pattern without an explicit kind
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - handed to the pattern normalization step
        ctx - context used to evaluate 'set:' patterns

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._ctx = ctx
        self._files = [] # exact files and roots of patterns
        self._always = False
        self._anypats = bool(include or exclude)

        # include/exclude entries are always interpreted as globs by default
        if include:
            inckindpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, incmatch = _buildmatch(ctx, inckindpats,
                                                    '(?:/|$)')
        if exclude:
            exckindpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, excmatch = _buildmatch(ctx, exckindpats,
                                                    '(?:/|$)')

        if exact:
            # patterns are plain file names: match by set membership
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            patmatch = self.exact
        elif patterns:
            patkindpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(patkindpats)
            self._anypats = self._anypats or _anypats(patkindpats)
            self.patternspat, patmatch = _buildmatch(ctx, patkindpats, '$')

        # pick the cheapest callable covering the pieces that are present
        haspats = bool(patterns or exact)
        hasinc = bool(include)
        hasexc = bool(exclude)
        if haspats and hasinc and hasexc:
            fn = lambda f: incmatch(f) and not excmatch(f) and patmatch(f)
        elif haspats and hasinc:
            fn = lambda f: incmatch(f) and patmatch(f)
        elif haspats and hasexc:
            fn = lambda f: not excmatch(f) and patmatch(f)
        elif haspats:
            fn = patmatch
        elif hasinc and hasexc:
            fn = lambda f: incmatch(f) and not excmatch(f)
        elif hasinc:
            fn = incmatch
        elif hasexc:
            fn = lambda f: not excmatch(f)
        else:
            # nothing restricts the match at all
            fn = lambda f: True
            self._always = True

        self.matchfn = fn
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''Return the result of applying the match function to fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message. Does nothing by default.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fmap

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always
159 159
class exact(match):
    '''Matcher that treats the given files as literal file names.'''

    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, patterns=files, exact=True)
163 163
class always(match):
    '''Matcher built with no patterns at all and flagged as .always().'''

    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, patterns=[])
        # set the flag explicitly, whatever the base initializer decided
        self._always = True
168 168
class narrowmatcher(match):
    """Restrict an existing matcher to a single subdirectory.

    Paths handed to this matcher are relative to the subdirectory; the
    subdirectory prefix is added before asking the wrapped matcher and
    stripped from the wrapped matcher's file list:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        self._path = path
        self._matcher = matcher

        # state copied as-is from the wrapped matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._always = matcher._always
        self._anypats = matcher._anypats

        # keep only the files below path, with the "path/" prefix removed
        prefix = path + "/"
        narrowed = []
        for name in matcher._files:
            if name.startswith(prefix):
                narrowed.append(name[len(path) + 1:])
        self._files = narrowed
        self._fmap = set(self._files)

        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad() with the prefixed path.'''
        fullpath = self._path + "/" + f
        self._matcher.bad(fullpath, msg)
212 212
def patkind(pattern, default=None):
    '''Return the kind of pattern when it is written as 'kind:pat' with a
    known kind, and default otherwise.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
216 216
217 217 def _patsplit(pattern, default):
218 218 """Split a string into the optional pattern kind prefix and the actual
219 219 pattern."""
220 220 if ':' in pattern:
221 221 kind, pat = pattern.split(':', 1)
222 222 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
223 223 'listfile', 'listfile0', 'set'):
224 224 return kind, pat
225 225 return default, pattern
226 226
227 227 def _globre(pat):
228 228 r'''Convert an extended glob string to a regexp string.
229 229
230 230 >>> print _globre(r'?')
231 231 .
232 232 >>> print _globre(r'*')
233 233 [^/]*
234 234 >>> print _globre(r'**')
235 235 .*
236 >>> print _globre(r'**/a')
237 (?:.*/)?a
238 >>> print _globre(r'a/**/b')
239 a\/(?:.*/)?b
236 240 >>> print _globre(r'[a*?!^][^b][!c]')
237 241 [a*?!^][\^b][^c]
238 242 >>> print _globre(r'{a,b}')
239 243 (?:a|b)
240 244 >>> print _globre(r'.\*\?')
241 245 \.\*\?
242 246 '''
243 247 i, n = 0, len(pat)
244 248 res = ''
245 249 group = 0
246 250 escape = re.escape
247 251 def peek():
248 252 return i < n and pat[i]
249 253 while i < n:
250 254 c = pat[i]
251 255 i += 1
252 256 if c not in '*?[{},\\':
253 257 res += escape(c)
254 258 elif c == '*':
255 259 if peek() == '*':
256 260 i += 1
257 res += '.*'
261 if peek() == '/':
262 i += 1
263 res += '(?:.*/)?'
264 else:
265 res += '.*'
258 266 else:
259 267 res += '[^/]*'
260 268 elif c == '?':
261 269 res += '.'
262 270 elif c == '[':
263 271 j = i
264 272 if j < n and pat[j] in '!]':
265 273 j += 1
266 274 while j < n and pat[j] != ']':
267 275 j += 1
268 276 if j >= n:
269 277 res += '\\['
270 278 else:
271 279 stuff = pat[i:j].replace('\\','\\\\')
272 280 i = j + 1
273 281 if stuff[0] == '!':
274 282 stuff = '^' + stuff[1:]
275 283 elif stuff[0] == '^':
276 284 stuff = '\\' + stuff
277 285 res = '%s[%s]' % (res, stuff)
278 286 elif c == '{':
279 287 group += 1
280 288 res += '(?:'
281 289 elif c == '}' and group:
282 290 res += ')'
283 291 group -= 1
284 292 elif c == ',' and group:
285 293 res += '|'
286 294 elif c == '\\':
287 295 p = peek()
288 296 if p:
289 297 i += 1
290 298 res += escape(p)
291 299 else:
292 300 res += escape(c)
293 301 else:
294 302 res += escape(c)
295 303 return res
296 304
297 305 def _regex(kind, pat, globsuffix):
298 306 '''Convert a (normalized) pattern of any kind into a regular expression.
299 307 globsuffix is appended to the regexp of globs.'''
300 308 if not pat:
301 309 return ''
302 310 if kind == 're':
303 311 return pat
304 312 if kind == 'path':
305 313 return '^' + re.escape(pat) + '(?:/|$)'
306 314 if kind == 'relglob':
307 315 return '(?:|.*/)' + _globre(pat) + globsuffix
308 316 if kind == 'relpath':
309 317 return re.escape(pat) + '(?:/|$)'
310 318 if kind == 'relre':
311 319 if pat.startswith('^'):
312 320 return pat
313 321 return '.*' + pat
314 322 return _globre(pat) + globsuffix
315 323
def _buildmatch(ctx, kindpats, globsuffix):
    '''Build the regexp string and the match function for kindpats.

    'set' patterns are expanded to a set of files first. The returned
    function accepts a name that is in that set or that is matched by the
    regexp built from the remaining patterns. globsuffix is appended to
    the regexp of globs.'''
    fileset, others = _expandsets(kindpats, ctx)
    if not others:
        # only fileset results (possibly none): membership test suffices
        return "", fileset.__contains__

    regex, rematch = _buildregexmatch(others, globsuffix)
    if not fileset:
        return regex, rematch
    return regex, lambda f: f in fileset or rematch(f)
327 335
def _buildregexmatch(kindpats, globsuffix):
    """Return the regexp string and a match function for kindpats.

    All patterns are joined into one alternation. If that regexp is longer
    than 20000 characters, or compiling it raises OverflowError, the list
    is split in two halves that are built recursively and or-ed together.
    A pattern that is not a valid regexp raises util.Abort."""
    try:
        pieces = []
        for kind, pat in kindpats:
            pieces.append(_regex(kind, pat, globsuffix))
        regex = '(?:%s)' % '|'.join(pieces)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        mid = count // 2
        first = _buildregexmatch(kindpats[:mid], globsuffix)[1]
        second = _buildregexmatch(kindpats[mid:], globsuffix)[1]
        # the complete regexp string is still the one reported
        return regex, lambda s: first(s) or second(s)
    except re.error:
        # compile the patterns one at a time to name the broken one
        for kind, pat in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(kind, pat, globsuffix))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (kind, pat))
        raise util.Abort(_("invalid pattern"))
354 362
def _normalize(patterns, default, root, cwd, auditor):
    '''Turn a list of 'kind:pat' strings into a list of (kind, pat) tuples.

    'glob' and 'relpath' patterns are passed through pathutil.canonpath,
    'relglob' and 'path' patterns through util.normpath. 'listfile' and
    'listfile0' patterns are replaced by the normalized patterns read from
    the named file (one per line, or NUL-separated for 'listfile0'; empty
    entries are dropped). 're' and 'relre' patterns are kept as they are.

    Raises util.Abort when a list file cannot be read.'''
    result = []
    split = [_patsplit(p, default) for p in patterns]
    for kind, pat in split:
        if kind in ('listfile', 'listfile0'):
            try:
                content = util.readfile(pat)
                if kind == 'listfile0':
                    names = content.split('\0')
                else:
                    names = content.splitlines()
                names = [name for name in names if name]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % pat)
            result.extend(_normalize(names, default, root, cwd, auditor))
            continue
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        result.append((kind, pat))
    return result
379 387
380 388 def _roots(kindpats):
381 389 '''return roots and exact explicitly listed files from patterns
382 390
383 391 >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
384 392 ['g', 'g', '.']
385 393 >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
386 394 ['r', 'p/p', '.']
387 395 >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
388 396 ['.', '.', '.']
389 397 '''
390 398 r = []
391 399 for kind, pat in kindpats:
392 400 if kind == 'glob': # find the non-glob prefix
393 401 root = []
394 402 for p in pat.split('/'):
395 403 if '[' in p or '{' in p or '*' in p or '?' in p:
396 404 break
397 405 root.append(p)
398 406 r.append('/'.join(root) or '.')
399 407 elif kind in ('relpath', 'path'):
400 408 r.append(pat or '.')
401 409 else: # relglob, re, relre
402 410 r.append('.')
403 411 return r
404 412
405 413 def _anypats(kindpats):
406 414 for kind, pat in kindpats:
407 415 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
408 416 return True
@@ -1,136 +1,150 b''
1 1 $ hg init
2 2
3 3 Issue562: .hgignore requires newline at end:
4 4
5 5 $ touch foo
6 6 $ touch bar
7 7 $ touch baz
8 8 $ cat > makeignore.py <<EOF
9 9 > f = open(".hgignore", "w")
10 10 > f.write("ignore\n")
11 11 > f.write("foo\n")
12 12 > # No EOL here
13 13 > f.write("bar")
14 14 > f.close()
15 15 > EOF
16 16
17 17 $ python makeignore.py
18 18
19 19 Should display baz only:
20 20
21 21 $ hg status
22 22 ? baz
23 23
24 24 $ rm foo bar baz .hgignore makeignore.py
25 25
26 26 $ touch a.o
27 27 $ touch a.c
28 28 $ touch syntax
29 29 $ mkdir dir
30 30 $ touch dir/a.o
31 31 $ touch dir/b.o
32 32 $ touch dir/c.o
33 33
34 34 $ hg add dir/a.o
35 35 $ hg commit -m 0
36 36 $ hg add dir/b.o
37 37
38 38 $ hg status
39 39 A dir/b.o
40 40 ? a.c
41 41 ? a.o
42 42 ? dir/c.o
43 43 ? syntax
44 44
45 45 $ echo "*.o" > .hgignore
46 46 $ hg status
47 47 abort: $TESTTMP/.hgignore: invalid pattern (relre): *.o (glob)
48 48 [255]
49 49
50 50 $ echo ".*\.o" > .hgignore
51 51 $ hg status
52 52 A dir/b.o
53 53 ? .hgignore
54 54 ? a.c
55 55 ? syntax
56 56
57 57 Check it does not ignore the current directory '.':
58 58
59 59 $ echo "^\." > .hgignore
60 60 $ hg status
61 61 A dir/b.o
62 62 ? a.c
63 63 ? a.o
64 64 ? dir/c.o
65 65 ? syntax
66 66
67 67 $ echo "glob:**.o" > .hgignore
68 68 $ hg status
69 69 A dir/b.o
70 70 ? .hgignore
71 71 ? a.c
72 72 ? syntax
73 73
74 74 $ echo "glob:*.o" > .hgignore
75 75 $ hg status
76 76 A dir/b.o
77 77 ? .hgignore
78 78 ? a.c
79 79 ? syntax
80 80
81 81 $ echo "syntax: glob" > .hgignore
82 82 $ echo "re:.*\.o" >> .hgignore
83 83 $ hg status
84 84 A dir/b.o
85 85 ? .hgignore
86 86 ? a.c
87 87 ? syntax
88 88
89 89 $ echo "syntax: invalid" > .hgignore
90 90 $ hg status
91 91 $TESTTMP/.hgignore: ignoring invalid syntax 'invalid' (glob)
92 92 A dir/b.o
93 93 ? .hgignore
94 94 ? a.c
95 95 ? a.o
96 96 ? dir/c.o
97 97 ? syntax
98 98
99 99 $ echo "syntax: glob" > .hgignore
100 100 $ echo "*.o" >> .hgignore
101 101 $ hg status
102 102 A dir/b.o
103 103 ? .hgignore
104 104 ? a.c
105 105 ? syntax
106 106
107 107 $ echo "relglob:syntax*" > .hgignore
108 108 $ hg status
109 109 A dir/b.o
110 110 ? .hgignore
111 111 ? a.c
112 112 ? a.o
113 113 ? dir/c.o
114 114
115 115 $ echo "relglob:*" > .hgignore
116 116 $ hg status
117 117 A dir/b.o
118 118
119 119 $ cd dir
120 120 $ hg status .
121 121 A b.o
122 122
123 123 $ hg debugignore
124 124 (?:(?:|.*/)[^/]*(?:/|$))
125 125
126 126 $ cd ..
127 127
128 128 Check patterns that match only the directory
129 129
130 130 $ echo "^dir\$" > .hgignore
131 131 $ hg status
132 132 A dir/b.o
133 133 ? .hgignore
134 134 ? a.c
135 135 ? a.o
136 136 ? syntax
137
138 Check that a recursive glob pattern also matches zero directories (dir/**/c.o matches dir/c.o)
139
140 $ echo "syntax: glob" > .hgignore
141 $ echo "dir/**/c.o" >> .hgignore
142 $ touch dir/c.o
143 $ mkdir dir/subdir
144 $ touch dir/subdir/c.o
145 $ hg status
146 A dir/b.o
147 ? .hgignore
148 ? a.c
149 ? a.o
150 ? syntax
General Comments 0
You need to be logged in to leave comments. Login now