##// END OF EJS Templates
match: introduce badmatch() to eliminate long callback chains with subrepos...
Matt Harbison -
r25433:419ac63f default
parent child Browse files
Show More
@@ -1,656 +1,664
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import re
8 import copy, re
9 import util, pathutil
9 import util, pathutil
10 from i18n import _
10 from i18n import _
11
11
# Module-level alias for util.propertycache so it can be applied below as a
# bare @propertycache decorator (see match._dirs).
propertycache = util.propertycache
13
13
def _rematcher(regex):
    '''Compile regex through util.re and return the compiled pattern's
    matching function.

    The pattern's test_match attribute is preferred when it exists;
    otherwise the ordinary match method is returned.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matchfn = compiled.test_match
    except AttributeError:
        # no test_match on this pattern object: use the regular match method
        matchfn = compiled.match
    return matchfn
23
23
24 def _expandsets(kindpats, ctx, listsubrepos):
24 def _expandsets(kindpats, ctx, listsubrepos):
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
26 fset = set()
26 fset = set()
27 other = []
27 other = []
28
28
29 for kind, pat, source in kindpats:
29 for kind, pat, source in kindpats:
30 if kind == 'set':
30 if kind == 'set':
31 if not ctx:
31 if not ctx:
32 raise util.Abort("fileset expression with no context")
32 raise util.Abort("fileset expression with no context")
33 s = ctx.getfileset(pat)
33 s = ctx.getfileset(pat)
34 fset.update(s)
34 fset.update(s)
35
35
36 if listsubrepos:
36 if listsubrepos:
37 for subpath in ctx.substate:
37 for subpath in ctx.substate:
38 s = ctx.sub(subpath).getfileset(pat)
38 s = ctx.sub(subpath).getfileset(pat)
39 fset.update(subpath + '/' + f for f in s)
39 fset.update(subpath + '/' + f for f in s)
40
40
41 continue
41 continue
42 other.append((kind, pat, source))
42 other.append((kind, pat, source))
43 return fset, other
43 return fset, other
44
44
45 def _expandsubinclude(kindpats, root):
45 def _expandsubinclude(kindpats, root):
46 '''Returns the list of subinclude matchers and the kindpats without the
46 '''Returns the list of subinclude matchers and the kindpats without the
47 subincludes in it.'''
47 subincludes in it.'''
48 relmatchers = []
48 relmatchers = []
49 other = []
49 other = []
50
50
51 for kind, pat, source in kindpats:
51 for kind, pat, source in kindpats:
52 if kind == 'subinclude':
52 if kind == 'subinclude':
53 sourceroot = pathutil.dirname(util.normpath(source))
53 sourceroot = pathutil.dirname(util.normpath(source))
54 pat = util.pconvert(pat)
54 pat = util.pconvert(pat)
55 path = pathutil.join(sourceroot, pat)
55 path = pathutil.join(sourceroot, pat)
56
56
57 newroot = pathutil.dirname(path)
57 newroot = pathutil.dirname(path)
58 relmatcher = match(newroot, '', [], ['include:%s' % path])
58 relmatcher = match(newroot, '', [], ['include:%s' % path])
59
59
60 prefix = pathutil.canonpath(root, root, newroot)
60 prefix = pathutil.canonpath(root, root, newroot)
61 if prefix:
61 if prefix:
62 prefix += '/'
62 prefix += '/'
63 relmatchers.append((prefix, relmatcher))
63 relmatchers.append((prefix, relmatcher))
64 else:
64 else:
65 other.append((kind, pat, source))
65 other.append((kind, pat, source))
66
66
67 return relmatchers, other
67 return relmatchers, other
68
68
69 def _kindpatsalwaysmatch(kindpats):
69 def _kindpatsalwaysmatch(kindpats):
70 """"Checks whether the kindspats match everything, as e.g.
70 """"Checks whether the kindspats match everything, as e.g.
71 'relpath:.' does.
71 'relpath:.' does.
72 """
72 """
73 for kind, pat, source in kindpats:
73 for kind, pat, source in kindpats:
74 if pat != '' or kind not in ['relpath', 'glob']:
74 if pat != '' or kind not in ['relpath', 'glob']:
75 return False
75 return False
76 return True
76 return True
77
77
class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - passed to pathutil.canonpath when normalizing 'glob' and
                  'relpath' patterns
        ctx - context used to expand 'set:' patterns
        listsubrepos - also expand 'set:' patterns in the subrepos of ctx
        warn - optional function used for printing warnings

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        'include:<path>' - a file of patterns to read and include
        'subinclude:<path>' - a file of patterns to match against files under
                              the same directory
        '<something>' - a pattern of the specified default type
        """
        # NOTE: include/exclude default to shared list objects; they are only
        # read here, never mutated, so the shared defaults are harmless.

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)
        self._warn = warn
        self._includeroots = set()
        self._includedirs = set(['.'])
        self._excluderoots = set()

        # every function collected here must accept a file for it to match
        matchfns = []
        if include:
            kindpats = self._normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            self._includeroots.update(_roots(kindpats))
            self._includeroots.discard('.')
            self._includedirs.update(util.dirs(self._includeroots))
            matchfns.append(im)
        if exclude:
            kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            # exclude roots are only recorded when no entry is a real pattern
            if not _anypats(kindpats):
                self._excluderoots.update(_roots(kindpats))
            self._excluderoots.discard('.')
            matchfns.append(lambda f: not em(f))
        if exact:
            # a list is used as-is (not copied); other iterables are listed
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = self._normalize(patterns, default, root, cwd, auditor)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _roots(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos, root)
                matchfns.append(pm)

        if not matchfns:
            # nothing restricts the match
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                # a file matches only if every collected function accepts it
                return all(matchfn(f) for matchfn in matchfns)

        self.matchfn = m
        self._fileroots = set(self._files)

    def __call__(self, fn):
        '''Return the result of the composed match function for fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._pathrestricted:
            relative = self.rel(f)
            # a false (e.g. empty) relative path falls back to abs()
            if relative:
                return relative
        return self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _dirs(self):
        '''Set of the directories of the file roots (as produced by
        util.dirs), plus '.'.'''
        return set(util.dirs(self._fileroots)) | set(['.'])

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        if dir in self._excluderoots:
            return False

        parentdirs = None
        if (self._includeroots and dir not in self._includeroots and
            dir not in self._includedirs):
            # dir is neither an include root nor a directory leading to one,
            # so it is only of interest when it lies below an include root
            parentdirs = list(util.finddirs(dir))
            if not any(parent in self._includeroots for parent in parentdirs):
                return False

        roots = self._fileroots
        if not roots or '.' in roots or dir in roots or dir in self._dirs:
            return True
        # reuse the parent list computed above when there is one
        return any(parentdir in roots
                   for parentdir in parentdirs or util.finddirs(dir))

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileroots

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def ispartial(self):
        '''True if the matcher won't always match.

        Although it's just the inverse of _always in this implementation,
        an extension such as narrowhg might make it return something
        slightly different.'''
        return not self._always

    def isexact(self):
        '''True if the match function is this matcher's own exact() check.'''
        return self.matchfn == self.exact

    def prefix(self):
        '''True if the matcher is neither always(), isexact() nor anypats().'''
        return not self.always() and not self.isexact() and not self.anypats()

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Convert 'kind:pat' from the patterns list to tuples with kind and
        normalized and rooted patterns and with listfiles expanded.

        Returns a list of (kind, pat, source) tuples, where source is the
        file a pattern was read from, or '' for patterns given directly.'''
        kindpats = []
        for kind, pat in [_patsplit(p, default) for p in patterns]:
            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path'):
                pat = util.normpath(pat)
            elif kind in ('listfile', 'listfile0'):
                try:
                    files = util.readfile(pat)
                    if kind == 'listfile0':
                        files = files.split('\0')
                    else:
                        files = files.splitlines()
                    files = [f for f in files if f]
                except EnvironmentError:
                    raise util.Abort(_("unable to read file list (%s)") % pat)
                # entries read from the list file are normalized recursively
                # and recorded with the list file as their source
                for k, p, source in self._normalize(files, default, root, cwd,
                                                    auditor):
                    kindpats.append((k, p, pat))
                continue
            elif kind == 'include':
                try:
                    includepats = readpatternfile(pat, self._warn)
                    for k, p, source in self._normalize(includepats, default,
                                                        root, cwd, auditor):
                        kindpats.append((k, p, source or pat))
                except util.Abort as inst:
                    # prefix the error with the name of the pattern file
                    raise util.Abort('%s: %s' % (pat, inst.args[0]))
                except IOError as inst:
                    # an unreadable pattern file is skipped, not fatal
                    if self._warn:
                        self._warn(_("skipping unreadable pattern file "
                                     "'%s': %s\n") % (pat, inst.strerror))
                continue
            # else: re or relre - which cannot be normalized
            kindpats.append((kind, pat, ''))
        return kindpats
301
301
def exact(root, cwd, files):
    '''Return a matcher built with exact=True, i.e. one that treats files as
    literal file names rather than patterns.'''
    matcher = match(root, cwd, files, exact=True)
    return matcher
304
304
def always(root, cwd):
    '''Return a matcher built from an empty pattern list (no include or
    exclude patterns either).'''
    nopatterns = []
    return match(root, cwd, nopatterns)
307
307
def badmatch(match, badfn):
    """Return a shallow copy of the given matcher whose bad attribute is
    badfn.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
315
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        '''Wrap matcher so that it operates on paths relative to path.

        The base class initializer is deliberately not called; only the
        attributes assigned below exist on the instance.'''
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always
        self._pathrestricted = matcher._pathrestricted

        # keep only the parent's files below path, with the prefix removed
        prefix = path + "/"
        self._files = [f[len(prefix):] for f in matcher._files
                       if f.startswith(prefix)]

        # If the parent repo had a path to this subrepo and no patterns are
        # specified, this submatcher always matches.
        if not self._always and not matcher._anypats:
            self._always = any(f == path for f in matcher._files)

        self._anypats = matcher._anypats
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fileroots = set(self._files)

    def abs(self, f):
        '''Delegate to the wrapped matcher with path prepended to f.'''
        return self._matcher.abs(self._path + "/" + f)

    def bad(self, f, msg):
        '''Forward the callback to the wrapped matcher with path prepended
        to f.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def rel(self, f):
        '''Delegate to the wrapped matcher with path prepended to f.'''
        return self._matcher.rel(self._path + "/" + f)

    def visitdir(self, dir):
        '''Delegate the decision to the wrapped matcher.

        The inherited implementation reads the include/exclude root
        attributes, which __init__ above never sets, so it would raise
        AttributeError on this class. dir is remapped into the wrapped
        matcher's namespace instead; '.' stands for the narrowed directory
        itself.'''
        if dir == '.':
            dir = self._path
        else:
            dir = self._path + "/" + dir
        return self._matcher.visitdir(dir)
366
374
class icasefsmatcher(match):
    """A matcher for wdir on case insensitive filesystems, which normalizes the
    given patterns to the case in the filesystem.
    """

    def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
                 ctx, listsubrepos=False):
        baseinit = super(icasefsmatcher, self).__init__
        # has to exist before the base initializer runs, because that calls
        # back into _normalize() below
        self._dsnormalize = ctx.repo().dirstate.normalize

        baseinit(root, cwd, patterns, include, exclude, default,
                 auditor=auditor, ctx=ctx, listsubrepos=listsubrepos)

        # m.exact(file) must be based off of the actual user input, otherwise
        # inexact case matches are treated as exact, and not noted without -v.
        if self._files:
            self._fileroots = set(_roots(self._kp))

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Normalize as the base class does, then pass every non-regex
        pattern through the dirstate normalize function.

        The result of the base class normalization is remembered in
        self._kp.'''
        self._kp = super(icasefsmatcher, self)._normalize(patterns, default,
                                                          root, cwd, auditor)

        def fold(kind, pat):
            if kind in ('re', 'relre'): # regex can't be normalized
                return pat
            return self._dsnormalize(pat)

        return [(kind, fold(kind, pat), source)
                for kind, pat, source in self._kp]
394
402
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind; otherwise
    return default.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
398
406
399 def _patsplit(pattern, default):
407 def _patsplit(pattern, default):
400 """Split a string into the optional pattern kind prefix and the actual
408 """Split a string into the optional pattern kind prefix and the actual
401 pattern."""
409 pattern."""
402 if ':' in pattern:
410 if ':' in pattern:
403 kind, pat = pattern.split(':', 1)
411 kind, pat = pattern.split(':', 1)
404 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
412 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
405 'listfile', 'listfile0', 'set', 'include', 'subinclude'):
413 'listfile', 'listfile0', 'set', 'include', 'subinclude'):
406 return kind, pat
414 return kind, pat
407 return default, pattern
415 return default, pattern
408
416
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    i, n = 0, len(pat)
    pieces = []          # regexp fragments, joined at the end
    group = 0            # number of currently open '{' groups
    escape = util.re.escape
    while i < n:
        c = pat[i]
        i += 1
        if c == '*':
            # pat[i:i + 1] is the next character, or '' at the end
            if pat[i:i + 1] == '*':
                i += 1
                if pat[i:i + 1] == '/':
                    # '**/' also matches zero directories
                    i += 1
                    pieces.append('(?:.*/)?')
                else:
                    pieces.append('.*')
            else:
                pieces.append('[^/]*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # look for the closing bracket; a leading '!' or ']' belongs to
            # the character class
            j = i
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # unterminated class: a literal '['
                pieces.append('\\[')
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        elif c == '{':
            group += 1
            pieces.append('(?:')
        elif c == '}' and group:
            pieces.append(')')
            group -= 1
        elif c == ',' and group:
            pieces.append('|')
        elif c == '\\':
            following = pat[i:i + 1]
            if following:
                # a backslash quotes the next character
                i += 1
                pieces.append(escape(following))
            else:
                pieces.append(escape(c))
        else:
            # ordinary character, or '}' / ',' outside of any group
            pieces.append(escape(c))
    return ''.join(pieces)
486
494
487 def _regex(kind, pat, globsuffix):
495 def _regex(kind, pat, globsuffix):
488 '''Convert a (normalized) pattern of any kind into a regular expression.
496 '''Convert a (normalized) pattern of any kind into a regular expression.
489 globsuffix is appended to the regexp of globs.'''
497 globsuffix is appended to the regexp of globs.'''
490 if not pat:
498 if not pat:
491 return ''
499 return ''
492 if kind == 're':
500 if kind == 're':
493 return pat
501 return pat
494 if kind == 'path':
502 if kind == 'path':
495 return '^' + util.re.escape(pat) + '(?:/|$)'
503 return '^' + util.re.escape(pat) + '(?:/|$)'
496 if kind == 'relglob':
504 if kind == 'relglob':
497 return '(?:|.*/)' + _globre(pat) + globsuffix
505 return '(?:|.*/)' + _globre(pat) + globsuffix
498 if kind == 'relpath':
506 if kind == 'relpath':
499 return util.re.escape(pat) + '(?:/|$)'
507 return util.re.escape(pat) + '(?:/|$)'
500 if kind == 'relre':
508 if kind == 'relre':
501 if pat.startswith('^'):
509 if pat.startswith('^'):
502 return pat
510 return pat
503 return '.*' + pat
511 return '.*' + pat
504 return _globre(pat) + globsuffix
512 return _globre(pat) + globsuffix
505
513
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    The matcher function accepts a file if any of the subincludes, the
    expanded filesets or the regexp built from the remaining patterns
    accepts it. The regexp string is '' when no such pattern remains.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        def matchsubinclude(f):
            # each sub-matcher sees the path with its own prefix removed
            return any(f.startswith(prefix) and mf(f[len(prefix):])
                       for prefix, mf in subincludes)
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
533
541
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of (kind, pat, source) tuples.

    Returns the regexp string and a matcher function. When the combined
    regexp is too large, the pattern list is halved recursively and the
    two resulting matchers are or-ed together. An invalid pattern aborts
    with a message naming the offending pattern (and its source, if any).
    """
    try:
        alternatives = [_regex(kind, pat, globsuffix)
                        for (kind, pat, source) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # The regex engine could not cope with the whole pattern (or we
        # decided it was too long), so split the pattern list in two and
        # retry each half until it works.
        count = len(kindpats)
        if count < 2:
            # a single pattern cannot be split any further
            raise
        middle = count // 2
        first = _buildregexmatch(kindpats[:middle], globsuffix)[1]
        second = _buildregexmatch(kindpats[middle:], globsuffix)[1]
        return regex, lambda s: first(s) or second(s)
    except re.error:
        # compile the patterns one at a time to find the one to blame
        for kind, pat, source in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(kind, pat, globsuffix))
            except re.error:
                if source:
                    raise util.Abort(_("%s: invalid pattern (%s): %s") %
                                     (source, kind, pat))
                raise util.Abort(_("invalid pattern (%s): %s") % (kind, pat))
        raise util.Abort(_("invalid pattern"))
564
572
565 def _roots(kindpats):
573 def _roots(kindpats):
566 '''return roots and exact explicitly listed files from patterns
574 '''return roots and exact explicitly listed files from patterns
567
575
568 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
576 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
569 ['g', 'g', '.']
577 ['g', 'g', '.']
570 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
578 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
571 ['r', 'p/p', '.']
579 ['r', 'p/p', '.']
572 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
580 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
573 ['.', '.', '.']
581 ['.', '.', '.']
574 '''
582 '''
575 r = []
583 r = []
576 for kind, pat, source in kindpats:
584 for kind, pat, source in kindpats:
577 if kind == 'glob': # find the non-glob prefix
585 if kind == 'glob': # find the non-glob prefix
578 root = []
586 root = []
579 for p in pat.split('/'):
587 for p in pat.split('/'):
580 if '[' in p or '{' in p or '*' in p or '?' in p:
588 if '[' in p or '{' in p or '*' in p or '?' in p:
581 break
589 break
582 root.append(p)
590 root.append(p)
583 r.append('/'.join(root) or '.')
591 r.append('/'.join(root) or '.')
584 elif kind in ('relpath', 'path'):
592 elif kind in ('relpath', 'path'):
585 r.append(pat or '.')
593 r.append(pat or '.')
586 else: # relglob, re, relre
594 else: # relglob, re, relre
587 r.append('.')
595 r.append('.')
588 return r
596 return r
589
597
590 def _anypats(kindpats):
598 def _anypats(kindpats):
591 for kind, pat, source in kindpats:
599 for kind, pat, source in kindpats:
592 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
600 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
593 return True
601 return True
594
602
# compiled lazily by readpatternfile() the first time a '#' is seen
_commentre = None

def readpatternfile(filepath, warn):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    filepath is the name of the file to read. warn, when true, is called
    with a message for every "syntax:" line naming an unknown syntax;
    such lines are otherwise ignored.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type'''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    fp = open(filepath)
    try:
        for line in fp:
            if "#" in line:
                if not _commentre:
                    _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                line = _commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax; the
            # prefix may be either the short name ("glob:") or the already
            # expanded form ("relglob:")
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s + ':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            patterns.append(linesyntax + line)
    finally:
        # always release the handle, even if reading or a warn() call fails
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now