##// END OF EJS Templates
match: fix bug in match.visitdir()...
Drew Gottlieb -
r25250:f9a29dc9 default
parent child Browse files
Show More
@@ -1,620 +1,620 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import re
8 import re
9 import util, pathutil
9 import util, pathutil
10 from i18n import _
10 from i18n import _
11
11
12 propertycache = util.propertycache
12 propertycache = util.propertycache
13
13
def _rematcher(regex):
    '''Compile regex via util.re and return a callable matching against it.

    The compiled object's test_match attribute is returned when it exists;
    otherwise the compiled object's regular match method is returned.
    '''
    compiled = util.re.compile(regex)
    # test_match is slightly faster; it is provided by facebook's re2
    # bindings and absent from other pattern objects
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is None:
        return compiled.match
    return fastmatch
23
23
24 def _expandsets(kindpats, ctx, listsubrepos):
24 def _expandsets(kindpats, ctx, listsubrepos):
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
26 fset = set()
26 fset = set()
27 other = []
27 other = []
28
28
29 for kind, pat, source in kindpats:
29 for kind, pat, source in kindpats:
30 if kind == 'set':
30 if kind == 'set':
31 if not ctx:
31 if not ctx:
32 raise util.Abort("fileset expression with no context")
32 raise util.Abort("fileset expression with no context")
33 s = ctx.getfileset(pat)
33 s = ctx.getfileset(pat)
34 fset.update(s)
34 fset.update(s)
35
35
36 if listsubrepos:
36 if listsubrepos:
37 for subpath in ctx.substate:
37 for subpath in ctx.substate:
38 s = ctx.sub(subpath).getfileset(pat)
38 s = ctx.sub(subpath).getfileset(pat)
39 fset.update(subpath + '/' + f for f in s)
39 fset.update(subpath + '/' + f for f in s)
40
40
41 continue
41 continue
42 other.append((kind, pat, source))
42 other.append((kind, pat, source))
43 return fset, other
43 return fset, other
44
44
45 def _kindpatsalwaysmatch(kindpats):
45 def _kindpatsalwaysmatch(kindpats):
46 """"Checks whether the kindspats match everything, as e.g.
46 """"Checks whether the kindspats match everything, as e.g.
47 'relpath:.' does.
47 'relpath:.' does.
48 """
48 """
49 for kind, pat, source in kindpats:
49 for kind, pat, source in kindpats:
50 if pat != '' or kind not in ['relpath', 'glob']:
50 if pat != '' or kind not in ['relpath', 'glob']:
51 return False
51 return False
52 return True
52 return True
53
53
class match(object):
    '''Callable file-name matcher built from patterns plus include/exclude
    lists. Calling the instance with a file name runs the combined match
    function stored in self.matchfn.'''

    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - handed to pathutil.canonpath for 'glob'/'relpath' patterns
        ctx - context used to evaluate 'set:' patterns
        listsubrepos - also evaluate 'set:' patterns in the ctx's subrepos
        warn - optional function used for printing warnings

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        'include:<path>' - a file of patterns to read and include
        '<something>' - a pattern of the specified default type
        """

        # All plain attributes are assigned before any call to _normalize(),
        # which reads self._warn.
        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)
        self._warn = warn
        self._includeroots = set()
        self._includedirs = set(['.'])
        self._excluderoots = set()

        # Every function collected here must accept a file name for the
        # matcher as a whole to accept it.
        matchfns = []

        if include:
            inckindpats = self._normalize(include, 'glob', root, cwd, auditor)
            self.includepat, incmatch = _buildmatch(ctx, inckindpats,
                                                    '(?:/|$)', listsubrepos,
                                                    root)
            self._includeroots.update(_roots(inckindpats))
            self._includeroots.discard('.')
            self._includedirs.update(util.dirs(self._includeroots))
            matchfns.append(incmatch)

        if exclude:
            exckindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, excmatch = _buildmatch(ctx, exckindpats,
                                                    '(?:/|$)', listsubrepos,
                                                    root)
            self._excluderoots.update(_roots(exckindpats))
            self._excluderoots.discard('.')
            matchfns.append(lambda f: not excmatch(f))

        if exact:
            # a list is used as-is, any other iterable is copied into a list
            if not isinstance(patterns, list):
                patterns = list(patterns)
            self._files = patterns
            matchfns.append(self.exact)
        elif patterns:
            patkindpats = self._normalize(patterns, default, root, cwd,
                                          auditor)
            if not _kindpatsalwaysmatch(patkindpats):
                self._files = _roots(patkindpats)
                self._anypats = self._anypats or _anypats(patkindpats)
                self.patternspat, patmatch = _buildmatch(ctx, patkindpats,
                                                         '$', listsubrepos,
                                                         root)
                matchfns.append(patmatch)

        if not matchfns:
            # nothing restricts the match
            self._always = True
            combined = util.always
        elif len(matchfns) == 1:
            combined = matchfns[0]
        else:
            def combined(f):
                return all(fn(f) for fn in matchfns)

        self.matchfn = combined
        self._fileroots = set(self._files)

    def __call__(self, fn):
        '''Return the result of the combined match function for fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message. Does nothing by default.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher. This implementation returns f unchanged.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path. If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._pathrestricted:
            relative = self.rel(f)
            # a false (e.g. empty) relative path falls through to abs()
            if relative:
                return relative
        return self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _dirs(self):
        '''Set of the directories reported by util.dirs() for the file roots,
        plus '.'.'''
        return set(util.dirs(self._fileroots)) | set(['.'])

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        if dir in self._excluderoots:
            return False

        includeroots = self._includeroots
        parentdirs = None
        if (includeroots and dir not in includeroots
            and dir not in self._includedirs):
            # Materialized as a list because the same sequence may be walked
            # a second time further down.
            parentdirs = list(util.finddirs(dir))
            if not any(parent in includeroots for parent in parentdirs):
                return False

        fileroots = self._fileroots
        if not fileroots or '.' in fileroots:
            return True
        if dir in fileroots or dir in self._dirs:
            return True
        return any(parentdir in fileroots
                   for parentdir in parentdirs or util.finddirs(dir))

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileroots

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def ispartial(self):
        '''True if the matcher won't always match.

        Although it's just the inverse of _always in this implementation,
        an extension such as narrowhg might make it return something
        slightly different.'''
        return not self._always

    def isexact(self):
        '''True if the combined match function is the exact() method.'''
        return self.matchfn == self.exact

    def prefix(self):
        '''True if the matcher is none of always(), isexact() or anypats().'''
        if self.always() or self.isexact():
            return False
        return not self.anypats()

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Convert 'kind:pat' from the patterns list to tuples with kind and
        normalized and rooted patterns and with listfiles expanded.

        Returns a list of (kind, pat, source) tuples; source is '' for
        patterns taken directly from the patterns argument.'''
        result = []
        for kind, pat in [_patsplit(p, default) for p in patterns]:
            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path'):
                pat = util.normpath(pat)
            elif kind in ('listfile', 'listfile0'):
                try:
                    content = util.readfile(pat)
                    if kind == 'listfile0':
                        entries = content.split('\0')
                    else:
                        entries = content.splitlines()
                    entries = [e for e in entries if e]
                except EnvironmentError:
                    raise util.Abort(_("unable to read file list (%s)") % pat)
                # entries are normalized recursively and record the list
                # file as their source
                for k, p, source in self._normalize(entries, default, root,
                                                    cwd, auditor):
                    result.append((k, p, pat))
                continue
            elif kind == 'include':
                try:
                    includepats = readpatternfile(pat, self._warn)
                    for k, p, source in self._normalize(includepats, default,
                                                        root, cwd, auditor):
                        result.append((k, p, source or pat))
                except util.Abort as inst:
                    # prefix the failure with the name of the pattern file
                    raise util.Abort('%s: %s' % (pat, inst[0]))
                except IOError as inst:
                    if self._warn:
                        self._warn(_("skipping unreadable pattern file "
                                     "'%s': %s\n") % (pat, inst.strerror))
                continue
            # else: re or relre - which cannot be normalized
            result.append((kind, pat, ''))
        return result
274
274
def exact(root, cwd, files):
    '''Return a match object built with exact=True, so that files are
    treated as file names rather than patterns.'''
    matcher = match(root, cwd, files, exact=True)
    return matcher
277
277
def always(root, cwd):
    '''Return a match object built from an empty pattern list and no
    include/exclude patterns.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
280
280
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        '''Wrap matcher so that names are interpreted relative to path.

        path - subdirectory, relative to the wrapped matcher's root
        matcher - the matcher to adapt

        match.__init__ is deliberately not called: the state is copied or
        derived from the wrapped matcher instead.
        '''
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always
        self._pathrestricted = matcher._pathrestricted

        # keep only the files below path, with the "path/" prefix removed
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]

        # If the parent repo had a path to this subrepo and no patterns are
        # specified, this submatcher always matches.
        if not self._always and not matcher._anypats:
            self._always = any(f == path for f in matcher._files)

        self._anypats = matcher._anypats
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fileroots = set(self._files)

    def abs(self, f):
        '''Return the wrapped matcher's abs() for f placed below the path.'''
        return self._matcher.abs(self._path + "/" + f)

    def bad(self, f, msg):
        '''Forward the bad-file callback to the wrapped matcher, with f
        placed below the path.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def rel(self, f):
        '''Return the wrapped matcher's rel() for f placed below the path.'''
        return self._matcher.rel(self._path + "/" + f)

    def visitdir(self, dir):
        '''Decide whether dir (relative to the path) should be visited, by
        asking the wrapped matcher about the corresponding full directory.

        The inherited implementation cannot be used here: it reads the
        include/exclude root sets, which only match.__init__ creates and
        which this class never sets, so it would raise AttributeError.
        '''
        if dir == '.':
            dir = self._path
        else:
            dir = self._path + "/" + dir
        return self._matcher.visitdir(dir)
339
339
class icasefsmatcher(match):
    """A matcher for wdir on case insensitive filesystems, which normalizes the
    given patterns to the case in the filesystem.
    """

    def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
                 ctx, listsubrepos=False):
        '''Build the matcher; the arguments have the same meaning as for
        match.__init__. ctx must provide repo().dirstate.normalize.'''
        # must exist before the base initializer runs, because that calls
        # the overridden _normalize()
        self._dsnormalize = ctx.repo().dirstate.normalize

        super(icasefsmatcher, self).__init__(root, cwd, patterns, include,
                                             exclude, default,
                                             auditor=auditor, ctx=ctx,
                                             listsubrepos=listsubrepos)

        # m.exact(file) must be based off of the actual user input, otherwise
        # inexact case matches are treated as exact, and not noted without -v.
        if self._files:
            self._fileroots = set(_roots(self._kp))

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Normalize like match._normalize, then pass every non-regex pattern
        through the dirstate normalizer. The base result is remembered in
        self._kp.'''
        base = super(icasefsmatcher, self)._normalize(patterns, default,
                                                      root, cwd, auditor)
        self._kp = base
        normalized = []
        for kind, pats, source in base:
            if kind in ('re', 'relre'):
                # regex can't be normalized
                normalized.append((kind, pats, source))
            else:
                normalized.append((kind, self._dsnormalize(pats), source))
        return normalized
367
367
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind; otherwise
    return default.'''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
371
371
372 def _patsplit(pattern, default):
372 def _patsplit(pattern, default):
373 """Split a string into the optional pattern kind prefix and the actual
373 """Split a string into the optional pattern kind prefix and the actual
374 pattern."""
374 pattern."""
375 if ':' in pattern:
375 if ':' in pattern:
376 kind, pat = pattern.split(':', 1)
376 kind, pat = pattern.split(':', 1)
377 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
377 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
378 'listfile', 'listfile0', 'set', 'include'):
378 'listfile', 'listfile0', 'set', 'include'):
379 return kind, pat
379 return kind, pat
380 return default, pattern
380 return default, pattern
381
381
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    i, n = 0, len(pat)
    pieces = []    # regexp fragments, joined at the end
    group = 0      # number of currently open '{' groups
    escape = util.re.escape
    def peek():
        # next unread character, or False at the end of the glob
        return i < n and pat[i]
    while i < n:
        c = pat[i]
        i += 1
        if c not in '*?[{},\\':
            pieces.append(escape(c))
        elif c == '*':
            if peek() != '*':
                # single star: anything except a path separator
                pieces.append('[^/]*')
            else:
                i += 1
                if peek() == '/':
                    # '**/': any number of leading directories
                    i += 1
                    pieces.append('(?:.*/)?')
                else:
                    pieces.append('.*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # look for the closing bracket; a '!' or ']' directly after the
            # opening bracket is part of the class
            end = i
            if end < n and pat[end] in '!]':
                end += 1
            while end < n and pat[end] != ']':
                end += 1
            if end < n:
                body = pat[i:end].replace('\\', '\\\\')
                i = end + 1
                if body[0] == '!':
                    # glob negation becomes regexp negation
                    body = '^' + body[1:]
                elif body[0] == '^':
                    # a literal leading '^' must not negate the class
                    body = '\\' + body
                pieces.append('[%s]' % body)
            else:
                # no closing bracket: the '[' is a literal
                pieces.append('\\[')
        elif c == '{':
            group += 1
            pieces.append('(?:')
        elif c == '}' and group:
            pieces.append(')')
            group -= 1
        elif c == ',' and group:
            pieces.append('|')
        elif c == '\\':
            following = peek()
            if following:
                # backslash escapes the next character
                i += 1
                pieces.append(escape(following))
            else:
                pieces.append(escape(c))
        else:
            # '}' or ',' outside of any group
            pieces.append(escape(c))
    return ''.join(pieces)
459
459
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pattern yields an empty string whatever the kind. Kinds not
    handled explicitly below are converted as globs.'''
    if not pat:
        return ''

    if kind == 're':
        regex = pat
    elif kind == 'path':
        regex = '^' + util.re.escape(pat) + '(?:/|$)'
    elif kind == 'relglob':
        regex = '(?:|.*/)' + _globre(pat) + globsuffix
    elif kind == 'relpath':
        regex = util.re.escape(pat) + '(?:/|$)'
    elif kind == 'relre':
        # an explicitly anchored pattern is used as given
        if pat.startswith('^'):
            regex = pat
        else:
            regex = '.*' + pat
    else:
        regex = _globre(pat) + globsuffix
    return regex
478
478
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    'set' patterns are expanded to a set of file names that is tested by
    membership; all other patterns are compiled into one regexp matcher. The
    returned regexp string is '' when only 'set' patterns were given.'''
    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)

    candidates = []
    if fset:
        candidates.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        candidates.append(regexmatch)

    if len(candidates) != 1:
        # zero or several matchers: accept a name if any of them does
        return regex, lambda f: any(mf(f) for mf in candidates)
    return regex, candidates[0]
497
497
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises util.Abort when a pattern is not a valid regular expression."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        regexa, matcha = _buildregexmatch(kindpats[:half], globsuffix)
        regexb, matchb = _buildregexmatch(kindpats[half:], globsuffix)
        return regex, lambda s: matcha(s) or matchb(s)
    except re.error:
        # compile the patterns one by one to report the offending one
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise util.Abort(_("%s: invalid pattern (%s): %s") %
                                 (s, k, p))
        raise util.Abort(_("invalid pattern"))
528
528
529 def _roots(kindpats):
529 def _roots(kindpats):
530 '''return roots and exact explicitly listed files from patterns
530 '''return roots and exact explicitly listed files from patterns
531
531
532 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
532 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
533 ['g', 'g', '.']
533 ['g', 'g', '.']
534 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
534 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
535 ['r', 'p/p', '.']
535 ['r', 'p/p', '.']
536 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
536 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
537 ['.', '.', '.']
537 ['.', '.', '.']
538 '''
538 '''
539 r = []
539 r = []
540 for kind, pat, source in kindpats:
540 for kind, pat, source in kindpats:
541 if kind == 'glob': # find the non-glob prefix
541 if kind == 'glob': # find the non-glob prefix
542 root = []
542 root = []
543 for p in pat.split('/'):
543 for p in pat.split('/'):
544 if '[' in p or '{' in p or '*' in p or '?' in p:
544 if '[' in p or '{' in p or '*' in p or '?' in p:
545 break
545 break
546 root.append(p)
546 root.append(p)
547 r.append('/'.join(root) or '.')
547 r.append('/'.join(root) or '.')
548 elif kind in ('relpath', 'path'):
548 elif kind in ('relpath', 'path'):
549 r.append(pat or '.')
549 r.append(pat or '.')
550 else: # relglob, re, relre
550 else: # relglob, re, relre
551 r.append('.')
551 r.append('.')
552 return r
552 return r
553
553
554 def _anypats(kindpats):
554 def _anypats(kindpats):
555 for kind, pat, source in kindpats:
555 for kind, pat, source in kindpats:
556 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
556 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
557 return True
557 return True
558
558
# Compiled comment-stripping regexp; built by readpatternfile() the first
# time it meets a line containing '#'.
_commentre = None

def readpatternfile(filepath, warn):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    filepath is the path of the file to read. warn is either a false
    value or a callable taking one message string; it is called when a
    "syntax:" line names an unknown syntax, and that line is then ignored.

    Each returned pattern is a string made of a syntax prefix followed by
    the text of the line.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type'''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include'}
    syntax = 'relre:'
    patterns = []

    fp = open(filepath)
    try:
        for line in fp:
            if "#" in line:
                if not _commentre:
                    _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                line = _commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A line may override the current default with an explicit
            # prefix, given in long ("relre:") or short ("re:") form.
            # NOTE(review): the 'include' entry has no trailing colon, so
            # any line that merely starts with "include" takes the first
            # branch and is emitted without the default syntax prefix;
            # confirm this is intended.
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            patterns.append(linesyntax + line)
    finally:
        # close the file even when parsing or the warn callback raises
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now