##// END OF EJS Templates
match: add root to _buildmatch...
Durham Goode -
r25238:5a55ad6e default
parent child Browse files
Show More
@@ -1,613 +1,613 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import re
8 import re
9 import util, pathutil
9 import util, pathutil
10 from i18n import _
10 from i18n import _
11
11
12 propertycache = util.propertycache
12 propertycache = util.propertycache
13
13
def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    regex is compiled through util.re.compile(). If the compiled object has
    a test_match attribute, that is returned; otherwise its match attribute
    is returned.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
23
23
24 def _expandsets(kindpats, ctx, listsubrepos):
24 def _expandsets(kindpats, ctx, listsubrepos):
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
26 fset = set()
26 fset = set()
27 other = []
27 other = []
28
28
29 for kind, pat, source in kindpats:
29 for kind, pat, source in kindpats:
30 if kind == 'set':
30 if kind == 'set':
31 if not ctx:
31 if not ctx:
32 raise util.Abort("fileset expression with no context")
32 raise util.Abort("fileset expression with no context")
33 s = ctx.getfileset(pat)
33 s = ctx.getfileset(pat)
34 fset.update(s)
34 fset.update(s)
35
35
36 if listsubrepos:
36 if listsubrepos:
37 for subpath in ctx.substate:
37 for subpath in ctx.substate:
38 s = ctx.sub(subpath).getfileset(pat)
38 s = ctx.sub(subpath).getfileset(pat)
39 fset.update(subpath + '/' + f for f in s)
39 fset.update(subpath + '/' + f for f in s)
40
40
41 continue
41 continue
42 other.append((kind, pat, source))
42 other.append((kind, pat, source))
43 return fset, other
43 return fset, other
44
44
45 def _kindpatsalwaysmatch(kindpats):
45 def _kindpatsalwaysmatch(kindpats):
46 """"Checks whether the kindspats match everything, as e.g.
46 """"Checks whether the kindspats match everything, as e.g.
47 'relpath:.' does.
47 'relpath:.' does.
48 """
48 """
49 for kind, pat, source in kindpats:
49 for kind, pat, source in kindpats:
50 if pat != '' or kind not in ['relpath', 'glob']:
50 if pat != '' or kind not in ['relpath', 'glob']:
51 return False
51 return False
52 return True
52 return True
53
53
class match(object):
    '''Decide whether file names match a set of patterns.

    An instance is callable: m(fn) evaluates the combination of include,
    exclude and primary-pattern matchers assembled by __init__.
    '''

    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        warn - optional function used for printing warnings

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        'include:<path>' - a file of patterns to read and include
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)
        self._warn = warn
        self._includeroots = set()
        self._includedirs = set(['.'])
        self._excluderoots = set()

        # every function collected here must accept a file name for the
        # matcher as a whole to accept it
        matchfns = []
        if include:
            kindpats = self._normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            self._includeroots.update(_roots(kindpats))
            self._includeroots.discard('.')
            self._includedirs.update(util.dirs(self._includeroots))
            matchfns.append(im)
        if exclude:
            kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            self._excluderoots.update(_roots(kindpats))
            self._excluderoots.discard('.')
            # a file passes the exclude stage when it is NOT matched by it
            matchfns.append(lambda f: not em(f))
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = self._normalize(patterns, default, root, cwd, auditor)
            # patterns that match everything contribute no match function,
            # so they cannot prevent the "always" shortcut below
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _roots(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos, root)
                matchfns.append(pm)

        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m
        self._fileroots = set(self._files)

    def __call__(self, fn):
        '''Return the result of the combined match function for fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        return (self._pathrestricted and self.rel(f)) or self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _dirs(self):
        '''Set built from util.dirs() of the file roots, plus '.'.'''
        return set(util.dirs(self._fileroots)) | set(['.'])

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        if dir in self._excluderoots:
            return False
        parentdirs = None
        if (self._includeroots and dir not in self._includeroots and
                dir not in self._includedirs):
            # Materialize the ancestors: util.finddirs() may hand back a
            # one-shot iterator, in which case the any() below would consume
            # it and the (always truthy) exhausted iterator would then be
            # reused for the file-root check, silently skipping ancestors.
            parentdirs = list(util.finddirs(dir))
            if not any(parent in self._includeroots for parent in parentdirs):
                return False
        return (not self._fileroots or '.' in self._fileroots or
                dir in self._fileroots or dir in self._dirs or
                any(parentdir in self._fileroots
                    for parentdir in parentdirs or util.finddirs(dir)))

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileroots

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def ispartial(self):
        '''True if the matcher won't always match.

        Although it's just the inverse of _always in this implementation,
        an extension such as narrowhg might make it return something
        slightly different.'''
        return not self._always

    def isexact(self):
        '''True if the match function is this matcher's own exact().'''
        return self.matchfn == self.exact

    def prefix(self):
        '''True if the matcher is neither always(), isexact() nor
        anypats().'''
        return not self.always() and not self.isexact() and not self.anypats()

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Convert 'kind:pat' from the patterns list to tuples with kind and
        normalized and rooted patterns and with listfiles expanded.

        Returns a list of (kind, pat, source) tuples. source is '' for
        patterns given directly, and the name of the file they were read
        from for 'listfile', 'listfile0' and 'include' patterns.

        Raises util.Abort when a list file cannot be read or when reading an
        include file aborts; an include file that raises IOError is skipped,
        with a warning if a warn function was supplied.'''
        kindpats = []
        for kind, pat in [_patsplit(p, default) for p in patterns]:
            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path'):
                pat = util.normpath(pat)
            elif kind in ('listfile', 'listfile0'):
                try:
                    files = util.readfile(pat)
                    if kind == 'listfile0':
                        files = files.split('\0')
                    else:
                        files = files.splitlines()
                    files = [f for f in files if f]
                except EnvironmentError:
                    raise util.Abort(_("unable to read file list (%s)") % pat)
                # entries read from a list file are normalized recursively
                # and recorded with the list file as their source
                for k, p, source in self._normalize(files, default, root, cwd,
                                                    auditor):
                    kindpats.append((k, p, pat))
                continue
            elif kind == 'include':
                try:
                    includepats = readpatternfile(pat, self._warn)
                    for k, p, source in self._normalize(includepats, default,
                                                        root, cwd, auditor):
                        kindpats.append((k, p, source or pat))
                except util.Abort as inst:
                    # prefix the message with the include file's name
                    raise util.Abort('%s: %s' % (pat, inst.args[0]))
                except IOError as inst:
                    if self._warn:
                        self._warn(_("skipping unreadable pattern file "
                                     "'%s': %s\n") % (pat, inst.strerror))
                continue
            # else: re or relre - which cannot be normalized
            kindpats.append((kind, pat, ''))
        return kindpats
274
274
def exact(root, cwd, files):
    '''Return a match object built with exact=True, i.e. one that treats
    files as filenames rather than patterns.'''
    exactmatcher = match(root, cwd, files, exact=True)
    return exactmatcher
277
277
def always(root, cwd):
    '''Return a match object built from an empty pattern list and no
    include/exclude patterns.'''
    nopatterns = []
    return match(root, cwd, nopatterns)
280
280
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        # NOTE(review): match.__init__ is not called, so attributes such as
        # _includeroots, _includedirs, _excluderoots and _warn are not set
        # on this object - confirm nothing relies on them for narrowmatchers.
        self._path = path
        self._matcher = matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._pathrestricted = matcher._pathrestricted
        self._anypats = matcher._anypats

        # keep only the wrapped matcher's files that live below path, with
        # the leading "path/" stripped
        prefix = path + "/"
        self._files = [f[len(prefix):] for f in matcher._files
                       if f.startswith(prefix)]
        self._fileroots = set(self._files)

        # If the parent repo had a path to this subrepo and no patterns are
        # specified, this submatcher always matches.
        self._always = matcher._always
        if not self._always and not matcher._anypats:
            self._always = path in matcher._files

        def narrowed(fn):
            return matcher.matchfn(self._path + "/" + fn)
        self.matchfn = narrowed

    def abs(self, f):
        '''Delegate to the wrapped matcher's abs() with path prepended.'''
        full = self._path + "/" + f
        return self._matcher.abs(full)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad() with path prepended.'''
        full = self._path + "/" + f
        self._matcher.bad(full, msg)

    def rel(self, f):
        '''Delegate to the wrapped matcher's rel() with path prepended.'''
        full = self._path + "/" + f
        return self._matcher.rel(full)
339
339
class icasefsmatcher(match):
    """A matcher for wdir on case insensitive filesystems, which normalizes the
    given patterns to the case in the filesystem.
    """

    def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
                 ctx, listsubrepos=False):
        # must be in place before the base constructor runs, because that
        # constructor calls self._normalize()
        self._dsnormalize = ctx.repo().dirstate.normalize

        super(icasefsmatcher, self).__init__(
            root, cwd, patterns, include, exclude, default, auditor=auditor,
            ctx=ctx, listsubrepos=listsubrepos)

        # m.exact(file) must be based off of the actual user input, otherwise
        # inexact case matches are treated as exact, and not noted without -v.
        if not self._files:
            return
        self._fileroots = set(_roots(self._kp))

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Normalize patterns as the base class does, remember that result
        in self._kp, and return a copy in which every pattern that is not a
        regex kind has been passed through the dirstate normalizer.'''
        basenormalize = super(icasefsmatcher, self)._normalize
        self._kp = basenormalize(patterns, default, root, cwd, auditor)
        regexkinds = ('re', 'relre') # regex can't be normalized
        normalized = []
        for kind, pat, source in self._kp:
            if kind in regexkinds:
                normalized.append((kind, pat, source))
            else:
                normalized.append((kind, self._dsnormalize(pat), source))
        return normalized
367
367
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise default is returned.'''
    kind, pat = _patsplit(pattern, default)
    return kind
371
371
372 def _patsplit(pattern, default):
372 def _patsplit(pattern, default):
373 """Split a string into the optional pattern kind prefix and the actual
373 """Split a string into the optional pattern kind prefix and the actual
374 pattern."""
374 pattern."""
375 if ':' in pattern:
375 if ':' in pattern:
376 kind, pat = pattern.split(':', 1)
376 kind, pat = pattern.split(':', 1)
377 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
377 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
378 'listfile', 'listfile0', 'set', 'include'):
378 'listfile', 'listfile0', 'set', 'include'):
379 return kind, pat
379 return kind, pat
380 return default, pattern
380 return default, pattern
381
381
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    escape = util.re.escape
    end = len(pat)
    pos = 0
    depth = 0 # number of '{' groups currently open
    out = []
    while pos < end:
        char = pat[pos]
        pos += 1
        if char == '*':
            # pat[pos:pos + 1] is the next character, or '' at the end
            if pat[pos:pos + 1] != '*':
                out.append('[^/]*')
                continue
            pos += 1
            if pat[pos:pos + 1] == '/':
                pos += 1
                out.append('(?:.*/)?')
            else:
                out.append('.*')
        elif char == '?':
            out.append('.')
        elif char == '[':
            # look for the closing ']'; a '!' or ']' directly after the
            # opening bracket belongs to the class itself
            close = pos
            if close < end and pat[close] in '!]':
                close += 1
            while close < end and pat[close] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat the '[' literally
                out.append('\\[')
                continue
            stuff = pat[pos:close].replace('\\', '\\\\')
            pos = close + 1
            if stuff[0] == '!':
                stuff = '^' + stuff[1:]
            elif stuff[0] == '^':
                stuff = '\\' + stuff
            out.append('[%s]' % stuff)
        elif char == '{':
            depth += 1
            out.append('(?:')
        elif char == '}' and depth:
            depth -= 1
            out.append(')')
        elif char == ',' and depth:
            out.append('|')
        elif char == '\\' and pos < end:
            # a backslash escapes the character that follows it
            out.append(escape(pat[pos]))
            pos += 1
        else:
            out.append(escape(char))
    return ''.join(out)
459
459
460 def _regex(kind, pat, globsuffix):
460 def _regex(kind, pat, globsuffix):
461 '''Convert a (normalized) pattern of any kind into a regular expression.
461 '''Convert a (normalized) pattern of any kind into a regular expression.
462 globsuffix is appended to the regexp of globs.'''
462 globsuffix is appended to the regexp of globs.'''
463 if not pat:
463 if not pat:
464 return ''
464 return ''
465 if kind == 're':
465 if kind == 're':
466 return pat
466 return pat
467 if kind == 'path':
467 if kind == 'path':
468 return '^' + util.re.escape(pat) + '(?:/|$)'
468 return '^' + util.re.escape(pat) + '(?:/|$)'
469 if kind == 'relglob':
469 if kind == 'relglob':
470 return '(?:|.*/)' + _globre(pat) + globsuffix
470 return '(?:|.*/)' + _globre(pat) + globsuffix
471 if kind == 'relpath':
471 if kind == 'relpath':
472 return util.re.escape(pat) + '(?:/|$)'
472 return util.re.escape(pat) + '(?:/|$)'
473 if kind == 'relre':
473 if kind == 'relre':
474 if pat.startswith('^'):
474 if pat.startswith('^'):
475 return pat
475 return pat
476 return '.*' + pat
476 return '.*' + pat
477 return _globre(pat) + globsuffix
477 return _globre(pat) + globsuffix
478
478
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    'set' patterns are expanded into a set of file names first. If nothing
    else remains, the regexp string is "" and the matcher is a membership
    test on that set; otherwise the matcher accepts a name that is in the
    set or matches the regexp built from the remaining patterns.

    root is accepted but not used in this function's body.'''
    fset, regexpats = _expandsets(kindpats, ctx, listsubrepos)
    if regexpats:
        regex, regexmatch = _buildregexmatch(regexpats, globsuffix)
        if not fset:
            return regex, regexmatch
        return regex, lambda f: f in fset or regexmatch(f)
    return "", fset.__contains__
490
490
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises util.Abort when the patterns do not compile, naming the first
    offending pattern (and its source, if any) when one can be singled out.
    """
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        regexa, a = _buildregexmatch(kindpats[:half], globsuffix)
        regexb, b = _buildregexmatch(kindpats[half:], globsuffix)
        def either(s):
            return a(s) or b(s)
        # the regexp string returned is still the full, undivided one
        return regex, either
    except re.error:
        # compile each pattern on its own to find out which one is broken
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _("%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _("invalid pattern (%s): %s") % (k, p)
                raise util.Abort(msg)
        raise util.Abort(_("invalid pattern"))
521
521
522 def _roots(kindpats):
522 def _roots(kindpats):
523 '''return roots and exact explicitly listed files from patterns
523 '''return roots and exact explicitly listed files from patterns
524
524
525 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
525 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
526 ['g', 'g', '.']
526 ['g', 'g', '.']
527 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
527 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
528 ['r', 'p/p', '.']
528 ['r', 'p/p', '.']
529 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
529 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
530 ['.', '.', '.']
530 ['.', '.', '.']
531 '''
531 '''
532 r = []
532 r = []
533 for kind, pat, source in kindpats:
533 for kind, pat, source in kindpats:
534 if kind == 'glob': # find the non-glob prefix
534 if kind == 'glob': # find the non-glob prefix
535 root = []
535 root = []
536 for p in pat.split('/'):
536 for p in pat.split('/'):
537 if '[' in p or '{' in p or '*' in p or '?' in p:
537 if '[' in p or '{' in p or '*' in p or '?' in p:
538 break
538 break
539 root.append(p)
539 root.append(p)
540 r.append('/'.join(root) or '.')
540 r.append('/'.join(root) or '.')
541 elif kind in ('relpath', 'path'):
541 elif kind in ('relpath', 'path'):
542 r.append(pat or '.')
542 r.append(pat or '.')
543 else: # relglob, re, relre
543 else: # relglob, re, relre
544 r.append('.')
544 r.append('.')
545 return r
545 return r
546
546
547 def _anypats(kindpats):
547 def _anypats(kindpats):
548 for kind, pat, source in kindpats:
548 for kind, pat, source in kindpats:
549 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
549 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
550 return True
550 return True
551
551
# Lazily compiled regexp used by readpatternfile() to strip comments.
_commentre = None

def readpatternfile(filepath, warn):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    filepath is the path of the file to read. warn, when it is not
    false, is called with a message for every "syntax:" line naming an
    unknown syntax; such lines are otherwise ignored.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    The file is closed before returning, even if parsing raises.'''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include'}
    syntax = 'relre:'
    patterns = []

    fp = open(filepath)
    try:
        for line in fp:
            if "#" in line:
                if not _commentre:
                    _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                line = _commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line prefix, either the short name followed by ':' or
            # the already-translated form, overrides the current default.
            linesyntax = syntax
            # items() rather than iteritems(): same pairs, and it also
            # exists on Python 3
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            patterns.append(linesyntax + line)
    finally:
        # release the handle even when parsing or warn() raises
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now