##// END OF EJS Templates
match: break boolean expressions into one operand per line...
Martin von Zweigbergk -
r25576:d02f4b3e default
parent child Browse files
Show More
@@ -1,667 +1,670 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import copy, re
8 import copy, re
9 import util, pathutil
9 import util, pathutil
10 from i18n import _
10 from i18n import _
11
11
12 propertycache = util.propertycache
12 propertycache = util.propertycache
13
13
def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The pattern is compiled through util.re. If the compiled object exposes a
    test_match attribute, that is returned; otherwise the object's ordinary
    match method is returned.'''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        # the compiled object has no test_match: fall back to plain match
        return m.match
23
23
24 def _expandsets(kindpats, ctx, listsubrepos):
24 def _expandsets(kindpats, ctx, listsubrepos):
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
26 fset = set()
26 fset = set()
27 other = []
27 other = []
28
28
29 for kind, pat, source in kindpats:
29 for kind, pat, source in kindpats:
30 if kind == 'set':
30 if kind == 'set':
31 if not ctx:
31 if not ctx:
32 raise util.Abort("fileset expression with no context")
32 raise util.Abort("fileset expression with no context")
33 s = ctx.getfileset(pat)
33 s = ctx.getfileset(pat)
34 fset.update(s)
34 fset.update(s)
35
35
36 if listsubrepos:
36 if listsubrepos:
37 for subpath in ctx.substate:
37 for subpath in ctx.substate:
38 s = ctx.sub(subpath).getfileset(pat)
38 s = ctx.sub(subpath).getfileset(pat)
39 fset.update(subpath + '/' + f for f in s)
39 fset.update(subpath + '/' + f for f in s)
40
40
41 continue
41 continue
42 other.append((kind, pat, source))
42 other.append((kind, pat, source))
43 return fset, other
43 return fset, other
44
44
45 def _expandsubinclude(kindpats, root):
45 def _expandsubinclude(kindpats, root):
46 '''Returns the list of subinclude matchers and the kindpats without the
46 '''Returns the list of subinclude matchers and the kindpats without the
47 subincludes in it.'''
47 subincludes in it.'''
48 relmatchers = []
48 relmatchers = []
49 other = []
49 other = []
50
50
51 for kind, pat, source in kindpats:
51 for kind, pat, source in kindpats:
52 if kind == 'subinclude':
52 if kind == 'subinclude':
53 sourceroot = pathutil.dirname(util.normpath(source))
53 sourceroot = pathutil.dirname(util.normpath(source))
54 pat = util.pconvert(pat)
54 pat = util.pconvert(pat)
55 path = pathutil.join(sourceroot, pat)
55 path = pathutil.join(sourceroot, pat)
56
56
57 newroot = pathutil.dirname(path)
57 newroot = pathutil.dirname(path)
58 relmatcher = match(newroot, '', [], ['include:%s' % path])
58 relmatcher = match(newroot, '', [], ['include:%s' % path])
59
59
60 prefix = pathutil.canonpath(root, root, newroot)
60 prefix = pathutil.canonpath(root, root, newroot)
61 if prefix:
61 if prefix:
62 prefix += '/'
62 prefix += '/'
63 relmatchers.append((prefix, relmatcher))
63 relmatchers.append((prefix, relmatcher))
64 else:
64 else:
65 other.append((kind, pat, source))
65 other.append((kind, pat, source))
66
66
67 return relmatchers, other
67 return relmatchers, other
68
68
69 def _kindpatsalwaysmatch(kindpats):
69 def _kindpatsalwaysmatch(kindpats):
70 """"Checks whether the kindspats match everything, as e.g.
70 """"Checks whether the kindspats match everything, as e.g.
71 'relpath:.' does.
71 'relpath:.' does.
72 """
72 """
73 for kind, pat, source in kindpats:
73 for kind, pat, source in kindpats:
74 if pat != '' or kind not in ['relpath', 'glob']:
74 if pat != '' or kind not in ['relpath', 'glob']:
75 return False
75 return False
76 return True
76 return True
77
77
class match(object):
    '''Callable object deciding whether a repo-relative file name is selected
    by a set of patterns, include patterns and exclude patterns.'''

    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None, badfn=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - passed to pathutil.canonpath() when normalizing 'glob' and
                  'relpath' patterns
        ctx - context used to evaluate 'set:' patterns
        listsubrepos - also evaluate 'set:' patterns in the subrepos of ctx
        warn - optional function used for printing warnings
        badfn - optional bad() callback for this matcher instead of the default

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        'include:<path>' - a file of patterns to read and include
        'subinclude:<path>' - a file of patterns to match against files under
                              the same directory
        '<something>' - a pattern of the specified default type
        """
        # NOTE: the list defaults of include/exclude are only read, never
        # mutated, so sharing them between calls is harmless.

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)
        self._warn = warn
        self._includeroots = set()
        self._includedirs = set(['.'])
        self._excluderoots = set()

        if badfn is not None:
            self.bad = badfn

        # every function collected here must accept a file for it to match
        matchfns = []
        if include:
            kindpats = self._normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            self._includeroots.update(_roots(kindpats))
            self._includeroots.discard('.')
            self._includedirs.update(util.dirs(self._includeroots))
            matchfns.append(im)
        if exclude:
            kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            if not _anypats(kindpats):
                self._excluderoots.update(_roots(kindpats))
            matchfns.append(lambda f: not em(f))
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = self._normalize(patterns, default, root, cwd, auditor)
            # patterns that match everything add no restriction at all
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _roots(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos, root)
                matchfns.append(pm)

        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m
        self._fileroots = set(self._files)

    def __call__(self, fn):
        '''Return the result of the match function for file name fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        # note: an empty result from rel() also falls through to abs()
        return (self._pathrestricted and self.rel(f)) or self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _dirs(self):
        '''Directories derived from the file roots via util.dirs(), plus
        '.'.'''
        return set(util.dirs(self._fileroots)) | set(['.'])

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        if dir in self._excluderoots:
            return False
        # computed lazily; reused by the final test when already available
        parentdirs = None
        if (self._includeroots and
            dir not in self._includeroots and
            dir not in self._includedirs):
            parentdirs = list(util.finddirs(dir))
            if not any(parent in self._includeroots for parent in parentdirs):
                return False
        return (not self._fileroots or
                '.' in self._fileroots or
                dir in self._fileroots or
                dir in self._dirs or
                any(parentdir in self._fileroots
                    for parentdir in parentdirs or util.finddirs(dir)))

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileroots

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def ispartial(self):
        '''True if the matcher won't always match.

        Although it's just the inverse of _always in this implementation,
        an extension such as narrowhg might make it return something
        slightly different.'''
        return not self._always

    def isexact(self):
        '''True if the match function is this matcher's exact() method.'''
        return self.matchfn == self.exact

    def prefix(self):
        '''True if the matcher is none of always(), isexact() or anypats().'''
        return not self.always() and not self.isexact() and not self.anypats()

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Convert 'kind:pat' from the patterns list to tuples with kind and
        normalized and rooted patterns and with listfiles expanded.

        Returns a list of (kind, pat, source) tuples. source is the name of
        the list or pattern file an entry was read from, or '' for entries
        taken directly from patterns.

        Raises util.Abort if a list file cannot be read or if reading an
        included pattern file aborts. An included pattern file that raises
        IOError is skipped, with a warning when a warn function is set.'''
        kindpats = []
        for kind, pat in [_patsplit(p, default) for p in patterns]:
            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path'):
                pat = util.normpath(pat)
            elif kind in ('listfile', 'listfile0'):
                try:
                    files = util.readfile(pat)
                    if kind == 'listfile0':
                        files = files.split('\0')
                    else:
                        files = files.splitlines()
                    files = [f for f in files if f]
                except EnvironmentError:
                    raise util.Abort(_("unable to read file list (%s)") % pat)
                # entries of the list file are patterns themselves
                for k, p, source in self._normalize(files, default, root, cwd,
                                                    auditor):
                    kindpats.append((k, p, pat))
                continue
            elif kind == 'include':
                try:
                    includepats = readpatternfile(pat, self._warn)
                    for k, p, source in self._normalize(includepats, default,
                                                        root, cwd, auditor):
                        kindpats.append((k, p, source or pat))
                except util.Abort as inst:
                    # prefix the message with the offending pattern file
                    raise util.Abort('%s: %s' % (pat, inst.args[0]))
                except IOError as inst:
                    if self._warn:
                        self._warn(_("skipping unreadable pattern file "
                                     "'%s': %s\n") % (pat, inst.strerror))
                continue
            # else: re or relre - which cannot be normalized
            kindpats.append((kind, pat, ''))
        return kindpats
304
307
def exact(root, cwd, files, badfn=None):
    '''Return a matcher that treats files as exact file names, not
    patterns. badfn, if given, is used as the matcher's bad() callback.'''
    return match(root, cwd, files, exact=True, badfn=badfn)
307
310
def always(root, cwd):
    '''Return a matcher built without any patterns, which therefore matches
    every file.'''
    return match(root, cwd, [])
310
313
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy), so the original matcher keeps its own
    bad callback while all other attributes are shared by reference.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
318
321
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        '''Wrap matcher so that it operates on names relative to path.

        match.__init__ is deliberately not called: the state is derived from
        the wrapped matcher instead of being rebuilt from patterns.'''
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always
        self._pathrestricted = matcher._pathrestricted

        # keep only the files below path, with the "path/" prefix removed
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]

        # If the parent repo had a path to this subrepo and no patterns are
        # specified, this submatcher always matches.
        if not self._always and not matcher._anypats:
            self._always = any(f == path for f in matcher._files)

        self._anypats = matcher._anypats
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fileroots = set(self._files)

    def abs(self, f):
        '''Return the wrapped matcher's abs() for f placed under the path.'''
        return self._matcher.abs(self._path + "/" + f)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad() with f placed under the
        path.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def rel(self, f):
        '''Return the wrapped matcher's rel() for f placed under the path.'''
        return self._matcher.rel(self._path + "/" + f)

    def visitdir(self, dir):
        '''Delegate the decision to the wrapped matcher, using the directory
        name as seen from the wrapped matcher's root.

        The inherited implementation cannot be used: it reads the include and
        exclude root sets, which this class never initializes.'''
        if dir == '.':
            dir = self._path
        else:
            dir = self._path + "/" + dir
        return self._matcher.visitdir(dir)
377
380
class icasefsmatcher(match):
    """A matcher for wdir on case insensitive filesystems, which normalizes the
    given patterns to the case in the filesystem.
    """

    def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
                 ctx, listsubrepos=False, badfn=None):
        '''Build the matcher like match.__init__, with patterns normalized
        through the dirstate of ctx's repo.'''
        init = super(icasefsmatcher, self).__init__
        # must be set before init() runs, since init() calls _normalize()
        self._dsnormalize = ctx.repo().dirstate.normalize

        init(root, cwd, patterns, include, exclude, default, auditor=auditor,
             ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

        # m.exact(file) must be based off of the actual user input, otherwise
        # inexact case matches are treated as exact, and not noted without -v.
        if self._files:
            self._fileroots = set(_roots(self._kp))

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Normalize like match._normalize, then pass every non-regex pattern
        through the dirstate normalizer.

        The result of the base normalization is remembered in self._kp; each
        call overwrites the value stored by the previous one.'''
        self._kp = super(icasefsmatcher, self)._normalize(patterns, default,
                                                          root, cwd, auditor)
        kindpats = []
        for kind, pats, source in self._kp:
            if kind not in ('re', 'relre'): # regex can't be normalized
                pats = self._dsnormalize(pats)
            kindpats.append((kind, pats, source))
        return kindpats
405
408
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return default.'''
    return _patsplit(pattern, default)[0]
409
412
410 def _patsplit(pattern, default):
413 def _patsplit(pattern, default):
411 """Split a string into the optional pattern kind prefix and the actual
414 """Split a string into the optional pattern kind prefix and the actual
412 pattern."""
415 pattern."""
413 if ':' in pattern:
416 if ':' in pattern:
414 kind, pat = pattern.split(':', 1)
417 kind, pat = pattern.split(':', 1)
415 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
418 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
416 'listfile', 'listfile0', 'set', 'include', 'subinclude'):
419 'listfile', 'listfile0', 'set', 'include', 'subinclude'):
417 return kind, pat
420 return kind, pat
418 return default, pattern
421 return default, pattern
419
422
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    i, n = 0, len(pat)
    res = ''
    group = 0 # nesting depth of currently open '{' groups
    escape = util.re.escape
    def peek():
        # next unread character, or False at the end of the pattern
        return i < n and pat[i]
    while i < n:
        c = pat[i]
        i += 1
        if c not in '*?[{},\\':
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' also matches the empty string
                    i += 1
                    res += '(?:.*/)?'
                else:
                    res += '.*'
            else:
                # a single '*' does not cross directory separators
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a ']' or '!' directly after the '['
            # is part of the class and does not close it
            j = i
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0] == '!':
                    # glob negation becomes regexp negation
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    # a leading '^' is literal in a glob
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            res += '|'
        elif c == '\\':
            p = peek()
            if p:
                # escaped character: emit it literally
                i += 1
                res += escape(p)
            else:
                # trailing backslash
                res += escape(c)
        else:
            # '}' or ',' outside of any group
            res += escape(c)
    return res
497
500
def _regex(kind, pat, globsuffix):
    '''Translate one (normalized) pattern into regular expression text.

    An empty pattern yields ''. 're' patterns are returned untouched and
    'relre' patterns get a leading '.*' unless they already start with '^'.
    'path' and 'relpath' patterns are escaped literally and must be followed
    by '/' or the end of the string; 'path' is additionally anchored with
    '^'. Every other kind is converted with _globre() and has globsuffix
    appended; 'relglob' is also prefixed so it may match below any
    directory.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'relre':
        if not pat.startswith('^'):
            return '.*' + pat
        return pat
    if kind in ('path', 'relpath'):
        literal = util.re.escape(pat) + '(?:/|$)'
        if kind == 'path':
            # only rooted paths are anchored at the start
            return '^' + literal
        return literal
    globbed = _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + globbed
    return globbed
516
519
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Build a matcher for kindpats; return (regexp string, match function).

    Up to three independent matchers are collected: one for subincludes,
    one for the files produced by expanding 'set' patterns, and one regexp
    matcher for whatever patterns remain. The returned regexp string only
    describes the last of these and is '' when no patterns remain.
    globsuffix is appended to the regexp of globs.'''
    matchers = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        def matchsubinclude(f):
            # a subinclude matcher is applied to the path with its
            # prefix stripped off
            return any(f.startswith(prefix) and submatch(f[len(prefix):])
                       for prefix, submatch in subincludes)
        matchers.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchers.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchers.append(regexmatch)

    if len(matchers) != 1:
        # zero matchers never match; several match if any of them does
        return regex, lambda f: any(mf(f) for mf in matchers)
    return regex, matchers[0]
544
547
def _buildregexmatch(kindpats, globsuffix):
    """Compile kindpats into a single alternation regexp.

    Returns a (regexp string, matcher function) pair. When the combined
    expression is longer than 20000 characters, or compiling it raises
    OverflowError, the pattern list is halved recursively and the two
    resulting matchers are or-ed together. When compiling raises re.error,
    every pattern is recompiled on its own so the util.Abort message can
    name the offending one."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            # a single pattern cannot be split any further
            raise
        half = count // 2
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: first(s) or second(s)
    except re.error:
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise util.Abort(_("%s: invalid pattern (%s): %s") %
                                 (s, k, p))
        # each pattern compiled alone, so no single culprit can be named
        raise util.Abort(_("invalid pattern"))
575
578
def _roots(kindpats):
    '''return one root directory (or exact file) per pattern

    For a glob this is the leading run of path components that contain no
    glob characters; for 'path' and 'relpath' it is the pattern itself;
    every other kind is rooted at '.'.

    >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
    ['g', 'g', '.']
    >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    ['r', 'p/p', '.']
    >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    ['.', '.', '.']
    '''
    roots = []
    for kind, pat, source in kindpats:
        if kind in ('relpath', 'path'):
            roots.append(pat or '.')
        elif kind == 'glob':
            # keep components up to the first one using glob syntax
            prefix = []
            for component in pat.split('/'):
                if any(special in component for special in '[{*?'):
                    break
                prefix.append(component)
            roots.append('/'.join(prefix) or '.')
        else:
            # relglob, re, relre
            roots.append('.')
    return roots
600
603
def _anypats(kindpats):
    '''return True if any pattern has a kind other than a plain path

    The kinds checked are glob, re, relglob, relre and set. When none of
    them is present the function falls off the end and returns None.'''
    patternkinds = ('glob', 're', 'relglob', 'relre', 'set')
    for kind, pat, source in kindpats:
        if kind not in patternkinds:
            continue
        return True
605
608
# lazily compiled by readpatternfile() the first time a line contains '#'
_commentre = None

def readpatternfile(filepath, warn):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    filepath is the file to read. warn is called with a message when a
    "syntax:" line names an unknown syntax; pass a false value to skip
    such lines silently. Each returned pattern is the line's text prefixed
    with its kind (for example 'relre:' or 'relglob:').

    The file is closed even if reading or parsing raises.'''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    fp = open(filepath)
    try:
        for line in fp:
            if "#" in line:
                if not _commentre:
                    _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                line = _commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            linesyntax = syntax
            # items() works on both Python 2 and 3; at most one distinct
            # prefix can match a line, so iteration order is irrelevant
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s + ':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            patterns.append(linesyntax + line)
    finally:
        # release the handle even when warn() or parsing raises
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now