##// END OF EJS Templates
match: add an optional constructor parameter for a bad() override...
Matt Harbison -
r25464:504a1f29 default
parent child Browse files
Show More
@@ -1,664 +1,668
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import copy, re
8 import copy, re
9 import util, pathutil
9 import util, pathutil
10 from i18n import _
10 from i18n import _
11
11
12 propertycache = util.propertycache
12 propertycache = util.propertycache
13
13
14 def _rematcher(regex):
14 def _rematcher(regex):
15 '''compile the regexp with the best available regexp engine and return a
15 '''compile the regexp with the best available regexp engine and return a
16 matcher function'''
16 matcher function'''
17 m = util.re.compile(regex)
17 m = util.re.compile(regex)
18 try:
18 try:
19 # slightly faster, provided by facebook's re2 bindings
19 # slightly faster, provided by facebook's re2 bindings
20 return m.test_match
20 return m.test_match
21 except AttributeError:
21 except AttributeError:
22 return m.match
22 return m.match
23
23
24 def _expandsets(kindpats, ctx, listsubrepos):
24 def _expandsets(kindpats, ctx, listsubrepos):
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
26 fset = set()
26 fset = set()
27 other = []
27 other = []
28
28
29 for kind, pat, source in kindpats:
29 for kind, pat, source in kindpats:
30 if kind == 'set':
30 if kind == 'set':
31 if not ctx:
31 if not ctx:
32 raise util.Abort("fileset expression with no context")
32 raise util.Abort("fileset expression with no context")
33 s = ctx.getfileset(pat)
33 s = ctx.getfileset(pat)
34 fset.update(s)
34 fset.update(s)
35
35
36 if listsubrepos:
36 if listsubrepos:
37 for subpath in ctx.substate:
37 for subpath in ctx.substate:
38 s = ctx.sub(subpath).getfileset(pat)
38 s = ctx.sub(subpath).getfileset(pat)
39 fset.update(subpath + '/' + f for f in s)
39 fset.update(subpath + '/' + f for f in s)
40
40
41 continue
41 continue
42 other.append((kind, pat, source))
42 other.append((kind, pat, source))
43 return fset, other
43 return fset, other
44
44
45 def _expandsubinclude(kindpats, root):
45 def _expandsubinclude(kindpats, root):
46 '''Returns the list of subinclude matchers and the kindpats without the
46 '''Returns the list of subinclude matchers and the kindpats without the
47 subincludes in it.'''
47 subincludes in it.'''
48 relmatchers = []
48 relmatchers = []
49 other = []
49 other = []
50
50
51 for kind, pat, source in kindpats:
51 for kind, pat, source in kindpats:
52 if kind == 'subinclude':
52 if kind == 'subinclude':
53 sourceroot = pathutil.dirname(util.normpath(source))
53 sourceroot = pathutil.dirname(util.normpath(source))
54 pat = util.pconvert(pat)
54 pat = util.pconvert(pat)
55 path = pathutil.join(sourceroot, pat)
55 path = pathutil.join(sourceroot, pat)
56
56
57 newroot = pathutil.dirname(path)
57 newroot = pathutil.dirname(path)
58 relmatcher = match(newroot, '', [], ['include:%s' % path])
58 relmatcher = match(newroot, '', [], ['include:%s' % path])
59
59
60 prefix = pathutil.canonpath(root, root, newroot)
60 prefix = pathutil.canonpath(root, root, newroot)
61 if prefix:
61 if prefix:
62 prefix += '/'
62 prefix += '/'
63 relmatchers.append((prefix, relmatcher))
63 relmatchers.append((prefix, relmatcher))
64 else:
64 else:
65 other.append((kind, pat, source))
65 other.append((kind, pat, source))
66
66
67 return relmatchers, other
67 return relmatchers, other
68
68
69 def _kindpatsalwaysmatch(kindpats):
69 def _kindpatsalwaysmatch(kindpats):
70 """Checks whether the kindpats match everything, as e.g.
70 """Checks whether the kindpats match everything, as e.g.
71 'relpath:.' does.
71 'relpath:.' does.
72 """
72 """
73 for kind, pat, source in kindpats:
73 for kind, pat, source in kindpats:
74 if pat != '' or kind not in ['relpath', 'glob']:
74 if pat != '' or kind not in ['relpath', 'glob']:
75 return False
75 return False
76 return True
76 return True
77
77
78 class match(object):
78 class match(object):
79 def __init__(self, root, cwd, patterns, include=[], exclude=[],
79 def __init__(self, root, cwd, patterns, include=[], exclude=[],
80 default='glob', exact=False, auditor=None, ctx=None,
80 default='glob', exact=False, auditor=None, ctx=None,
81 listsubrepos=False, warn=None):
81 listsubrepos=False, warn=None, badfn=None):
82 """build an object to match a set of file patterns
82 """build an object to match a set of file patterns
83
83
84 arguments:
84 arguments:
85 root - the canonical root of the tree you're matching against
85 root - the canonical root of the tree you're matching against
86 cwd - the current working directory, if relevant
86 cwd - the current working directory, if relevant
87 patterns - patterns to find
87 patterns - patterns to find
88 include - patterns to include (unless they are excluded)
88 include - patterns to include (unless they are excluded)
89 exclude - patterns to exclude (even if they are included)
89 exclude - patterns to exclude (even if they are included)
90 default - if a pattern in patterns has no explicit type, assume this one
90 default - if a pattern in patterns has no explicit type, assume this one
91 exact - patterns are actually filenames (include/exclude still apply)
91 exact - patterns are actually filenames (include/exclude still apply)
92 warn - optional function used for printing warnings
92 warn - optional function used for printing warnings
93 badfn - optional bad() callback for this matcher instead of the default
93
94
94 a pattern is one of:
95 a pattern is one of:
95 'glob:<glob>' - a glob relative to cwd
96 'glob:<glob>' - a glob relative to cwd
96 're:<regexp>' - a regular expression
97 're:<regexp>' - a regular expression
97 'path:<path>' - a path relative to repository root
98 'path:<path>' - a path relative to repository root
98 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
99 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
99 'relpath:<path>' - a path relative to cwd
100 'relpath:<path>' - a path relative to cwd
100 'relre:<regexp>' - a regexp that needn't match the start of a name
101 'relre:<regexp>' - a regexp that needn't match the start of a name
101 'set:<fileset>' - a fileset expression
102 'set:<fileset>' - a fileset expression
102 'include:<path>' - a file of patterns to read and include
103 'include:<path>' - a file of patterns to read and include
103 'subinclude:<path>' - a file of patterns to match against files under
104 'subinclude:<path>' - a file of patterns to match against files under
104 the same directory
105 the same directory
105 '<something>' - a pattern of the specified default type
106 '<something>' - a pattern of the specified default type
106 """
107 """
107
108
108 self._root = root
109 self._root = root
109 self._cwd = cwd
110 self._cwd = cwd
110 self._files = [] # exact files and roots of patterns
111 self._files = [] # exact files and roots of patterns
111 self._anypats = bool(include or exclude)
112 self._anypats = bool(include or exclude)
112 self._always = False
113 self._always = False
113 self._pathrestricted = bool(include or exclude or patterns)
114 self._pathrestricted = bool(include or exclude or patterns)
114 self._warn = warn
115 self._warn = warn
115 self._includeroots = set()
116 self._includeroots = set()
116 self._includedirs = set(['.'])
117 self._includedirs = set(['.'])
117 self._excluderoots = set()
118 self._excluderoots = set()
118
119
120 if badfn is not None:
121 self.bad = badfn
122
119 matchfns = []
123 matchfns = []
120 if include:
124 if include:
121 kindpats = self._normalize(include, 'glob', root, cwd, auditor)
125 kindpats = self._normalize(include, 'glob', root, cwd, auditor)
122 self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
126 self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
123 listsubrepos, root)
127 listsubrepos, root)
124 self._includeroots.update(_roots(kindpats))
128 self._includeroots.update(_roots(kindpats))
125 self._includeroots.discard('.')
129 self._includeroots.discard('.')
126 self._includedirs.update(util.dirs(self._includeroots))
130 self._includedirs.update(util.dirs(self._includeroots))
127 matchfns.append(im)
131 matchfns.append(im)
128 if exclude:
132 if exclude:
129 kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
133 kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
130 self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
134 self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
131 listsubrepos, root)
135 listsubrepos, root)
132 if not _anypats(kindpats):
136 if not _anypats(kindpats):
133 self._excluderoots.update(_roots(kindpats))
137 self._excluderoots.update(_roots(kindpats))
134 self._excluderoots.discard('.')
138 self._excluderoots.discard('.')
135 matchfns.append(lambda f: not em(f))
139 matchfns.append(lambda f: not em(f))
136 if exact:
140 if exact:
137 if isinstance(patterns, list):
141 if isinstance(patterns, list):
138 self._files = patterns
142 self._files = patterns
139 else:
143 else:
140 self._files = list(patterns)
144 self._files = list(patterns)
141 matchfns.append(self.exact)
145 matchfns.append(self.exact)
142 elif patterns:
146 elif patterns:
143 kindpats = self._normalize(patterns, default, root, cwd, auditor)
147 kindpats = self._normalize(patterns, default, root, cwd, auditor)
144 if not _kindpatsalwaysmatch(kindpats):
148 if not _kindpatsalwaysmatch(kindpats):
145 self._files = _roots(kindpats)
149 self._files = _roots(kindpats)
146 self._anypats = self._anypats or _anypats(kindpats)
150 self._anypats = self._anypats or _anypats(kindpats)
147 self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
151 self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
148 listsubrepos, root)
152 listsubrepos, root)
149 matchfns.append(pm)
153 matchfns.append(pm)
150
154
151 if not matchfns:
155 if not matchfns:
152 m = util.always
156 m = util.always
153 self._always = True
157 self._always = True
154 elif len(matchfns) == 1:
158 elif len(matchfns) == 1:
155 m = matchfns[0]
159 m = matchfns[0]
156 else:
160 else:
157 def m(f):
161 def m(f):
158 for matchfn in matchfns:
162 for matchfn in matchfns:
159 if not matchfn(f):
163 if not matchfn(f):
160 return False
164 return False
161 return True
165 return True
162
166
163 self.matchfn = m
167 self.matchfn = m
164 self._fileroots = set(self._files)
168 self._fileroots = set(self._files)
165
169
166 def __call__(self, fn):
170 def __call__(self, fn):
167 return self.matchfn(fn)
171 return self.matchfn(fn)
168 def __iter__(self):
172 def __iter__(self):
169 for f in self._files:
173 for f in self._files:
170 yield f
174 yield f
171
175
172 # Callbacks related to how the matcher is used by dirstate.walk.
176 # Callbacks related to how the matcher is used by dirstate.walk.
173 # Subscribers to these events must monkeypatch the matcher object.
177 # Subscribers to these events must monkeypatch the matcher object.
174 def bad(self, f, msg):
178 def bad(self, f, msg):
175 '''Callback from dirstate.walk for each explicit file that can't be
179 '''Callback from dirstate.walk for each explicit file that can't be
176 found/accessed, with an error message.'''
180 found/accessed, with an error message.'''
177 pass
181 pass
178
182
179 # If an explicitdir is set, it will be called when an explicitly listed
183 # If an explicitdir is set, it will be called when an explicitly listed
180 # directory is visited.
184 # directory is visited.
181 explicitdir = None
185 explicitdir = None
182
186
183 # If a traversedir is set, it will be called when a directory discovered
187 # If a traversedir is set, it will be called when a directory discovered
184 # by recursive traversal is visited.
188 # by recursive traversal is visited.
185 traversedir = None
189 traversedir = None
186
190
187 def abs(self, f):
191 def abs(self, f):
188 '''Convert a repo path back to a path that is relative to the root of the
192 '''Convert a repo path back to a path that is relative to the root of the
189 matcher.'''
193 matcher.'''
190 return f
194 return f
191
195
192 def rel(self, f):
196 def rel(self, f):
193 '''Convert repo path back to path that is relative to cwd of matcher.'''
197 '''Convert repo path back to path that is relative to cwd of matcher.'''
194 return util.pathto(self._root, self._cwd, f)
198 return util.pathto(self._root, self._cwd, f)
195
199
196 def uipath(self, f):
200 def uipath(self, f):
197 '''Convert repo path to a display path. If patterns or -I/-X were used
201 '''Convert repo path to a display path. If patterns or -I/-X were used
198 to create this matcher, the display path will be relative to cwd.
202 to create this matcher, the display path will be relative to cwd.
199 Otherwise it is relative to the root of the repo.'''
203 Otherwise it is relative to the root of the repo.'''
200 return (self._pathrestricted and self.rel(f)) or self.abs(f)
204 return (self._pathrestricted and self.rel(f)) or self.abs(f)
201
205
202 def files(self):
206 def files(self):
203 '''Explicitly listed files or patterns or roots:
207 '''Explicitly listed files or patterns or roots:
204 if no patterns or .always(): empty list,
208 if no patterns or .always(): empty list,
205 if exact: list exact files,
209 if exact: list exact files,
206 if not .anypats(): list all files and dirs,
210 if not .anypats(): list all files and dirs,
207 else: optimal roots'''
211 else: optimal roots'''
208 return self._files
212 return self._files
209
213
210 @propertycache
214 @propertycache
211 def _dirs(self):
215 def _dirs(self):
212 return set(util.dirs(self._fileroots)) | set(['.'])
216 return set(util.dirs(self._fileroots)) | set(['.'])
213
217
214 def visitdir(self, dir):
218 def visitdir(self, dir):
215 '''Decides whether a directory should be visited based on whether it
219 '''Decides whether a directory should be visited based on whether it
216 has potential matches in it or one of its subdirectories. This is
220 has potential matches in it or one of its subdirectories. This is
217 based on the match's primary, included, and excluded patterns.
221 based on the match's primary, included, and excluded patterns.
218
222
219 This function's behavior is undefined if it has returned False for
223 This function's behavior is undefined if it has returned False for
220 one of the dir's parent directories.
224 one of the dir's parent directories.
221 '''
225 '''
222 if dir in self._excluderoots:
226 if dir in self._excluderoots:
223 return False
227 return False
224 parentdirs = None
228 parentdirs = None
225 if (self._includeroots and dir not in self._includeroots and
229 if (self._includeroots and dir not in self._includeroots and
226 dir not in self._includedirs):
230 dir not in self._includedirs):
227 parentdirs = list(util.finddirs(dir))
231 parentdirs = list(util.finddirs(dir))
228 if not any(parent in self._includeroots for parent in parentdirs):
232 if not any(parent in self._includeroots for parent in parentdirs):
229 return False
233 return False
230 return (not self._fileroots or '.' in self._fileroots or
234 return (not self._fileroots or '.' in self._fileroots or
231 dir in self._fileroots or dir in self._dirs or
235 dir in self._fileroots or dir in self._dirs or
232 any(parentdir in self._fileroots
236 any(parentdir in self._fileroots
233 for parentdir in parentdirs or util.finddirs(dir)))
237 for parentdir in parentdirs or util.finddirs(dir)))
234
238
235 def exact(self, f):
239 def exact(self, f):
236 '''Returns True if f is in .files().'''
240 '''Returns True if f is in .files().'''
237 return f in self._fileroots
241 return f in self._fileroots
238
242
239 def anypats(self):
243 def anypats(self):
240 '''Matcher uses patterns or include/exclude.'''
244 '''Matcher uses patterns or include/exclude.'''
241 return self._anypats
245 return self._anypats
242
246
243 def always(self):
247 def always(self):
244 '''Matcher will match everything and .files() will be empty
248 '''Matcher will match everything and .files() will be empty
245 - optimization might be possible and necessary.'''
249 - optimization might be possible and necessary.'''
246 return self._always
250 return self._always
247
251
248 def ispartial(self):
252 def ispartial(self):
249 '''True if the matcher won't always match.
253 '''True if the matcher won't always match.
250
254
251 Although it's just the inverse of _always in this implementation,
255 Although it's just the inverse of _always in this implementation,
252 an extension such as narrowhg might make it return something
256 an extension such as narrowhg might make it return something
253 slightly different.'''
257 slightly different.'''
254 return not self._always
258 return not self._always
255
259
256 def isexact(self):
260 def isexact(self):
257 return self.matchfn == self.exact
261 return self.matchfn == self.exact
258
262
259 def prefix(self):
263 def prefix(self):
260 return not self.always() and not self.isexact() and not self.anypats()
264 return not self.always() and not self.isexact() and not self.anypats()
261
265
262 def _normalize(self, patterns, default, root, cwd, auditor):
266 def _normalize(self, patterns, default, root, cwd, auditor):
263 '''Convert 'kind:pat' from the patterns list to tuples with kind and
267 '''Convert 'kind:pat' from the patterns list to tuples with kind and
264 normalized and rooted patterns and with listfiles expanded.'''
268 normalized and rooted patterns and with listfiles expanded.'''
265 kindpats = []
269 kindpats = []
266 for kind, pat in [_patsplit(p, default) for p in patterns]:
270 for kind, pat in [_patsplit(p, default) for p in patterns]:
267 if kind in ('glob', 'relpath'):
271 if kind in ('glob', 'relpath'):
268 pat = pathutil.canonpath(root, cwd, pat, auditor)
272 pat = pathutil.canonpath(root, cwd, pat, auditor)
269 elif kind in ('relglob', 'path'):
273 elif kind in ('relglob', 'path'):
270 pat = util.normpath(pat)
274 pat = util.normpath(pat)
271 elif kind in ('listfile', 'listfile0'):
275 elif kind in ('listfile', 'listfile0'):
272 try:
276 try:
273 files = util.readfile(pat)
277 files = util.readfile(pat)
274 if kind == 'listfile0':
278 if kind == 'listfile0':
275 files = files.split('\0')
279 files = files.split('\0')
276 else:
280 else:
277 files = files.splitlines()
281 files = files.splitlines()
278 files = [f for f in files if f]
282 files = [f for f in files if f]
279 except EnvironmentError:
283 except EnvironmentError:
280 raise util.Abort(_("unable to read file list (%s)") % pat)
284 raise util.Abort(_("unable to read file list (%s)") % pat)
281 for k, p, source in self._normalize(files, default, root, cwd,
285 for k, p, source in self._normalize(files, default, root, cwd,
282 auditor):
286 auditor):
283 kindpats.append((k, p, pat))
287 kindpats.append((k, p, pat))
284 continue
288 continue
285 elif kind == 'include':
289 elif kind == 'include':
286 try:
290 try:
287 includepats = readpatternfile(pat, self._warn)
291 includepats = readpatternfile(pat, self._warn)
288 for k, p, source in self._normalize(includepats, default,
292 for k, p, source in self._normalize(includepats, default,
289 root, cwd, auditor):
293 root, cwd, auditor):
290 kindpats.append((k, p, source or pat))
294 kindpats.append((k, p, source or pat))
291 except util.Abort, inst:
295 except util.Abort, inst:
292 raise util.Abort('%s: %s' % (pat, inst[0]))
296 raise util.Abort('%s: %s' % (pat, inst[0]))
293 except IOError, inst:
297 except IOError, inst:
294 if self._warn:
298 if self._warn:
295 self._warn(_("skipping unreadable pattern file "
299 self._warn(_("skipping unreadable pattern file "
296 "'%s': %s\n") % (pat, inst.strerror))
300 "'%s': %s\n") % (pat, inst.strerror))
297 continue
301 continue
298 # else: re or relre - which cannot be normalized
302 # else: re or relre - which cannot be normalized
299 kindpats.append((kind, pat, ''))
303 kindpats.append((kind, pat, ''))
300 return kindpats
304 return kindpats
301
305
302 def exact(root, cwd, files):
306 def exact(root, cwd, files, badfn=None):
303 return match(root, cwd, files, exact=True)
307 return match(root, cwd, files, exact=True, badfn=badfn)
304
308
305 def always(root, cwd):
309 def always(root, cwd):
306 return match(root, cwd, [])
310 return match(root, cwd, [])
307
311
308 def badmatch(match, badfn):
312 def badmatch(match, badfn):
309 """Make a copy of the given matcher, replacing its bad method with the given
313 """Make a copy of the given matcher, replacing its bad method with the given
310 one.
314 one.
311 """
315 """
312 m = copy.copy(match)
316 m = copy.copy(match)
313 m.bad = badfn
317 m.bad = badfn
314 return m
318 return m
315
319
316 class narrowmatcher(match):
320 class narrowmatcher(match):
317 """Adapt a matcher to work on a subdirectory only.
321 """Adapt a matcher to work on a subdirectory only.
318
322
319 The paths are remapped to remove/insert the path as needed:
323 The paths are remapped to remove/insert the path as needed:
320
324
321 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
325 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
322 >>> m2 = narrowmatcher('sub', m1)
326 >>> m2 = narrowmatcher('sub', m1)
323 >>> bool(m2('a.txt'))
327 >>> bool(m2('a.txt'))
324 False
328 False
325 >>> bool(m2('b.txt'))
329 >>> bool(m2('b.txt'))
326 True
330 True
327 >>> bool(m2.matchfn('a.txt'))
331 >>> bool(m2.matchfn('a.txt'))
328 False
332 False
329 >>> bool(m2.matchfn('b.txt'))
333 >>> bool(m2.matchfn('b.txt'))
330 True
334 True
331 >>> m2.files()
335 >>> m2.files()
332 ['b.txt']
336 ['b.txt']
333 >>> m2.exact('b.txt')
337 >>> m2.exact('b.txt')
334 True
338 True
335 >>> util.pconvert(m2.rel('b.txt'))
339 >>> util.pconvert(m2.rel('b.txt'))
336 'sub/b.txt'
340 'sub/b.txt'
337 >>> def bad(f, msg):
341 >>> def bad(f, msg):
338 ... print "%s: %s" % (f, msg)
342 ... print "%s: %s" % (f, msg)
339 >>> m1.bad = bad
343 >>> m1.bad = bad
340 >>> m2.bad('x.txt', 'No such file')
344 >>> m2.bad('x.txt', 'No such file')
341 sub/x.txt: No such file
345 sub/x.txt: No such file
342 >>> m2.abs('c.txt')
346 >>> m2.abs('c.txt')
343 'sub/c.txt'
347 'sub/c.txt'
344 """
348 """
345
349
346 def __init__(self, path, matcher):
350 def __init__(self, path, matcher):
347 self._root = matcher._root
351 self._root = matcher._root
348 self._cwd = matcher._cwd
352 self._cwd = matcher._cwd
349 self._path = path
353 self._path = path
350 self._matcher = matcher
354 self._matcher = matcher
351 self._always = matcher._always
355 self._always = matcher._always
352 self._pathrestricted = matcher._pathrestricted
356 self._pathrestricted = matcher._pathrestricted
353
357
354 self._files = [f[len(path) + 1:] for f in matcher._files
358 self._files = [f[len(path) + 1:] for f in matcher._files
355 if f.startswith(path + "/")]
359 if f.startswith(path + "/")]
356
360
357 # If the parent repo had a path to this subrepo and no patterns are
361 # If the parent repo had a path to this subrepo and no patterns are
358 # specified, this submatcher always matches.
362 # specified, this submatcher always matches.
359 if not self._always and not matcher._anypats:
363 if not self._always and not matcher._anypats:
360 self._always = any(f == path for f in matcher._files)
364 self._always = any(f == path for f in matcher._files)
361
365
362 self._anypats = matcher._anypats
366 self._anypats = matcher._anypats
363 self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
367 self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
364 self._fileroots = set(self._files)
368 self._fileroots = set(self._files)
365
369
366 def abs(self, f):
370 def abs(self, f):
367 return self._matcher.abs(self._path + "/" + f)
371 return self._matcher.abs(self._path + "/" + f)
368
372
369 def bad(self, f, msg):
373 def bad(self, f, msg):
370 self._matcher.bad(self._path + "/" + f, msg)
374 self._matcher.bad(self._path + "/" + f, msg)
371
375
372 def rel(self, f):
376 def rel(self, f):
373 return self._matcher.rel(self._path + "/" + f)
377 return self._matcher.rel(self._path + "/" + f)
374
378
375 class icasefsmatcher(match):
379 class icasefsmatcher(match):
376 """A matcher for wdir on case insensitive filesystems, which normalizes the
380 """A matcher for wdir on case insensitive filesystems, which normalizes the
377 given patterns to the case in the filesystem.
381 given patterns to the case in the filesystem.
378 """
382 """
379
383
380 def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
384 def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
381 ctx, listsubrepos=False):
385 ctx, listsubrepos=False, badfn=None):
382 init = super(icasefsmatcher, self).__init__
386 init = super(icasefsmatcher, self).__init__
383 self._dsnormalize = ctx.repo().dirstate.normalize
387 self._dsnormalize = ctx.repo().dirstate.normalize
384
388
385 init(root, cwd, patterns, include, exclude, default, auditor=auditor,
389 init(root, cwd, patterns, include, exclude, default, auditor=auditor,
386 ctx=ctx, listsubrepos=listsubrepos)
390 ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)
387
391
388 # m.exact(file) must be based off of the actual user input, otherwise
392 # m.exact(file) must be based off of the actual user input, otherwise
389 # inexact case matches are treated as exact, and not noted without -v.
393 # inexact case matches are treated as exact, and not noted without -v.
390 if self._files:
394 if self._files:
391 self._fileroots = set(_roots(self._kp))
395 self._fileroots = set(_roots(self._kp))
392
396
393 def _normalize(self, patterns, default, root, cwd, auditor):
397 def _normalize(self, patterns, default, root, cwd, auditor):
394 self._kp = super(icasefsmatcher, self)._normalize(patterns, default,
398 self._kp = super(icasefsmatcher, self)._normalize(patterns, default,
395 root, cwd, auditor)
399 root, cwd, auditor)
396 kindpats = []
400 kindpats = []
397 for kind, pats, source in self._kp:
401 for kind, pats, source in self._kp:
398 if kind not in ('re', 'relre'): # regex can't be normalized
402 if kind not in ('re', 'relre'): # regex can't be normalized
399 pats = self._dsnormalize(pats)
403 pats = self._dsnormalize(pats)
400 kindpats.append((kind, pats, source))
404 kindpats.append((kind, pats, source))
401 return kindpats
405 return kindpats
402
406
403 def patkind(pattern, default=None):
407 def patkind(pattern, default=None):
404 '''If pattern is 'kind:pat' with a known kind, return kind.'''
408 '''If pattern is 'kind:pat' with a known kind, return kind.'''
405 return _patsplit(pattern, default)[0]
409 return _patsplit(pattern, default)[0]
406
410
407 def _patsplit(pattern, default):
411 def _patsplit(pattern, default):
408 """Split a string into the optional pattern kind prefix and the actual
412 """Split a string into the optional pattern kind prefix and the actual
409 pattern."""
413 pattern."""
410 if ':' in pattern:
414 if ':' in pattern:
411 kind, pat = pattern.split(':', 1)
415 kind, pat = pattern.split(':', 1)
412 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
416 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
413 'listfile', 'listfile0', 'set', 'include', 'subinclude'):
417 'listfile', 'listfile0', 'set', 'include', 'subinclude'):
414 return kind, pat
418 return kind, pat
415 return default, pattern
419 return default, pattern
416
420
417 def _globre(pat):
421 def _globre(pat):
418 r'''Convert an extended glob string to a regexp string.
422 r'''Convert an extended glob string to a regexp string.
419
423
420 >>> print _globre(r'?')
424 >>> print _globre(r'?')
421 .
425 .
422 >>> print _globre(r'*')
426 >>> print _globre(r'*')
423 [^/]*
427 [^/]*
424 >>> print _globre(r'**')
428 >>> print _globre(r'**')
425 .*
429 .*
426 >>> print _globre(r'**/a')
430 >>> print _globre(r'**/a')
427 (?:.*/)?a
431 (?:.*/)?a
428 >>> print _globre(r'a/**/b')
432 >>> print _globre(r'a/**/b')
429 a\/(?:.*/)?b
433 a\/(?:.*/)?b
430 >>> print _globre(r'[a*?!^][^b][!c]')
434 >>> print _globre(r'[a*?!^][^b][!c]')
431 [a*?!^][\^b][^c]
435 [a*?!^][\^b][^c]
432 >>> print _globre(r'{a,b}')
436 >>> print _globre(r'{a,b}')
433 (?:a|b)
437 (?:a|b)
434 >>> print _globre(r'.\*\?')
438 >>> print _globre(r'.\*\?')
435 \.\*\?
439 \.\*\?
436 '''
440 '''
437 i, n = 0, len(pat)
441 i, n = 0, len(pat)
438 res = ''
442 res = ''
439 group = 0
443 group = 0
440 escape = util.re.escape
444 escape = util.re.escape
441 def peek():
445 def peek():
442 return i < n and pat[i]
446 return i < n and pat[i]
443 while i < n:
447 while i < n:
444 c = pat[i]
448 c = pat[i]
445 i += 1
449 i += 1
446 if c not in '*?[{},\\':
450 if c not in '*?[{},\\':
447 res += escape(c)
451 res += escape(c)
448 elif c == '*':
452 elif c == '*':
449 if peek() == '*':
453 if peek() == '*':
450 i += 1
454 i += 1
451 if peek() == '/':
455 if peek() == '/':
452 i += 1
456 i += 1
453 res += '(?:.*/)?'
457 res += '(?:.*/)?'
454 else:
458 else:
455 res += '.*'
459 res += '.*'
456 else:
460 else:
457 res += '[^/]*'
461 res += '[^/]*'
458 elif c == '?':
462 elif c == '?':
459 res += '.'
463 res += '.'
460 elif c == '[':
464 elif c == '[':
461 j = i
465 j = i
462 if j < n and pat[j] in '!]':
466 if j < n and pat[j] in '!]':
463 j += 1
467 j += 1
464 while j < n and pat[j] != ']':
468 while j < n and pat[j] != ']':
465 j += 1
469 j += 1
466 if j >= n:
470 if j >= n:
467 res += '\\['
471 res += '\\['
468 else:
472 else:
469 stuff = pat[i:j].replace('\\','\\\\')
473 stuff = pat[i:j].replace('\\','\\\\')
470 i = j + 1
474 i = j + 1
471 if stuff[0] == '!':
475 if stuff[0] == '!':
472 stuff = '^' + stuff[1:]
476 stuff = '^' + stuff[1:]
473 elif stuff[0] == '^':
477 elif stuff[0] == '^':
474 stuff = '\\' + stuff
478 stuff = '\\' + stuff
475 res = '%s[%s]' % (res, stuff)
479 res = '%s[%s]' % (res, stuff)
476 elif c == '{':
480 elif c == '{':
477 group += 1
481 group += 1
478 res += '(?:'
482 res += '(?:'
479 elif c == '}' and group:
483 elif c == '}' and group:
480 res += ')'
484 res += ')'
481 group -= 1
485 group -= 1
482 elif c == ',' and group:
486 elif c == ',' and group:
483 res += '|'
487 res += '|'
484 elif c == '\\':
488 elif c == '\\':
485 p = peek()
489 p = peek()
486 if p:
490 if p:
487 i += 1
491 i += 1
488 res += escape(p)
492 res += escape(p)
489 else:
493 else:
490 res += escape(c)
494 res += escape(c)
491 else:
495 else:
492 res += escape(c)
496 res += escape(c)
493 return res
497 return res
494
498
495 def _regex(kind, pat, globsuffix):
499 def _regex(kind, pat, globsuffix):
496 '''Convert a (normalized) pattern of any kind into a regular expression.
500 '''Convert a (normalized) pattern of any kind into a regular expression.
497 globsuffix is appended to the regexp of globs.'''
501 globsuffix is appended to the regexp of globs.'''
498 if not pat:
502 if not pat:
499 return ''
503 return ''
500 if kind == 're':
504 if kind == 're':
501 return pat
505 return pat
502 if kind == 'path':
506 if kind == 'path':
503 return '^' + util.re.escape(pat) + '(?:/|$)'
507 return '^' + util.re.escape(pat) + '(?:/|$)'
504 if kind == 'relglob':
508 if kind == 'relglob':
505 return '(?:|.*/)' + _globre(pat) + globsuffix
509 return '(?:|.*/)' + _globre(pat) + globsuffix
506 if kind == 'relpath':
510 if kind == 'relpath':
507 return util.re.escape(pat) + '(?:/|$)'
511 return util.re.escape(pat) + '(?:/|$)'
508 if kind == 'relre':
512 if kind == 'relre':
509 if pat.startswith('^'):
513 if pat.startswith('^'):
510 return pat
514 return pat
511 return '.*' + pat
515 return '.*' + pat
512 return _globre(pat) + globsuffix
516 return _globre(pat) + globsuffix
513
517
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return a (regexp string, matcher function) pair for kindpats.

    Up to three matchers are collected, in this order: one for the
    subincludes split off by _expandsubinclude(), one for the file set
    produced by _expandsets(), and one regexp matcher for whatever
    patterns remain. globsuffix is appended to the regexp of globs.

    The returned regexp string only describes the remaining patterns and
    is '' when there are none. The returned function accepts a file name
    and is true when any collected matcher accepts it.'''
    matchers = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        def matchsubinclude(f):
            # each sub-matcher sees the name with its prefix stripped
            return any(f.startswith(prefix) and submatch(f[len(prefix):])
                       for prefix, submatch in subincludes)
        matchers.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchers.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, rematch = _buildregexmatch(kindpats, globsuffix)
        matchers.append(rematch)

    if len(matchers) != 1:
        def matchany(f):
            return any(m(f) for m in matchers)
        return regex, matchany
    # a single matcher is returned directly, without a wrapper
    return regex, matchers[0]
541
545
def _buildregexmatch(kindpats, globsuffix):
    """Compile kindpats into one regexp; return (regexp string, matcher).

    Each (kind, pattern, source) entry is translated with _regex() and
    the results are joined as alternatives of one non-capturing group.
    globsuffix is passed through to _regex().

    If the combined regexp is longer than 20000 characters, or compiling
    it raises OverflowError, the pattern list is split in half and each
    half is compiled recursively; the returned string is still the
    combined regexp. A list of fewer than two patterns cannot be split,
    so the OverflowError propagates.

    If compilation raises re.error, each pattern is compiled on its own
    so that util.Abort can name the offending one (with its source when
    that is non-empty)."""
    try:
        parts = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(parts)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        matcha = _buildregexmatch(kindpats[:half], globsuffix)[1]
        matchb = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: matcha(s) or matchb(s)
    except re.error:
        # find the first pattern that fails to compile by itself
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise util.Abort(_("%s: invalid pattern (%s): %s") %
                                 (s, k, p))
        raise util.Abort(_("invalid pattern"))
572
576
573 def _roots(kindpats):
577 def _roots(kindpats):
574 '''return roots and exact explicitly listed files from patterns
578 '''return roots and exact explicitly listed files from patterns
575
579
576 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
580 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
577 ['g', 'g', '.']
581 ['g', 'g', '.']
578 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
582 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
579 ['r', 'p/p', '.']
583 ['r', 'p/p', '.']
580 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
584 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
581 ['.', '.', '.']
585 ['.', '.', '.']
582 '''
586 '''
583 r = []
587 r = []
584 for kind, pat, source in kindpats:
588 for kind, pat, source in kindpats:
585 if kind == 'glob': # find the non-glob prefix
589 if kind == 'glob': # find the non-glob prefix
586 root = []
590 root = []
587 for p in pat.split('/'):
591 for p in pat.split('/'):
588 if '[' in p or '{' in p or '*' in p or '?' in p:
592 if '[' in p or '{' in p or '*' in p or '?' in p:
589 break
593 break
590 root.append(p)
594 root.append(p)
591 r.append('/'.join(root) or '.')
595 r.append('/'.join(root) or '.')
592 elif kind in ('relpath', 'path'):
596 elif kind in ('relpath', 'path'):
593 r.append(pat or '.')
597 r.append(pat or '.')
594 else: # relglob, re, relre
598 else: # relglob, re, relre
595 r.append('.')
599 r.append('.')
596 return r
600 return r
597
601
598 def _anypats(kindpats):
602 def _anypats(kindpats):
599 for kind, pat, source in kindpats:
603 for kind, pat, source in kindpats:
600 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
604 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
601 return True
605 return True
602
606
# compiled lazily by readpatternfile() the first time a line contains '#'
_commentre = None

def readpatternfile(filepath, warn):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    filepath is the path of the file to read. warn, if true, is called
    with a message when a "syntax:" line names an unknown syntax; such a
    line is ignored either way and the current default is kept.

    Each returned pattern is the kind prefix selected for the line
    followed by the rest of the line. The file is closed before
    returning, including when reading or parsing raises.'''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    fp = open(filepath)
    try:
        for line in fp:
            if "#" in line:
                if not _commentre:
                    _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                line = _commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a recognized prefix on the line overrides the default syntax
            linesyntax = syntax
            # items() rather than iteritems(): works on Python 2 and 3
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            patterns.append(linesyntax + line)
    finally:
        # do not leak the handle if reading or parsing fails
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now