##// END OF EJS Templates
match: drop optimization (?) of 'parentdirs' calculation...
Martin von Zweigbergk -
r25577:a410479c default
parent child Browse files
Show More
@@ -1,670 +1,669 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import copy, re
8 import copy, re
9 import util, pathutil
9 import util, pathutil
10 from i18n import _
10 from i18n import _
11
11
12 propertycache = util.propertycache
12 propertycache = util.propertycache
13
13
14 def _rematcher(regex):
14 def _rematcher(regex):
15 '''compile the regexp with the best available regexp engine and return a
15 '''compile the regexp with the best available regexp engine and return a
16 matcher function'''
16 matcher function'''
17 m = util.re.compile(regex)
17 m = util.re.compile(regex)
18 try:
18 try:
19 # slightly faster, provided by facebook's re2 bindings
19 # slightly faster, provided by facebook's re2 bindings
20 return m.test_match
20 return m.test_match
21 except AttributeError:
21 except AttributeError:
22 return m.match
22 return m.match
23
23
24 def _expandsets(kindpats, ctx, listsubrepos):
24 def _expandsets(kindpats, ctx, listsubrepos):
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
25 '''Returns the kindpats list with the 'set' patterns expanded.'''
26 fset = set()
26 fset = set()
27 other = []
27 other = []
28
28
29 for kind, pat, source in kindpats:
29 for kind, pat, source in kindpats:
30 if kind == 'set':
30 if kind == 'set':
31 if not ctx:
31 if not ctx:
32 raise util.Abort("fileset expression with no context")
32 raise util.Abort("fileset expression with no context")
33 s = ctx.getfileset(pat)
33 s = ctx.getfileset(pat)
34 fset.update(s)
34 fset.update(s)
35
35
36 if listsubrepos:
36 if listsubrepos:
37 for subpath in ctx.substate:
37 for subpath in ctx.substate:
38 s = ctx.sub(subpath).getfileset(pat)
38 s = ctx.sub(subpath).getfileset(pat)
39 fset.update(subpath + '/' + f for f in s)
39 fset.update(subpath + '/' + f for f in s)
40
40
41 continue
41 continue
42 other.append((kind, pat, source))
42 other.append((kind, pat, source))
43 return fset, other
43 return fset, other
44
44
45 def _expandsubinclude(kindpats, root):
45 def _expandsubinclude(kindpats, root):
46 '''Returns the list of subinclude matchers and the kindpats without the
46 '''Returns the list of subinclude matchers and the kindpats without the
47 subincludes in it.'''
47 subincludes in it.'''
48 relmatchers = []
48 relmatchers = []
49 other = []
49 other = []
50
50
51 for kind, pat, source in kindpats:
51 for kind, pat, source in kindpats:
52 if kind == 'subinclude':
52 if kind == 'subinclude':
53 sourceroot = pathutil.dirname(util.normpath(source))
53 sourceroot = pathutil.dirname(util.normpath(source))
54 pat = util.pconvert(pat)
54 pat = util.pconvert(pat)
55 path = pathutil.join(sourceroot, pat)
55 path = pathutil.join(sourceroot, pat)
56
56
57 newroot = pathutil.dirname(path)
57 newroot = pathutil.dirname(path)
58 relmatcher = match(newroot, '', [], ['include:%s' % path])
58 relmatcher = match(newroot, '', [], ['include:%s' % path])
59
59
60 prefix = pathutil.canonpath(root, root, newroot)
60 prefix = pathutil.canonpath(root, root, newroot)
61 if prefix:
61 if prefix:
62 prefix += '/'
62 prefix += '/'
63 relmatchers.append((prefix, relmatcher))
63 relmatchers.append((prefix, relmatcher))
64 else:
64 else:
65 other.append((kind, pat, source))
65 other.append((kind, pat, source))
66
66
67 return relmatchers, other
67 return relmatchers, other
68
68
69 def _kindpatsalwaysmatch(kindpats):
69 def _kindpatsalwaysmatch(kindpats):
70 """Checks whether the kindpats match everything, as e.g.
70 """Checks whether the kindpats match everything, as e.g.
71 'relpath:.' does.
71 'relpath:.' does.
72 """
72 """
73 for kind, pat, source in kindpats:
73 for kind, pat, source in kindpats:
74 if pat != '' or kind not in ['relpath', 'glob']:
74 if pat != '' or kind not in ['relpath', 'glob']:
75 return False
75 return False
76 return True
76 return True
77
77
78 class match(object):
78 class match(object):
79 def __init__(self, root, cwd, patterns, include=[], exclude=[],
79 def __init__(self, root, cwd, patterns, include=[], exclude=[],
80 default='glob', exact=False, auditor=None, ctx=None,
80 default='glob', exact=False, auditor=None, ctx=None,
81 listsubrepos=False, warn=None, badfn=None):
81 listsubrepos=False, warn=None, badfn=None):
82 """build an object to match a set of file patterns
82 """build an object to match a set of file patterns
83
83
84 arguments:
84 arguments:
85 root - the canonical root of the tree you're matching against
85 root - the canonical root of the tree you're matching against
86 cwd - the current working directory, if relevant
86 cwd - the current working directory, if relevant
87 patterns - patterns to find
87 patterns - patterns to find
88 include - patterns to include (unless they are excluded)
88 include - patterns to include (unless they are excluded)
89 exclude - patterns to exclude (even if they are included)
89 exclude - patterns to exclude (even if they are included)
90 default - if a pattern in patterns has no explicit type, assume this one
90 default - if a pattern in patterns has no explicit type, assume this one
91 exact - patterns are actually filenames (include/exclude still apply)
91 exact - patterns are actually filenames (include/exclude still apply)
92 warn - optional function used for printing warnings
92 warn - optional function used for printing warnings
93 badfn - optional bad() callback for this matcher instead of the default
93 badfn - optional bad() callback for this matcher instead of the default
94
94
95 a pattern is one of:
95 a pattern is one of:
96 'glob:<glob>' - a glob relative to cwd
96 'glob:<glob>' - a glob relative to cwd
97 're:<regexp>' - a regular expression
97 're:<regexp>' - a regular expression
98 'path:<path>' - a path relative to repository root
98 'path:<path>' - a path relative to repository root
99 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
99 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
100 'relpath:<path>' - a path relative to cwd
100 'relpath:<path>' - a path relative to cwd
101 'relre:<regexp>' - a regexp that needn't match the start of a name
101 'relre:<regexp>' - a regexp that needn't match the start of a name
102 'set:<fileset>' - a fileset expression
102 'set:<fileset>' - a fileset expression
103 'include:<path>' - a file of patterns to read and include
103 'include:<path>' - a file of patterns to read and include
104 'subinclude:<path>' - a file of patterns to match against files under
104 'subinclude:<path>' - a file of patterns to match against files under
105 the same directory
105 the same directory
106 '<something>' - a pattern of the specified default type
106 '<something>' - a pattern of the specified default type
107 """
107 """
108
108
109 self._root = root
109 self._root = root
110 self._cwd = cwd
110 self._cwd = cwd
111 self._files = [] # exact files and roots of patterns
111 self._files = [] # exact files and roots of patterns
112 self._anypats = bool(include or exclude)
112 self._anypats = bool(include or exclude)
113 self._always = False
113 self._always = False
114 self._pathrestricted = bool(include or exclude or patterns)
114 self._pathrestricted = bool(include or exclude or patterns)
115 self._warn = warn
115 self._warn = warn
116 self._includeroots = set()
116 self._includeroots = set()
117 self._includedirs = set(['.'])
117 self._includedirs = set(['.'])
118 self._excluderoots = set()
118 self._excluderoots = set()
119
119
120 if badfn is not None:
120 if badfn is not None:
121 self.bad = badfn
121 self.bad = badfn
122
122
123 matchfns = []
123 matchfns = []
124 if include:
124 if include:
125 kindpats = self._normalize(include, 'glob', root, cwd, auditor)
125 kindpats = self._normalize(include, 'glob', root, cwd, auditor)
126 self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
126 self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
127 listsubrepos, root)
127 listsubrepos, root)
128 self._includeroots.update(_roots(kindpats))
128 self._includeroots.update(_roots(kindpats))
129 self._includeroots.discard('.')
129 self._includeroots.discard('.')
130 self._includedirs.update(util.dirs(self._includeroots))
130 self._includedirs.update(util.dirs(self._includeroots))
131 matchfns.append(im)
131 matchfns.append(im)
132 if exclude:
132 if exclude:
133 kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
133 kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
134 self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
134 self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
135 listsubrepos, root)
135 listsubrepos, root)
136 if not _anypats(kindpats):
136 if not _anypats(kindpats):
137 self._excluderoots.update(_roots(kindpats))
137 self._excluderoots.update(_roots(kindpats))
138 matchfns.append(lambda f: not em(f))
138 matchfns.append(lambda f: not em(f))
139 if exact:
139 if exact:
140 if isinstance(patterns, list):
140 if isinstance(patterns, list):
141 self._files = patterns
141 self._files = patterns
142 else:
142 else:
143 self._files = list(patterns)
143 self._files = list(patterns)
144 matchfns.append(self.exact)
144 matchfns.append(self.exact)
145 elif patterns:
145 elif patterns:
146 kindpats = self._normalize(patterns, default, root, cwd, auditor)
146 kindpats = self._normalize(patterns, default, root, cwd, auditor)
147 if not _kindpatsalwaysmatch(kindpats):
147 if not _kindpatsalwaysmatch(kindpats):
148 self._files = _roots(kindpats)
148 self._files = _roots(kindpats)
149 self._anypats = self._anypats or _anypats(kindpats)
149 self._anypats = self._anypats or _anypats(kindpats)
150 self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
150 self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
151 listsubrepos, root)
151 listsubrepos, root)
152 matchfns.append(pm)
152 matchfns.append(pm)
153
153
154 if not matchfns:
154 if not matchfns:
155 m = util.always
155 m = util.always
156 self._always = True
156 self._always = True
157 elif len(matchfns) == 1:
157 elif len(matchfns) == 1:
158 m = matchfns[0]
158 m = matchfns[0]
159 else:
159 else:
160 def m(f):
160 def m(f):
161 for matchfn in matchfns:
161 for matchfn in matchfns:
162 if not matchfn(f):
162 if not matchfn(f):
163 return False
163 return False
164 return True
164 return True
165
165
166 self.matchfn = m
166 self.matchfn = m
167 self._fileroots = set(self._files)
167 self._fileroots = set(self._files)
168
168
169 def __call__(self, fn):
169 def __call__(self, fn):
170 return self.matchfn(fn)
170 return self.matchfn(fn)
171 def __iter__(self):
171 def __iter__(self):
172 for f in self._files:
172 for f in self._files:
173 yield f
173 yield f
174
174
175 # Callbacks related to how the matcher is used by dirstate.walk.
175 # Callbacks related to how the matcher is used by dirstate.walk.
176 # Subscribers to these events must monkeypatch the matcher object.
176 # Subscribers to these events must monkeypatch the matcher object.
177 def bad(self, f, msg):
177 def bad(self, f, msg):
178 '''Callback from dirstate.walk for each explicit file that can't be
178 '''Callback from dirstate.walk for each explicit file that can't be
179 found/accessed, with an error message.'''
179 found/accessed, with an error message.'''
180 pass
180 pass
181
181
182 # If an explicitdir is set, it will be called when an explicitly listed
182 # If an explicitdir is set, it will be called when an explicitly listed
183 # directory is visited.
183 # directory is visited.
184 explicitdir = None
184 explicitdir = None
185
185
186 # If a traversedir is set, it will be called when a directory discovered
186 # If a traversedir is set, it will be called when a directory discovered
187 # by recursive traversal is visited.
187 # by recursive traversal is visited.
188 traversedir = None
188 traversedir = None
189
189
190 def abs(self, f):
190 def abs(self, f):
191 '''Convert a repo path back to path that is relative to the root of the
191 '''Convert a repo path back to path that is relative to the root of the
192 matcher.'''
192 matcher.'''
193 return f
193 return f
194
194
195 def rel(self, f):
195 def rel(self, f):
196 '''Convert repo path back to path that is relative to cwd of matcher.'''
196 '''Convert repo path back to path that is relative to cwd of matcher.'''
197 return util.pathto(self._root, self._cwd, f)
197 return util.pathto(self._root, self._cwd, f)
198
198
199 def uipath(self, f):
199 def uipath(self, f):
200 '''Convert repo path to a display path. If patterns or -I/-X were used
200 '''Convert repo path to a display path. If patterns or -I/-X were used
201 to create this matcher, the display path will be relative to cwd.
201 to create this matcher, the display path will be relative to cwd.
202 Otherwise it is relative to the root of the repo.'''
202 Otherwise it is relative to the root of the repo.'''
203 return (self._pathrestricted and self.rel(f)) or self.abs(f)
203 return (self._pathrestricted and self.rel(f)) or self.abs(f)
204
204
205 def files(self):
205 def files(self):
206 '''Explicitly listed files or patterns or roots:
206 '''Explicitly listed files or patterns or roots:
207 if no patterns or .always(): empty list,
207 if no patterns or .always(): empty list,
208 if exact: list exact files,
208 if exact: list exact files,
209 if not .anypats(): list all files and dirs,
209 if not .anypats(): list all files and dirs,
210 else: optimal roots'''
210 else: optimal roots'''
211 return self._files
211 return self._files
212
212
213 @propertycache
213 @propertycache
214 def _dirs(self):
214 def _dirs(self):
215 return set(util.dirs(self._fileroots)) | set(['.'])
215 return set(util.dirs(self._fileroots)) | set(['.'])
216
216
217 def visitdir(self, dir):
217 def visitdir(self, dir):
218 '''Decides whether a directory should be visited based on whether it
218 '''Decides whether a directory should be visited based on whether it
219 has potential matches in it or one of its subdirectories. This is
219 has potential matches in it or one of its subdirectories. This is
220 based on the match's primary, included, and excluded patterns.
220 based on the match's primary, included, and excluded patterns.
221
221
222 This function's behavior is undefined if it has returned False for
222 This function's behavior is undefined if it has returned False for
223 one of the dir's parent directories.
223 one of the dir's parent directories.
224 '''
224 '''
225 if dir in self._excluderoots:
225 if dir in self._excluderoots:
226 return False
226 return False
227 parentdirs = None
228 if (self._includeroots and
227 if (self._includeroots and
229 dir not in self._includeroots and
228 dir not in self._includeroots and
230 dir not in self._includedirs):
229 dir not in self._includedirs):
231 parentdirs = list(util.finddirs(dir))
230 if not any(parent in self._includeroots
232 if not any(parent in self._includeroots for parent in parentdirs):
231 for parent in util.finddirs(dir)):
233 return False
232 return False
234 return (not self._fileroots or
233 return (not self._fileroots or
235 '.' in self._fileroots or
234 '.' in self._fileroots or
236 dir in self._fileroots or
235 dir in self._fileroots or
237 dir in self._dirs or
236 dir in self._dirs or
238 any(parentdir in self._fileroots
237 any(parentdir in self._fileroots
239 for parentdir in parentdirs or util.finddirs(dir)))
238 for parentdir in util.finddirs(dir)))
240
239
241 def exact(self, f):
240 def exact(self, f):
242 '''Returns True if f is in .files().'''
241 '''Returns True if f is in .files().'''
243 return f in self._fileroots
242 return f in self._fileroots
244
243
245 def anypats(self):
244 def anypats(self):
246 '''Matcher uses patterns or include/exclude.'''
245 '''Matcher uses patterns or include/exclude.'''
247 return self._anypats
246 return self._anypats
248
247
249 def always(self):
248 def always(self):
250 '''Matcher will match everything and .files() will be empty
249 '''Matcher will match everything and .files() will be empty
251 - optimization might be possible and necessary.'''
250 - optimization might be possible and necessary.'''
252 return self._always
251 return self._always
253
252
254 def ispartial(self):
253 def ispartial(self):
255 '''True if the matcher won't always match.
254 '''True if the matcher won't always match.
256
255
257 Although it's just the inverse of _always in this implementation,
256 Although it's just the inverse of _always in this implementation,
258 an extension such as narrowhg might make it return something
257 an extension such as narrowhg might make it return something
259 slightly different.'''
258 slightly different.'''
260 return not self._always
259 return not self._always
261
260
262 def isexact(self):
261 def isexact(self):
263 return self.matchfn == self.exact
262 return self.matchfn == self.exact
264
263
265 def prefix(self):
264 def prefix(self):
266 return not self.always() and not self.isexact() and not self.anypats()
265 return not self.always() and not self.isexact() and not self.anypats()
267
266
268 def _normalize(self, patterns, default, root, cwd, auditor):
267 def _normalize(self, patterns, default, root, cwd, auditor):
269 '''Convert 'kind:pat' from the patterns list to tuples with kind and
268 '''Convert 'kind:pat' from the patterns list to tuples with kind and
270 normalized and rooted patterns and with listfiles expanded.'''
269 normalized and rooted patterns and with listfiles expanded.'''
271 kindpats = []
270 kindpats = []
272 for kind, pat in [_patsplit(p, default) for p in patterns]:
271 for kind, pat in [_patsplit(p, default) for p in patterns]:
273 if kind in ('glob', 'relpath'):
272 if kind in ('glob', 'relpath'):
274 pat = pathutil.canonpath(root, cwd, pat, auditor)
273 pat = pathutil.canonpath(root, cwd, pat, auditor)
275 elif kind in ('relglob', 'path'):
274 elif kind in ('relglob', 'path'):
276 pat = util.normpath(pat)
275 pat = util.normpath(pat)
277 elif kind in ('listfile', 'listfile0'):
276 elif kind in ('listfile', 'listfile0'):
278 try:
277 try:
279 files = util.readfile(pat)
278 files = util.readfile(pat)
280 if kind == 'listfile0':
279 if kind == 'listfile0':
281 files = files.split('\0')
280 files = files.split('\0')
282 else:
281 else:
283 files = files.splitlines()
282 files = files.splitlines()
284 files = [f for f in files if f]
283 files = [f for f in files if f]
285 except EnvironmentError:
284 except EnvironmentError:
286 raise util.Abort(_("unable to read file list (%s)") % pat)
285 raise util.Abort(_("unable to read file list (%s)") % pat)
287 for k, p, source in self._normalize(files, default, root, cwd,
286 for k, p, source in self._normalize(files, default, root, cwd,
288 auditor):
287 auditor):
289 kindpats.append((k, p, pat))
288 kindpats.append((k, p, pat))
290 continue
289 continue
291 elif kind == 'include':
290 elif kind == 'include':
292 try:
291 try:
293 includepats = readpatternfile(pat, self._warn)
292 includepats = readpatternfile(pat, self._warn)
294 for k, p, source in self._normalize(includepats, default,
293 for k, p, source in self._normalize(includepats, default,
295 root, cwd, auditor):
294 root, cwd, auditor):
296 kindpats.append((k, p, source or pat))
295 kindpats.append((k, p, source or pat))
297 except util.Abort, inst:
296 except util.Abort, inst:
298 raise util.Abort('%s: %s' % (pat, inst[0]))
297 raise util.Abort('%s: %s' % (pat, inst[0]))
299 except IOError, inst:
298 except IOError, inst:
300 if self._warn:
299 if self._warn:
301 self._warn(_("skipping unreadable pattern file "
300 self._warn(_("skipping unreadable pattern file "
302 "'%s': %s\n") % (pat, inst.strerror))
301 "'%s': %s\n") % (pat, inst.strerror))
303 continue
302 continue
304 # else: re or relre - which cannot be normalized
303 # else: re or relre - which cannot be normalized
305 kindpats.append((kind, pat, ''))
304 kindpats.append((kind, pat, ''))
306 return kindpats
305 return kindpats
307
306
308 def exact(root, cwd, files, badfn=None):
307 def exact(root, cwd, files, badfn=None):
309 return match(root, cwd, files, exact=True, badfn=badfn)
308 return match(root, cwd, files, exact=True, badfn=badfn)
310
309
311 def always(root, cwd):
310 def always(root, cwd):
312 return match(root, cwd, [])
311 return match(root, cwd, [])
313
312
314 def badmatch(match, badfn):
313 def badmatch(match, badfn):
315 """Make a copy of the given matcher, replacing its bad method with the given
314 """Make a copy of the given matcher, replacing its bad method with the given
316 one.
315 one.
317 """
316 """
318 m = copy.copy(match)
317 m = copy.copy(match)
319 m.bad = badfn
318 m.bad = badfn
320 return m
319 return m
321
320
322 class narrowmatcher(match):
321 class narrowmatcher(match):
323 """Adapt a matcher to work on a subdirectory only.
322 """Adapt a matcher to work on a subdirectory only.
324
323
325 The paths are remapped to remove/insert the path as needed:
324 The paths are remapped to remove/insert the path as needed:
326
325
327 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
326 >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
328 >>> m2 = narrowmatcher('sub', m1)
327 >>> m2 = narrowmatcher('sub', m1)
329 >>> bool(m2('a.txt'))
328 >>> bool(m2('a.txt'))
330 False
329 False
331 >>> bool(m2('b.txt'))
330 >>> bool(m2('b.txt'))
332 True
331 True
333 >>> bool(m2.matchfn('a.txt'))
332 >>> bool(m2.matchfn('a.txt'))
334 False
333 False
335 >>> bool(m2.matchfn('b.txt'))
334 >>> bool(m2.matchfn('b.txt'))
336 True
335 True
337 >>> m2.files()
336 >>> m2.files()
338 ['b.txt']
337 ['b.txt']
339 >>> m2.exact('b.txt')
338 >>> m2.exact('b.txt')
340 True
339 True
341 >>> util.pconvert(m2.rel('b.txt'))
340 >>> util.pconvert(m2.rel('b.txt'))
342 'sub/b.txt'
341 'sub/b.txt'
343 >>> def bad(f, msg):
342 >>> def bad(f, msg):
344 ... print "%s: %s" % (f, msg)
343 ... print "%s: %s" % (f, msg)
345 >>> m1.bad = bad
344 >>> m1.bad = bad
346 >>> m2.bad('x.txt', 'No such file')
345 >>> m2.bad('x.txt', 'No such file')
347 sub/x.txt: No such file
346 sub/x.txt: No such file
348 >>> m2.abs('c.txt')
347 >>> m2.abs('c.txt')
349 'sub/c.txt'
348 'sub/c.txt'
350 """
349 """
351
350
352 def __init__(self, path, matcher):
351 def __init__(self, path, matcher):
353 self._root = matcher._root
352 self._root = matcher._root
354 self._cwd = matcher._cwd
353 self._cwd = matcher._cwd
355 self._path = path
354 self._path = path
356 self._matcher = matcher
355 self._matcher = matcher
357 self._always = matcher._always
356 self._always = matcher._always
358 self._pathrestricted = matcher._pathrestricted
357 self._pathrestricted = matcher._pathrestricted
359
358
360 self._files = [f[len(path) + 1:] for f in matcher._files
359 self._files = [f[len(path) + 1:] for f in matcher._files
361 if f.startswith(path + "/")]
360 if f.startswith(path + "/")]
362
361
363 # If the parent repo had a path to this subrepo and no patterns are
362 # If the parent repo had a path to this subrepo and no patterns are
364 # specified, this submatcher always matches.
363 # specified, this submatcher always matches.
365 if not self._always and not matcher._anypats:
364 if not self._always and not matcher._anypats:
366 self._always = any(f == path for f in matcher._files)
365 self._always = any(f == path for f in matcher._files)
367
366
368 self._anypats = matcher._anypats
367 self._anypats = matcher._anypats
369 self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
368 self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
370 self._fileroots = set(self._files)
369 self._fileroots = set(self._files)
371
370
372 def abs(self, f):
371 def abs(self, f):
373 return self._matcher.abs(self._path + "/" + f)
372 return self._matcher.abs(self._path + "/" + f)
374
373
375 def bad(self, f, msg):
374 def bad(self, f, msg):
376 self._matcher.bad(self._path + "/" + f, msg)
375 self._matcher.bad(self._path + "/" + f, msg)
377
376
378 def rel(self, f):
377 def rel(self, f):
379 return self._matcher.rel(self._path + "/" + f)
378 return self._matcher.rel(self._path + "/" + f)
380
379
381 class icasefsmatcher(match):
380 class icasefsmatcher(match):
382 """A matcher for wdir on case insensitive filesystems, which normalizes the
381 """A matcher for wdir on case insensitive filesystems, which normalizes the
383 given patterns to the case in the filesystem.
382 given patterns to the case in the filesystem.
384 """
383 """
385
384
386 def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
385 def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
387 ctx, listsubrepos=False, badfn=None):
386 ctx, listsubrepos=False, badfn=None):
388 init = super(icasefsmatcher, self).__init__
387 init = super(icasefsmatcher, self).__init__
389 self._dsnormalize = ctx.repo().dirstate.normalize
388 self._dsnormalize = ctx.repo().dirstate.normalize
390
389
391 init(root, cwd, patterns, include, exclude, default, auditor=auditor,
390 init(root, cwd, patterns, include, exclude, default, auditor=auditor,
392 ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)
391 ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)
393
392
394 # m.exact(file) must be based off of the actual user input, otherwise
393 # m.exact(file) must be based off of the actual user input, otherwise
395 # inexact case matches are treated as exact, and not noted without -v.
394 # inexact case matches are treated as exact, and not noted without -v.
396 if self._files:
395 if self._files:
397 self._fileroots = set(_roots(self._kp))
396 self._fileroots = set(_roots(self._kp))
398
397
399 def _normalize(self, patterns, default, root, cwd, auditor):
398 def _normalize(self, patterns, default, root, cwd, auditor):
400 self._kp = super(icasefsmatcher, self)._normalize(patterns, default,
399 self._kp = super(icasefsmatcher, self)._normalize(patterns, default,
401 root, cwd, auditor)
400 root, cwd, auditor)
402 kindpats = []
401 kindpats = []
403 for kind, pats, source in self._kp:
402 for kind, pats, source in self._kp:
404 if kind not in ('re', 'relre'): # regex can't be normalized
403 if kind not in ('re', 'relre'): # regex can't be normalized
405 pats = self._dsnormalize(pats)
404 pats = self._dsnormalize(pats)
406 kindpats.append((kind, pats, source))
405 kindpats.append((kind, pats, source))
407 return kindpats
406 return kindpats
408
407
409 def patkind(pattern, default=None):
408 def patkind(pattern, default=None):
410 '''If pattern is 'kind:pat' with a known kind, return kind.'''
409 '''If pattern is 'kind:pat' with a known kind, return kind.'''
411 return _patsplit(pattern, default)[0]
410 return _patsplit(pattern, default)[0]
412
411
413 def _patsplit(pattern, default):
412 def _patsplit(pattern, default):
414 """Split a string into the optional pattern kind prefix and the actual
413 """Split a string into the optional pattern kind prefix and the actual
415 pattern."""
414 pattern."""
416 if ':' in pattern:
415 if ':' in pattern:
417 kind, pat = pattern.split(':', 1)
416 kind, pat = pattern.split(':', 1)
418 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
417 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
419 'listfile', 'listfile0', 'set', 'include', 'subinclude'):
418 'listfile', 'listfile0', 'set', 'include', 'subinclude'):
420 return kind, pat
419 return kind, pat
421 return default, pattern
420 return default, pattern
422
421
423 def _globre(pat):
422 def _globre(pat):
424 r'''Convert an extended glob string to a regexp string.
423 r'''Convert an extended glob string to a regexp string.
425
424
426 >>> print _globre(r'?')
425 >>> print _globre(r'?')
427 .
426 .
428 >>> print _globre(r'*')
427 >>> print _globre(r'*')
429 [^/]*
428 [^/]*
430 >>> print _globre(r'**')
429 >>> print _globre(r'**')
431 .*
430 .*
432 >>> print _globre(r'**/a')
431 >>> print _globre(r'**/a')
433 (?:.*/)?a
432 (?:.*/)?a
434 >>> print _globre(r'a/**/b')
433 >>> print _globre(r'a/**/b')
435 a\/(?:.*/)?b
434 a\/(?:.*/)?b
436 >>> print _globre(r'[a*?!^][^b][!c]')
435 >>> print _globre(r'[a*?!^][^b][!c]')
437 [a*?!^][\^b][^c]
436 [a*?!^][\^b][^c]
438 >>> print _globre(r'{a,b}')
437 >>> print _globre(r'{a,b}')
439 (?:a|b)
438 (?:a|b)
440 >>> print _globre(r'.\*\?')
439 >>> print _globre(r'.\*\?')
441 \.\*\?
440 \.\*\?
442 '''
441 '''
443 i, n = 0, len(pat)
442 i, n = 0, len(pat)
444 res = ''
443 res = ''
445 group = 0
444 group = 0
446 escape = util.re.escape
445 escape = util.re.escape
447 def peek():
446 def peek():
448 return i < n and pat[i]
447 return i < n and pat[i]
449 while i < n:
448 while i < n:
450 c = pat[i]
449 c = pat[i]
451 i += 1
450 i += 1
452 if c not in '*?[{},\\':
451 if c not in '*?[{},\\':
453 res += escape(c)
452 res += escape(c)
454 elif c == '*':
453 elif c == '*':
455 if peek() == '*':
454 if peek() == '*':
456 i += 1
455 i += 1
457 if peek() == '/':
456 if peek() == '/':
458 i += 1
457 i += 1
459 res += '(?:.*/)?'
458 res += '(?:.*/)?'
460 else:
459 else:
461 res += '.*'
460 res += '.*'
462 else:
461 else:
463 res += '[^/]*'
462 res += '[^/]*'
464 elif c == '?':
463 elif c == '?':
465 res += '.'
464 res += '.'
466 elif c == '[':
465 elif c == '[':
467 j = i
466 j = i
468 if j < n and pat[j] in '!]':
467 if j < n and pat[j] in '!]':
469 j += 1
468 j += 1
470 while j < n and pat[j] != ']':
469 while j < n and pat[j] != ']':
471 j += 1
470 j += 1
472 if j >= n:
471 if j >= n:
473 res += '\\['
472 res += '\\['
474 else:
473 else:
475 stuff = pat[i:j].replace('\\','\\\\')
474 stuff = pat[i:j].replace('\\','\\\\')
476 i = j + 1
475 i = j + 1
477 if stuff[0] == '!':
476 if stuff[0] == '!':
478 stuff = '^' + stuff[1:]
477 stuff = '^' + stuff[1:]
479 elif stuff[0] == '^':
478 elif stuff[0] == '^':
480 stuff = '\\' + stuff
479 stuff = '\\' + stuff
481 res = '%s[%s]' % (res, stuff)
480 res = '%s[%s]' % (res, stuff)
482 elif c == '{':
481 elif c == '{':
483 group += 1
482 group += 1
484 res += '(?:'
483 res += '(?:'
485 elif c == '}' and group:
484 elif c == '}' and group:
486 res += ')'
485 res += ')'
487 group -= 1
486 group -= 1
488 elif c == ',' and group:
487 elif c == ',' and group:
489 res += '|'
488 res += '|'
490 elif c == '\\':
489 elif c == '\\':
491 p = peek()
490 p = peek()
492 if p:
491 if p:
493 i += 1
492 i += 1
494 res += escape(p)
493 res += escape(p)
495 else:
494 else:
496 res += escape(c)
495 res += escape(c)
497 else:
496 else:
498 res += escape(c)
497 res += escape(c)
499 return res
498 return res
500
499
501 def _regex(kind, pat, globsuffix):
500 def _regex(kind, pat, globsuffix):
502 '''Convert a (normalized) pattern of any kind into a regular expression.
501 '''Convert a (normalized) pattern of any kind into a regular expression.
503 globsuffix is appended to the regexp of globs.'''
502 globsuffix is appended to the regexp of globs.'''
504 if not pat:
503 if not pat:
505 return ''
504 return ''
506 if kind == 're':
505 if kind == 're':
507 return pat
506 return pat
508 if kind == 'path':
507 if kind == 'path':
509 return '^' + util.re.escape(pat) + '(?:/|$)'
508 return '^' + util.re.escape(pat) + '(?:/|$)'
510 if kind == 'relglob':
509 if kind == 'relglob':
511 return '(?:|.*/)' + _globre(pat) + globsuffix
510 return '(?:|.*/)' + _globre(pat) + globsuffix
512 if kind == 'relpath':
511 if kind == 'relpath':
513 return util.re.escape(pat) + '(?:/|$)'
512 return util.re.escape(pat) + '(?:/|$)'
514 if kind == 'relre':
513 if kind == 'relre':
515 if pat.startswith('^'):
514 if pat.startswith('^'):
516 return pat
515 return pat
517 return '.*' + pat
516 return '.*' + pat
518 return _globre(pat) + globsuffix
517 return _globre(pat) + globsuffix
519
518
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Build a matcher for kindpats; return (regexp string, match function).

    Up to three partial matchers are collected, in this order: one for the
    subincludes found by _expandsubinclude(), the membership test of the
    file set produced by _expandsets(), and the regexp matcher built from
    whatever patterns remain.  The returned regexp string is '' when no
    patterns remain for the regexp matcher.  globsuffix is appended to the
    regexp of globs.'''
    candidates = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        def matchsubinclude(f):
            # a subinclude matcher is given the path relative to its prefix
            return any(f.startswith(prefix) and mf(f[len(prefix):])
                       for prefix, mf in subincludes)
        candidates.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        candidates.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        candidates.append(regexmatch)

    if len(candidates) != 1:
        # zero matchers never match; several match if any of them does
        return regex, lambda f: any(mf(f) for mf in candidates)
    # a single matcher is handed back directly, without a wrapper
    return regex, candidates[0]
547
546
def _buildregexmatch(kindpats, globsuffix):
    """Combine kindpats into one regexp; return (regexp string, matcher).

    The per-pattern regexps from _regex() are joined with '|' inside a
    non-capturing group.  If the result is longer than 20000 characters,
    or compiling it raises OverflowError, kindpats is split in half and
    each half is built recursively; the returned matcher then tries both
    halves, while the returned string is still the full regexp.  The
    OverflowError is re-raised when fewer than two patterns are left.

    A re.error is turned into util.Abort, naming the first pattern that
    fails to compile on its own (with its source, when it has one)."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda f: first(f) or second(f)
    except re.error:
        # compile the patterns one by one to find out which one is broken
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise util.Abort(_("%s: invalid pattern (%s): %s") %
                                 (s, k, p))
        raise util.Abort(_("invalid pattern"))
578
577
579 def _roots(kindpats):
578 def _roots(kindpats):
580 '''return roots and exact explicitly listed files from patterns
579 '''return roots and exact explicitly listed files from patterns
581
580
582 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
581 >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')])
583 ['g', 'g', '.']
582 ['g', 'g', '.']
584 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
583 >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
585 ['r', 'p/p', '.']
584 ['r', 'p/p', '.']
586 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
585 >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
587 ['.', '.', '.']
586 ['.', '.', '.']
588 '''
587 '''
589 r = []
588 r = []
590 for kind, pat, source in kindpats:
589 for kind, pat, source in kindpats:
591 if kind == 'glob': # find the non-glob prefix
590 if kind == 'glob': # find the non-glob prefix
592 root = []
591 root = []
593 for p in pat.split('/'):
592 for p in pat.split('/'):
594 if '[' in p or '{' in p or '*' in p or '?' in p:
593 if '[' in p or '{' in p or '*' in p or '?' in p:
595 break
594 break
596 root.append(p)
595 root.append(p)
597 r.append('/'.join(root) or '.')
596 r.append('/'.join(root) or '.')
598 elif kind in ('relpath', 'path'):
597 elif kind in ('relpath', 'path'):
599 r.append(pat or '.')
598 r.append(pat or '.')
600 else: # relglob, re, relre
599 else: # relglob, re, relre
601 r.append('.')
600 r.append('.')
602 return r
601 return r
603
602
604 def _anypats(kindpats):
603 def _anypats(kindpats):
605 for kind, pat, source in kindpats:
604 for kind, pat, source in kindpats:
606 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
605 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
607 return True
606 return True
608
607
# compiled lazily by readpatternfile(), the first time a line contains '#'
_commentre = None

def readpatternfile(filepath, warn):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    filepath is the name of the file to read.  warn, when true, is called
    with a message for every 'syntax:' line naming an unknown syntax; such
    a line is otherwise ignored.  The file is closed before returning,
    also when reading it or calling warn raises an exception.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type'''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    fp = open(filepath)
    try:
        for line in fp:
            if "#" in line:
                if not _commentre:
                    _commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                line = _commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            # items() rather than iteritems(): same pairs, works on
            # both Python 2 and Python 3
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            patterns.append(linesyntax + line)
    finally:
        # do not leak the file handle if reading or warn() fails
        fp.close()
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now