##// END OF EJS Templates
match: optimize visitdir() for when no explicit files are listed...
Martin von Zweigbergk -
r32554:f44ea253 default
parent child Browse files
Show More
@@ -1,1000 +1,1001
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import copy
10 import copy
11 import os
11 import os
12 import re
12 import re
13
13
14 from .i18n import _
14 from .i18n import _
15 from . import (
15 from . import (
16 error,
16 error,
17 pathutil,
17 pathutil,
18 util,
18 util,
19 )
19 )
20
20
# Module-level alias so the classes below can write @propertycache directly.
propertycache = util.propertycache
22
22
def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The pattern is compiled through util.re.compile(). If the compiled object
    has a test_match attribute, that is returned; otherwise its match
    attribute is returned.'''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match
32
32
def _expandsets(kindpats, ctx, listsubrepos):
    '''Returns the kindpats list with the 'set' patterns expanded.

    kindpats is an iterable of (kind, pat, source) tuples. Every entry whose
    kind is 'set' is evaluated with ctx.getfileset(pat) and, when
    listsubrepos is true, also against each subrepo named in ctx.substate
    (results are prefixed with the subrepo path and a '/').

    Returns a (fset, other) pair: fset is the set of file names produced by
    the 'set' patterns and other is the list of remaining tuples, in their
    original order.

    Raises error.ProgrammingError if a 'set' pattern is seen and ctx is
    false.'''
    fset = set()
    other = []

    for kind, pat, source in kindpats:
        if kind == 'set':
            if not ctx:
                raise error.ProgrammingError("fileset expression with no "
                                             "context")
            s = ctx.getfileset(pat)
            fset.update(s)

            if listsubrepos:
                for subpath in ctx.substate:
                    s = ctx.sub(subpath).getfileset(pat)
                    fset.update(subpath + '/' + f for f in s)

            # 'set' entries are fully consumed; they never reach 'other'
            continue
        other.append((kind, pat, source))
    return fset, other
54
54
def _expandsubinclude(kindpats, root):
    '''Returns the list of subinclude matcher args and the kindpats without the
    subincludes in it.

    For every ('subinclude', pat, source) entry the pattern file path is
    resolved relative to the directory of source, and a (prefix, matcherargs)
    pair is recorded, where matcherargs is
    (newroot, '', [], ['include:<path>']) and prefix is newroot made
    canonical relative to root, with a trailing '/' when non-empty.

    Returns (relmatchers, other), where other holds all non-subinclude
    entries in their original order.'''
    relmatchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == 'subinclude':
            sourceroot = pathutil.dirname(util.normpath(source))
            pat = util.pconvert(pat)
            path = pathutil.join(sourceroot, pat)

            newroot = pathutil.dirname(path)
            matcherargs = (newroot, '', [], ['include:%s' % path])

            prefix = pathutil.canonpath(root, root, newroot)
            if prefix:
                prefix += '/'
            relmatchers.append((prefix, matcherargs))
        else:
            other.append((kind, pat, source))

    return relmatchers, other
78
78
def _kindpatsalwaysmatch(kindpats):
    """Checks whether the kindpats match everything, as e.g.
    'relpath:.' does.

    Returns True only if every (kind, pat, source) entry has an empty pat
    and a kind of 'relpath' or 'glob'. An empty kindpats therefore also
    yields True.
    """
    for kind, pat, source in kindpats:
        if pat != '' or kind not in ('relpath', 'glob'):
            return False
    return True
87
87
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - forwarded to the pattern normalizer
    ctx - context forwarded to the matchers; with icasefs its repo's
        dirstate is used to normalize patterns
    listsubrepos - forwarded to the pattern and include matchers
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Raises error.ProgrammingError when both exact and icasefs are set.
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif patterns:
        m = patternmatcher(root, cwd, normalize, patterns, default=default,
                           auditor=auditor, ctx=ctx, listsubrepos=listsubrepos,
                           warn=warn, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably adding a
        # "nevermatcher").
        m = alwaysmatcher(root, cwd, badfn)

    if include:
        im = includematcher(root, cwd, normalize, include, auditor=auditor,
                            ctx=ctx, listsubrepos=listsubrepos, warn=warn,
                            badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        em = includematcher(root, cwd, normalize, exclude, auditor=auditor,
                            ctx=ctx, listsubrepos=listsubrepos, warn=warn,
                            badfn=None)
        m = differencematcher(m, em)
    return m
168
168
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given files (convenience wrapper).'''
    return exactmatcher(root, cwd, files, badfn=badfn)
171
171
def always(root, cwd):
    '''Return an alwaysmatcher for root and cwd (convenience wrapper).'''
    return alwaysmatcher(root, cwd)
174
174
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy); the matcher passed in is left untouched.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
182
182
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. source is '' for patterns
    given directly, the list file name for patterns read via
    'listfile'/'listfile0', and the nested source or the include file name
    for patterns read via 'include'.

    Raises error.Abort if a list file cannot be read or if processing an
    include file aborts. An include file that raises IOError is skipped,
    with a message sent to warn when one is given.'''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                # drop empty entries
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # NOTE(review): inst[0] relies on exceptions being
                # subscriptable (Python 2) - confirm before porting.
                raise error.Abort('%s: %s' % (pat, inst[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
223
223
class basematcher(object):
    '''Base class for matchers.

    As written here it matches nothing: matchfn() and visitdir() return
    False, files() is empty and always(), isexact() and anypats() are all
    False. The matcher classes in this module override these.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        if badfn is not None:
            # shadows the class-level bad() on this instance only
            self.bad = badfn
        self._relativeuipath = relativeuipath

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        # Note: abs(f) is also used when rel(f) evaluates as false.
        return (self._relativeuipath and self.rel(f)) or self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        return False

    def prefix(self):
        return not self.always() and not self.isexact() and not self.anypats()
319
319
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None):
        # relativeuipath=False makes uipath() use abs() rather than rel()
        super(alwaysmatcher, self).__init__(root, cwd, badfn,
                                            relativeuipath=False)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        return 'all'

    def __repr__(self):
        return '<alwaysmatcher>'
338
338
class patternmatcher(basematcher):
    '''Matches files against a list of patterns.

    The patterns are normalized with the supplied normalize callable. If
    there are no patterns, or the normalized patterns match everything
    (see _kindpatsalwaysmatch), the matcher reports always() and matches
    every file.
    '''

    def __init__(self, root, cwd, normalize, patterns, default='glob',
                 auditor=None, ctx=None, listsubrepos=False, warn=None,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn,
                                             relativeuipath=bool(patterns))

        self._anypats = False
        self._always = False
        self.patternspat = None

        matchfns = []
        if patterns:
            kindpats = normalize(patterns, default, root, cwd, auditor, warn)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos, root)
                matchfns.append(pm)

        # At most one function is appended above, so the final branch below
        # is never taken as the code stands.
        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''See basematcher.visitdir(). Returns 'all' when this matcher
        matches everything, or when it is a prefix matcher and dir itself is
        one of the listed files.'''
        if self.always():
            return 'all'
        if self.prefix() and dir in self._fileset:
            return 'all'
        return ('.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        return self._anypats

    def always(self):
        return self._always

    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % self.patternspat)
397
398
class includematcher(basematcher):
    '''Matches files against a list of include patterns.

    The patterns are normalized with 'glob' as the default kind. This class
    is used by match() for both the include and the exclude lists.
    '''

    def __init__(self, root, cwd, normalize, include, auditor=None, ctx=None,
                 listsubrepos=False, warn=None, badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                          listsubrepos, root)
        self._anypats = _anypats(kindpats)
        roots, dirs = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        self.matchfn = im

    def visitdir(self, dir):
        '''See basematcher.visitdir().'''
        if not self._anypats and dir in self._roots:
            # The condition above is essentially self.prefix() for includes
            return 'all'
        return ('.' in self._roots or
                dir in self._roots or
                dir in self._dirs or
                any(parentdir in self._roots
                    for parentdir in util.finddirs(dir)))

    def anypats(self):
        # Always True, regardless of the self._anypats value used above.
        return True

    def __repr__(self):
        return ('<includematcher includes=%r>' % self.includepat)
430
431
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # A list is stored as given (not copied); any other iterable is
        # materialized into a new list.
        if isinstance(files, list):
            self._files = files
        else:
            self._files = list(files)

    # A file matches exactly when it is one of the listed files.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Visit only '.' and the directories util.dirs() reports for the
        listed files.'''
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
458
459
459 class differencematcher(basematcher):
460 class differencematcher(basematcher):
460 '''Composes two matchers by matching if the first matches and the second
461 '''Composes two matchers by matching if the first matches and the second
461 does not. Well, almost... If the user provides a pattern like "-X foo foo",
462 does not. Well, almost... If the user provides a pattern like "-X foo foo",
462 Mercurial actually does match "foo" against that. That's because exact
463 Mercurial actually does match "foo" against that. That's because exact
463 matches are treated specially. So, since this differencematcher is used for
464 matches are treated specially. So, since this differencematcher is used for
464 excludes, it needs to special-case exact matching.
465 excludes, it needs to special-case exact matching.
465
466
466 The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
467 The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
467 traversedir) are ignored.
468 traversedir) are ignored.
468
469
469 TODO: If we want to keep the behavior described above for exact matches, we
470 TODO: If we want to keep the behavior described above for exact matches, we
470 should consider instead treating the above case something like this:
471 should consider instead treating the above case something like this:
471 union(exact(foo), difference(pattern(foo), include(foo)))
472 union(exact(foo), difference(pattern(foo), include(foo)))
472 '''
473 '''
473 def __init__(self, m1, m2):
474 def __init__(self, m1, m2):
474 super(differencematcher, self).__init__(m1._root, m1._cwd)
475 super(differencematcher, self).__init__(m1._root, m1._cwd)
475 self._m1 = m1
476 self._m1 = m1
476 self._m2 = m2
477 self._m2 = m2
477 self.bad = m1.bad
478 self.bad = m1.bad
478 self.explicitdir = m1.explicitdir
479 self.explicitdir = m1.explicitdir
479 self.traversedir = m1.traversedir
480 self.traversedir = m1.traversedir
480
481
481 def matchfn(self, f):
482 def matchfn(self, f):
482 return self._m1(f) and (not self._m2(f) or self._m1.exact(f))
483 return self._m1(f) and (not self._m2(f) or self._m1.exact(f))
483
484
484 @propertycache
485 @propertycache
485 def _files(self):
486 def _files(self):
486 if self.isexact():
487 if self.isexact():
487 return [f for f in self._m1.files() if self(f)]
488 return [f for f in self._m1.files() if self(f)]
488 # If m1 is not an exact matcher, we can't easily figure out the set of
489 # If m1 is not an exact matcher, we can't easily figure out the set of
489 # files, because its files() are not always files. For example, if
490 # files, because its files() are not always files. For example, if
490 # m1 is "path:dir" and m2 is "rootfileins:.", we don't
491 # m1 is "path:dir" and m2 is "rootfileins:.", we don't
491 # want to remove "dir" from the set even though it would match m2,
492 # want to remove "dir" from the set even though it would match m2,
492 # because the "dir" in m1 may not be a file.
493 # because the "dir" in m1 may not be a file.
493 return self._m1.files()
494 return self._m1.files()
494
495
def visitdir(self, dir):
    '''Decide whether directory dir has to be visited.

    Returns False when nothing below dir can match, True otherwise.

    A directory that m2 excludes entirely ('all') is normally skipped.
    However, matchfn() keeps files that m1 lists exactly even when m2
    matches them, so such a directory must still be visited when one of
    m1's files() lies below it.
    '''
    if self._m2.visitdir(dir) == 'all':
        if dir == '.':
            prefix = ''
        else:
            prefix = dir + '/'
        # Only entries strictly below dir matter; an entry equal to dir
        # itself names the directory, not something inside it.
        if not any(f.startswith(prefix) for f in self._m1.files()):
            return False
    return bool(self._m1.visitdir(dir))
502
503
def isexact(self):
    '''Report whether the first matcher (m1) is an exact matcher.'''
    first = self._m1
    return first.isexact()
505
506
def anypats(self):
    '''Report whether either of the two wrapped matchers has patterns.'''
    first = self._m1.anypats()
    if first:
        return first
    return self._m2.anypats()
508
509
def __repr__(self):
    '''Return a debugging representation showing both wrapped matchers.'''
    operands = (self._m1, self._m2)
    return '<differencematcher m1=%r, m2=%r>' % operands
511
512
def intersectmatchers(m1, m2):
    '''Combine two matchers into one that matches when both of them do.

    Either argument may be None, in which case the other one is returned
    unchanged. When one side always matches, a shallow copy of the other
    side is returned instead of building an intersectionmatcher.

    The second matcher's non-matching-attributes (root, cwd, bad,
    explicitdir, traversedir) are ignored.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's
        # only one thing to copy from m1.
        for name in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(combined, name, getattr(m1, name))
        combined._relativeuipath |= m1._relativeuipath
        return combined

    if m2.always():
        combined = copy.copy(m1)
        combined._relativeuipath |= m2._relativeuipath
        return combined

    return intersectionmatcher(m1, m2)
536
537
class intersectionmatcher(basematcher):
    '''Matcher that matches a file only when both m1 and m2 match it.

    root, cwd and the bad/explicitdir/traversedir callbacks come from m1.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1, self._m2 = m1, m2
        for name in ('bad', 'explicitdir', 'traversedir'):
            setattr(self, name, getattr(m1, name))

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # At least one side is exact: take its file list and filter it
        # through the other side.
        if self._m1.isexact():
            exact, other = self._m1, self._m2
        else:
            exact, other = self._m2, self._m1
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        first = self._m1(f)
        if not first:
            return first
        return self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            # m1 takes everything below dir, so m2 alone decides.
            return self._m2.visitdir(dir)
        if not first:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def always(self):
        first = self._m1.always()
        if not first:
            return first
        return self._m2.always()

    def isexact(self):
        first = self._m1.isexact()
        if first:
            return first
        return self._m2.isexact()

    def anypats(self):
        first = self._m1.anypats()
        if first:
            return first
        return self._m2.anypats()

    def __repr__(self):
        operands = (self._m1, self._m2)
        return '<intersectionmatcher m1=%r, m2=%r>' % operands
580
581
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    Paths given to this matcher are relative to the subdirectory; the
    subdirectory path is inserted or removed as needed before consulting
    the wrapped matcher:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the parent's files that live below path, with the
        # "path/" prefix stripped off.
        prefix = path + "/"
        skip = len(prefix)
        self._files = [f[skip:] for f in matcher._files
                       if f.startswith(prefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def bad(self, f, msg):
        self._matcher.bad("/".join((self._path, f)), msg)

    def abs(self, f):
        return self._matcher.abs("/".join((self._path, f)))

    def rel(self, f):
        return self._matcher.rel("/".join((self._path, f)))

    def uipath(self, f):
        return self._matcher.uipath("/".join((self._path, f)))

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the
        # subdirectory from the inputs. Instead, we override matchfn() and
        # visitdir() to call the original matcher with the subdirectory
        # path prepended.
        return self._matcher.matchfn("/".join((self._path, f)))

    def visitdir(self, dir):
        # '.' stands for the subdirectory itself.
        if dir != '.':
            dir = "/".join((self._path, dir))
        else:
            dir = self._path
        return self._matcher.visitdir(dir)

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()

    def __repr__(self):
        details = (self._path, self._matcher)
        return '<subdirmatcher path=%r, matcher=%r>' % details
660
661
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern, or default if the pattern
    carries no known kind prefix.'''
    kind, rest = _patsplit(pattern, default)
    return kind
664
665
def _patsplit(pattern, default):
    """Separate a pattern string into (kind, pattern).

    The kind is whatever precedes the first ':' provided it is one of the
    known pattern kinds; otherwise (default, pattern) is returned with the
    pattern left untouched."""
    known = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
             'listfile', 'listfile0', 'set', 'include', 'subinclude',
             'rootfilesin')
    kind, colon, pat = pattern.partition(':')
    if colon and kind in known:
        return kind, pat
    return default, pattern
675
676
def _globre(pat):
    r'''Translate an extended glob string into a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    escape = util.re.escape
    end = len(pat)
    pos = 0
    depth = 0  # number of currently open '{' groups
    out = []
    while pos < end:
        ch = pat[pos:pos + 1]
        pos += 1
        if ch not in '*?[{},\\':
            out.append(escape(ch))
        elif ch == '*':
            # Slicing past the end yields an empty string, so looking at
            # the next character needs no explicit bounds check.
            if pat[pos:pos + 1] != '*':
                out.append('[^/]*')
            else:
                pos += 1
                if pat[pos:pos + 1] == '/':
                    pos += 1
                    out.append('(?:.*/)?')
                else:
                    out.append('.*')
        elif ch == '?':
            out.append('.')
        elif ch == '[':
            # Look for the closing bracket; a leading '!' or ']' belongs
            # to the character class itself.
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # No closing bracket: treat '[' as a literal.
                out.append('\\[')
            else:
                chars = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                first = chars[0:1]
                if first == '!':
                    chars = '^' + chars[1:]
                elif first == '^':
                    chars = '\\' + chars
                out.append('[%s]' % chars)
        elif ch == '{':
            depth += 1
            out.append('(?:')
        elif ch == '}' and depth:
            out.append(')')
            depth -= 1
        elif ch == ',' and depth:
            out.append('|')
        elif ch == '\\':
            following = pat[pos:pos + 1]
            if following:
                pos += 1
                out.append(escape(following))
            else:
                # Trailing backslash: escape the backslash itself.
                out.append(escape(ch))
        else:
            # '}' or ',' outside of any group is a literal.
            out.append(escape(ch))
    return ''.join(out)
753
754
def _regex(kind, pat, globsuffix):
    '''Turn a (normalized) pattern of the given kind into a regexp string.

    globsuffix is appended to the regexp produced for glob-style kinds.
    An empty pattern always yields the empty string.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    elif kind == 'path':
        if pat == '.':
            return ''
        return '^' + util.re.escape(pat) + '(?:/|$)'
    elif kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root).
        if pat != '.':
            escaped = util.re.escape(pat) + '/'
        else:
            escaped = ''
        # Anything after the pattern must be a non-directory.
        return '^' + escaped + '[^/]+$'
    elif kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    elif kind == 'relpath':
        return util.re.escape(pat) + '(?:/|$)'
    elif kind == 'relre':
        # An anchored regexp is used as is; otherwise allow any prefix.
        if not pat.startswith('^'):
            return '.*' + pat
        return pat
    # Every remaining kind is treated as a glob.
    return _globre(pat) + globsuffix
782
783
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return a regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs. The returned function
    reports a match as soon as one of the collected sub-matchers
    (subincludes, expanded file sets, compiled regexp) matches.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for subincludes are built lazily and cached per prefix.
        cache = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = cache.get(prefix)
                if submatch is None:
                    submatch = cache[prefix] = match(*matcherargs)
                if submatch(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) != 1:
        def matchany(f):
            return any(fn(f) for fn in matchfuncs)
        return regex, matchany
    return regex, matchfuncs[0]
817
818
def _buildregexmatch(kindpats, globsuffix):
    """Compile a list of (kind, pattern, source) tuples into one matcher.

    Returns the combined regexp string and a matcher function. Raises
    error.Abort when one of the patterns is not a valid regexp."""
    try:
        parts = [_regex(kind, pat, globsuffix)
                 for (kind, pat, src) in kindpats]
        regex = '(?:%s)' % '|'.join(parts)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: first(s) or second(s)
    except re.error:
        # Compile the patterns one by one to report the offending one.
        for kind, pat, src in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(kind, pat, globsuffix))
            except re.error:
                if not src:
                    raise error.Abort(_("invalid pattern (%s): %s") %
                                      (kind, pat))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (src, kind, pat))
        raise error.Abort(_("invalid pattern"))
848
849
def _patternrootsanddirs(kindpats):
    '''Returns roots and directories corresponding to each pattern.

    The result is a (roots, dirs) tuple of lists holding the roots and
    directories that exactly correspond to the patterns. Other directories
    that may also need to be considered, like the parent directories, are
    not included.
    '''
    roots = []
    dirs = []
    for kind, pat, source in kindpats:
        if kind == 'glob':
            # The root is the leading run of path components that contain
            # no glob syntax.
            literal = []
            for component in pat.split('/'):
                if any(special in component for special in '[{*?'):
                    break
                literal.append(component)
            roots.append('/'.join(literal) or '.')
        elif kind == 'relpath' or kind == 'path':
            roots.append(pat or '.')
        elif kind == 'rootfilesin':
            dirs.append(pat or '.')
        else:
            # relglob, re, relre
            roots.append('.')
    return roots, dirs
874
875
def _roots(kindpats):
    '''Returns root directories to match recursively from the given
    patterns (the first half of _patternrootsanddirs()).'''
    return _patternrootsanddirs(kindpats)[0]
879
880
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories
    should be matched non-recursively. The returned (roots, dirs) tuple
    will also include directories that need to be implicitly considered as
    either, such as parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they
    # must be scanned to get to either the roots or the other exact
    # directories. The exact directories' parents go first, then the
    # roots' parents.
    for source in (dirs, roots):
        dirs.extend(util.dirs(source))
    # util.dirs() does not include the root directory, so add it manually
    dirs.append('.')

    return roots, dirs
911
912
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so patterns of
    # that kind are left out before computing the roots.
    candidates = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(candidates)
924
925
def _anypats(kindpats):
    '''Return True if any of the patterns is of a kind that is a real
    pattern rather than a plain path; return None otherwise.'''
    patternkinds = ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin')
    for kind, pat, source in kindpats:
        if kind in patternkinds:
            return True
    return None
929
930
# Lazily compiled regexp used by readpatternfile() to strip comments.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    warn, when true, is called with a message for every "syntax:" line
    naming an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where line is the text of the line once
    comments, trailing white space and any syntax prefix have been
    removed. This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # The with block makes sure the file is closed even when parsing a
    # line (or the warn callback) raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # Switch the default syntax for the following lines.
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A line may override the default syntax with its own prefix,
            # given either in long form ('relre:') or short form ('re:').
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now