##// END OF EJS Templates
match: remove doc about undefined behavior of visitdir()...
Yuya Nishihara -
r35165:6864c405 default
parent child Browse files
Show More
@@ -1,1033 +1,1030 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import os
11 import os
12 import re
12 import re
13
13
14 from .i18n import _
14 from .i18n import _
15 from . import (
15 from . import (
16 error,
16 error,
17 pathutil,
17 pathutil,
18 util,
18 util,
19 )
19 )
20
20
# Pattern kind prefixes known to this module (the part before the ':' in a
# 'kind:pattern' string).
allpatternkinds = (
    're',
    'glob',
    'path',
    'relglob',
    'relpath',
    'relre',
    'listfile',
    'listfile0',
    'set',
    'include',
    'subinclude',
    'rootfilesin',
)
# Kinds whose pattern text is canonicalized against cwd by _donormalize().
cwdrelativepatternkinds = ('relpath', 'glob')
25
25
# Module-level alias so matcher classes can use a bare @propertycache
# decorator.
propertycache = util.propertycache
27
27
def _rematcher(regex):
    '''Compile regex via util.re and return a callable that matches a string
    against it.

    The compiled object's test_match attribute is preferred when it exists;
    otherwise its match attribute is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
37
37
38 def _expandsets(kindpats, ctx, listsubrepos):
38 def _expandsets(kindpats, ctx, listsubrepos):
39 '''Returns the kindpats list with the 'set' patterns expanded.'''
39 '''Returns the kindpats list with the 'set' patterns expanded.'''
40 fset = set()
40 fset = set()
41 other = []
41 other = []
42
42
43 for kind, pat, source in kindpats:
43 for kind, pat, source in kindpats:
44 if kind == 'set':
44 if kind == 'set':
45 if not ctx:
45 if not ctx:
46 raise error.ProgrammingError("fileset expression with no "
46 raise error.ProgrammingError("fileset expression with no "
47 "context")
47 "context")
48 s = ctx.getfileset(pat)
48 s = ctx.getfileset(pat)
49 fset.update(s)
49 fset.update(s)
50
50
51 if listsubrepos:
51 if listsubrepos:
52 for subpath in ctx.substate:
52 for subpath in ctx.substate:
53 s = ctx.sub(subpath).getfileset(pat)
53 s = ctx.sub(subpath).getfileset(pat)
54 fset.update(subpath + '/' + f for f in s)
54 fset.update(subpath + '/' + f for f in s)
55
55
56 continue
56 continue
57 other.append((kind, pat, source))
57 other.append((kind, pat, source))
58 return fset, other
58 return fset, other
59
59
60 def _expandsubinclude(kindpats, root):
60 def _expandsubinclude(kindpats, root):
61 '''Returns the list of subinclude matcher args and the kindpats without the
61 '''Returns the list of subinclude matcher args and the kindpats without the
62 subincludes in it.'''
62 subincludes in it.'''
63 relmatchers = []
63 relmatchers = []
64 other = []
64 other = []
65
65
66 for kind, pat, source in kindpats:
66 for kind, pat, source in kindpats:
67 if kind == 'subinclude':
67 if kind == 'subinclude':
68 sourceroot = pathutil.dirname(util.normpath(source))
68 sourceroot = pathutil.dirname(util.normpath(source))
69 pat = util.pconvert(pat)
69 pat = util.pconvert(pat)
70 path = pathutil.join(sourceroot, pat)
70 path = pathutil.join(sourceroot, pat)
71
71
72 newroot = pathutil.dirname(path)
72 newroot = pathutil.dirname(path)
73 matcherargs = (newroot, '', [], ['include:%s' % path])
73 matcherargs = (newroot, '', [], ['include:%s' % path])
74
74
75 prefix = pathutil.canonpath(root, root, newroot)
75 prefix = pathutil.canonpath(root, root, newroot)
76 if prefix:
76 if prefix:
77 prefix += '/'
77 prefix += '/'
78 relmatchers.append((prefix, matcherargs))
78 relmatchers.append((prefix, matcherargs))
79 else:
79 else:
80 other.append((kind, pat, source))
80 other.append((kind, pat, source))
81
81
82 return relmatchers, other
82 return relmatchers, other
83
83
84 def _kindpatsalwaysmatch(kindpats):
84 def _kindpatsalwaysmatch(kindpats):
85 """"Checks whether the kindspats match everything, as e.g.
85 """"Checks whether the kindspats match everything, as e.g.
86 'relpath:.' does.
86 'relpath:.' does.
87 """
87 """
88 for kind, pat, source in kindpats:
88 for kind, pat, source in kindpats:
89 if pat != '' or kind not in ['relpath', 'glob']:
89 if pat != '' or kind not in ['relpath', 'glob']:
90 return False
90 return False
91 return True
91 return True
92
92
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """Create a matcher object for a set of file patterns.

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - kind assumed for entries of patterns without an explicit kind
    exact - patterns are actually filenames (include/exclude still apply)
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Raises error.ProgrammingError when both icasefs and exact are requested.
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                original = pat
                pat = dsnormalize(original)

                # Preserve the original to handle a case only rename.
                if original != pat and original in dirstate:
                    folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded

    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = patternmatcher(root, cwd, kindpats, ctx=ctx,
                               listsubrepos=listsubrepos, badfn=badfn)

    if include:
        includepats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = includematcher(root, cwd, includepats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        excludepats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = includematcher(root, cwd, excludepats, ctx=ctx,
                            listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
175
175
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for files, forwarding root, cwd and badfn.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
178
178
def always(root, cwd):
    '''Return an alwaysmatcher built from root and cwd.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
181
181
def never(root, cwd):
    '''Return a nevermatcher built from root and cwd.'''
    matcher = nevermatcher(root, cwd)
    return matcher
184
184
def badmatch(match, badfn):
    """Return a shallow copy of match whose bad attribute is badfn.

    The matcher passed in is left untouched; only the copy gets the new
    callback.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
192
192
193 def _donormalize(patterns, default, root, cwd, auditor, warn):
193 def _donormalize(patterns, default, root, cwd, auditor, warn):
194 '''Convert 'kind:pat' from the patterns list to tuples with kind and
194 '''Convert 'kind:pat' from the patterns list to tuples with kind and
195 normalized and rooted patterns and with listfiles expanded.'''
195 normalized and rooted patterns and with listfiles expanded.'''
196 kindpats = []
196 kindpats = []
197 for kind, pat in [_patsplit(p, default) for p in patterns]:
197 for kind, pat in [_patsplit(p, default) for p in patterns]:
198 if kind in cwdrelativepatternkinds:
198 if kind in cwdrelativepatternkinds:
199 pat = pathutil.canonpath(root, cwd, pat, auditor)
199 pat = pathutil.canonpath(root, cwd, pat, auditor)
200 elif kind in ('relglob', 'path', 'rootfilesin'):
200 elif kind in ('relglob', 'path', 'rootfilesin'):
201 pat = util.normpath(pat)
201 pat = util.normpath(pat)
202 elif kind in ('listfile', 'listfile0'):
202 elif kind in ('listfile', 'listfile0'):
203 try:
203 try:
204 files = util.readfile(pat)
204 files = util.readfile(pat)
205 if kind == 'listfile0':
205 if kind == 'listfile0':
206 files = files.split('\0')
206 files = files.split('\0')
207 else:
207 else:
208 files = files.splitlines()
208 files = files.splitlines()
209 files = [f for f in files if f]
209 files = [f for f in files if f]
210 except EnvironmentError:
210 except EnvironmentError:
211 raise error.Abort(_("unable to read file list (%s)") % pat)
211 raise error.Abort(_("unable to read file list (%s)") % pat)
212 for k, p, source in _donormalize(files, default, root, cwd,
212 for k, p, source in _donormalize(files, default, root, cwd,
213 auditor, warn):
213 auditor, warn):
214 kindpats.append((k, p, pat))
214 kindpats.append((k, p, pat))
215 continue
215 continue
216 elif kind == 'include':
216 elif kind == 'include':
217 try:
217 try:
218 fullpath = os.path.join(root, util.localpath(pat))
218 fullpath = os.path.join(root, util.localpath(pat))
219 includepats = readpatternfile(fullpath, warn)
219 includepats = readpatternfile(fullpath, warn)
220 for k, p, source in _donormalize(includepats, default,
220 for k, p, source in _donormalize(includepats, default,
221 root, cwd, auditor, warn):
221 root, cwd, auditor, warn):
222 kindpats.append((k, p, source or pat))
222 kindpats.append((k, p, source or pat))
223 except error.Abort as inst:
223 except error.Abort as inst:
224 raise error.Abort('%s: %s' % (pat, inst[0]))
224 raise error.Abort('%s: %s' % (pat, inst[0]))
225 except IOError as inst:
225 except IOError as inst:
226 if warn:
226 if warn:
227 warn(_("skipping unreadable pattern file '%s': %s\n") %
227 warn(_("skipping unreadable pattern file '%s': %s\n") %
228 (pat, inst.strerror))
228 (pat, inst.strerror))
229 continue
229 continue
230 # else: re or relre - which cannot be normalized
230 # else: re or relre - which cannot be normalized
231 kindpats.append((kind, pat, ''))
231 kindpats.append((kind, pat, ''))
232 return kindpats
232 return kindpats
233
233
class basematcher(object):
    '''Base class for matchers.

    On its own it matches nothing (matchfn() returns False), lists no files,
    and asks for every directory to be visited. Subclasses override the
    pieces they need.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        self._relativeuipath = relativeuipath
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relpath = self.rel(f)
            if relpath:
                return relpath
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
333
330
class alwaysmatcher(basematcher):
    '''Matches everything.

    Unlike basematcher, relativeuipath defaults to False here.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        super(alwaysmatcher, self).__init__(root, cwd, badfn,
                                            relativeuipath=relativeuipath)

    def matchfn(self, f):
        # every file name matches
        return True

    def always(self):
        return True

    def visitdir(self, dir):
        # every directory, and everything below it, is of interest
        return 'all'

    def __repr__(self):
        return '<alwaysmatcher>'
352
349
class nevermatcher(basematcher):
    '''Matches nothing.

    matchfn() is inherited from basematcher and returns False for every file.
    '''

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    def visitdir(self, dir):
        # nothing can match, so no directory is worth visiting
        return False

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def __repr__(self):
        return '<nevermatcher>'
375
372
class patternmatcher(basematcher):
    '''Matcher built from a list of normalized (kind, pat, source) patterns.

    The explicit files, the prefix flag and the match function are all
    derived from kindpats at construction time.
    '''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        built = _buildmatch(ctx, kindpats, '$', listsubrepos, root)
        self._pats, self.matchfn = built

    @propertycache
    def _dirs(self):
        # directories reported by util.dirs() for the explicit files, plus
        # the root directory '.'
        parents = set(util.dirs(self._fileset))
        parents.add('.')
        return parents

    def visitdir(self, dir):
        if self._prefix and dir in self._fileset:
            return 'all'
        if '.' in self._fileset or dir in self._fileset:
            return True
        if dir in self._dirs:
            return True
        # visit anything located below one of the listed entries
        return any(parentdir in self._fileset
                   for parentdir in util.finddirs(dir))

    def prefix(self):
        return self._prefix

    def __repr__(self):
        return '<patternmatcher patterns=%r>' % self._pats
405
402
class includematcher(basematcher):
    '''Matcher built from include-style patterns.

    The match function is built with the '(?:/|$)' suffix instead of the '$'
    used by patternmatcher.
    '''

    def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
                 badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        built = _buildmatch(ctx, kindpats, '(?:/|$)', listsubrepos, root)
        self._pats, self.matchfn = built
        self._prefix = _prefix(kindpats)
        recursive, nonrecursive = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(recursive)
        # dirs are directories which are non-recursively included.
        self._dirs = set(nonrecursive)

    def visitdir(self, dir):
        if self._prefix and dir in self._roots:
            return 'all'
        if '.' in self._roots or dir in self._roots:
            return True
        if dir in self._dirs:
            return True
        # visit anything located below one of the recursive roots
        return any(parentdir in self._roots
                   for parentdir in util.finddirs(dir))

    def __repr__(self):
        return '<includematcher includes=%r>' % self._pats
432
429
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    # A file matches exactly when it is one of the listed files.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # directories reported by util.dirs() for the listed files, plus the
        # root directory '.'
        parents = set(util.dirs(self._fileset))
        parents.add('.')
        return parents

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return '<exactmatcher files=%r>' % self._files
460
457
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        # The walk callbacks come from the first matcher only.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        included = self._m1(f)
        # An exact match in m1 wins even when m2 also matches f.
        return included and (not self._m2(f) or self._m1.exact(f))

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we don't
            # want to remove "dir" from the set even though it would match m2,
            # because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        excluded = self._m2.visitdir(dir)
        if excluded == 'all':
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def __repr__(self):
        return '<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
510
507
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    If either argument is None, the other one is returned unchanged (None if
    both are None). If one of the matchers always matches, a shallow copy of
    the other one is returned instead of building an intersectionmatcher.
    '''
    # Compare against None explicitly instead of relying on the truthiness of
    # a matcher object.
    if m1 is None:
        return m2
    if m2 is None:
        return m1
    if m1.always():
        # m1 matches everything, so m2 alone decides what matches; the
        # non-matching attributes still have to come from m1.
        m = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        m.bad = m1.bad
        m.explicitdir = m1.explicitdir
        m.traversedir = m1.traversedir
        m.abs = m1.abs
        m.rel = m1.rel
        m._relativeuipath |= m1._relativeuipath
        return m
    if m2.always():
        # m2 matches everything: m1 already carries the right attributes.
        m = copy.copy(m1)
        m._relativeuipath |= m2._relativeuipath
        return m
    return intersectionmatcher(m1, m2)
535
532
class intersectionmatcher(basematcher):
    '''Matcher that accepts a file only if both wrapped matchers accept it.

    The root, cwd, bad, explicitdir and traversedir attributes are taken from
    the first matcher.
    '''
    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if self.isexact():
            # At least one side is exact: filter the files of an exact side
            # through the other matcher.
            m1, m2 = self._m1, self._m2
            if not m1.isexact():
                m1, m2 = m2, m1
            return [f for f in m1.files() if m2(f)]
        # If neither m1 nor m2 is an exact matcher, we can't easily intersect
        # the set of files, because their files() are not always files. For
        # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
        # "path:dir2", we don't want to remove "dir2" from the set.
        return self._m1.files() + self._m2.files()

    def matchfn(self, f):
        '''Return a true value if both matchers match f.'''
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        '''Combine the visitdir() answers of both matchers for dir.'''
        visit1 = self._m1.visitdir(dir)
        if visit1 == 'all':
            # m1 wants everything below dir, so m2 alone decides.
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit1 and self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
576
573
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> util.pconvert(m2.rel(b'b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    >>> m2.abs(b'c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the files below 'path', with the 'path/' prefix removed.
        prefix = path + "/"
        self._files = [f[len(prefix):] for f in matcher._files
                       if f.startswith(prefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher with the subdirectory prepended.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        '''Forward to the wrapped matcher with the subdirectory prepended.'''
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        '''Forward to the wrapped matcher with the subdirectory prepended.'''
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        '''Forward to the wrapped matcher with the subdirectory prepended.'''
        return self._matcher.uipath(self._path + "/" + f)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' denotes the subdirectory itself, which is self._path from the
        # wrapped matcher's point of view.
        if dir == '.':
            dir = self._path
        else:
            dir = self._path + "/" + dir
        return self._matcher.visitdir(dir)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
657
654
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (root, cwd, bad, explicitdir, traversedir) are
    taken from the first matcher.
    """

    def __init__(self, matchers):
        m1 = matchers[0]
        super(unionmatcher, self).__init__(m1._root, m1._cwd)
        # NOTE(review): 'bad' is listed in the class docstring but is not
        # copied from the first matcher here -- confirm that is intended.
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        '''Return True if any of the wrapped matchers matches f.'''
        # Do not call the loop variable 'match': that would shadow the
        # module-level match() function.
        for m in self._matchers:
            if m(f):
                return True
        return False

    def visitdir(self, dir):
        '''Return 'all' as soon as one matcher says so, else the OR of the
        individual visitdir() answers.'''
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == 'all':
                return v
            r |= v
        return r

    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
689
686
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return default.'''
    return _patsplit(pattern, default)[0]
693
690
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns (kind, pat) if pattern starts with 'kind:' and kind is listed in
    allpatternkinds, otherwise (default, pattern)."""
    # Only the first ':' separates the kind from the pattern.
    kind, sep, pat = pattern.partition(':')
    if sep and kind in allpatternkinds:
        return kind, pat
    return default, pattern
702
699
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a\/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    i, n = 0, len(pat)
    # Collect the pieces in a list and join once at the end instead of
    # repeatedly concatenating strings.
    res = []
    group = 0  # number of currently open '{' groups
    escape = util.re.escape
    def peek():
        # next unread character, or False at the end of the pattern
        return i < n and pat[i:i + 1]
    while i < n:
        # slice instead of index so that c is a string of length one
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            res.append(escape(c))
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches any number of leading directories
                    i += 1
                    res.append('(?:.*/)?')
                else:
                    res.append('.*')
            else:
                # a single '*' does not cross directory separators
                res.append('[^/]*')
        elif c == '?':
            res.append('.')
        elif c == '[':
            # look for the closing ']'; a '!' or ']' right after the opening
            # bracket is part of the class
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' as a literal
                res.append('\\[')
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation '!' becomes regexp negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in a glob class
                    stuff = '\\' + stuff
                res.append('[%s]' % stuff)
        elif c == '{':
            group += 1
            res.append('(?:')
        elif c == '}' and group:
            res.append(')')
            group -= 1
        elif c == ',' and group:
            res.append('|')
        elif c == '\\':
            # a backslash escapes the following character, if any
            p = peek()
            if p:
                i += 1
                res.append(escape(p))
            else:
                res.append(escape(c))
        else:
            # '}' or ',' outside of a group
            res.append(escape(c))
    return ''.join(res)
783
780
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pattern yields an empty regexp.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        # match the path itself or anything below it
        return util.re.escape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        if pat == '.':
            escaped = ''
        else:
            # Pattern is a directory name.
            escaped = util.re.escape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return escaped + '[^/]+$'
    if kind == 'relglob':
        # the glob may start at any directory level
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind == 'relre':
        if pat.startswith('^'):
            return pat
        return '.*' + pat
    # every remaining kind is converted as a glob
    return _globre(pat) + globsuffix
810
807
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    Up to three match functions are collected (subincludes, file sets and
    the regexp built from the remaining patterns); the returned function
    matches if any of them matches.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes are built on first use and cached by
        # prefix
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if f.startswith(prefix):
                    mf = submatchers.get(prefix)
                    if mf is None:
                        mf = match(*matcherargs)
                        submatchers[prefix] = mf

                    # the sub-matcher sees the path without the prefix
                    if mf(f[len(prefix):]):
                        return True
            return False
        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    else:
        # with no match function at all, this never matches
        return regex, lambda f: any(fn(f) for fn in matchfuncs)
845
842
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort if one of the patterns is not a valid regexp."""
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p, s) in kindpats])
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        # only the match functions of the halves are needed; the regexp
        # string returned is still the one for the whole list
        a = _buildregexmatch(kindpats[:half], globsuffix)[1]
        b = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one by one to report the offending one
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
876
873
def _patternrootsanddirs(kindpats):
    '''Returns roots and directories corresponding to each pattern.

    This calculates the roots and directories exactly matching the patterns and
    returns a tuple of (roots, dirs) for each. It does not return other
    directories which may also need to be considered, like the parent
    directories.
    '''
    r = []
    d = []
    for kind, pat, source in kindpats:
        if kind == 'glob': # find the non-glob prefix
            root = []
            for p in pat.split('/'):
                # stop at the first path component using glob syntax
                if '[' in p or '{' in p or '*' in p or '?' in p:
                    break
                root.append(p)
            r.append('/'.join(root) or '.')
        elif kind in ('relpath', 'path'):
            r.append(pat or '.')
        elif kind == 'rootfilesin':
            d.append(pat or '.')
        else: # relglob, re, relre
            r.append('.')
    return r, d
902
899
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # only the roots are of interest here, not the exact directories
    return _patternrootsanddirs(kindpats)[0]
907
904
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], ['.'])
    '''
    r, d = _patternrootsanddirs(kindpats)

    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    d.extend(util.dirs(d))
    d.extend(util.dirs(r))
    # util.dirs() does not include the root directory, so add it manually
    d.append('.')

    return r, d
942
939
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(filable)
955
952
def _prefix(kindpats):
    '''Whether all the patterns match a prefix (i.e. recursively)

    True for an empty pattern list.'''
    return all(kind in ('path', 'relpath') for kind, pat, source in kindpats)
962
959
# compiled lazily by readpatternfile() the first time a line contains '#'
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    an unknown "syntax:" value is reported through warn (if warn is not
    false) and otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where line is the text of the line once
    comments, trailing white space and any syntax prefix have been removed.
    This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # 'with' makes sure the file is closed even if warn() or the parsing
    # below raises
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # an explicit prefix on the line overrides the current default
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now