##// END OF EJS Templates
match: add basic wrapper for boolean function...
Yuya Nishihara -
r38596:76838305 default
parent child Browse files
Show More
@@ -1,1032 +1,1046
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import os
11 import os
12 import re
12 import re
13
13
14 from .i18n import _
14 from .i18n import _
15 from . import (
15 from . import (
16 encoding,
16 encoding,
17 error,
17 error,
18 pathutil,
18 pathutil,
19 pycompat,
19 pycompat,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
26 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
26 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
27 'listfile', 'listfile0', 'set', 'include', 'subinclude',
27 'listfile', 'listfile0', 'set', 'include', 'subinclude',
28 'rootfilesin')
28 'rootfilesin')
29 cwdrelativepatternkinds = ('relpath', 'glob')
29 cwdrelativepatternkinds = ('relpath', 'glob')
30
30
31 propertycache = util.propertycache
31 propertycache = util.propertycache
32
32
33 def _rematcher(regex):
33 def _rematcher(regex):
34 '''compile the regexp with the best available regexp engine and return a
34 '''compile the regexp with the best available regexp engine and return a
35 matcher function'''
35 matcher function'''
36 m = util.re.compile(regex)
36 m = util.re.compile(regex)
37 try:
37 try:
38 # slightly faster, provided by facebook's re2 bindings
38 # slightly faster, provided by facebook's re2 bindings
39 return m.test_match
39 return m.test_match
40 except AttributeError:
40 except AttributeError:
41 return m.match
41 return m.match
42
42
43 def _expandsets(kindpats, ctx, listsubrepos):
43 def _expandsets(kindpats, ctx, listsubrepos):
44 '''Returns the kindpats list with the 'set' patterns expanded.'''
44 '''Returns the kindpats list with the 'set' patterns expanded.'''
45 fset = set()
45 fset = set()
46 other = []
46 other = []
47
47
48 for kind, pat, source in kindpats:
48 for kind, pat, source in kindpats:
49 if kind == 'set':
49 if kind == 'set':
50 if not ctx:
50 if not ctx:
51 raise error.ProgrammingError("fileset expression with no "
51 raise error.ProgrammingError("fileset expression with no "
52 "context")
52 "context")
53 s = ctx.getfileset(pat)
53 s = ctx.getfileset(pat)
54 fset.update(s)
54 fset.update(s)
55
55
56 if listsubrepos:
56 if listsubrepos:
57 for subpath in ctx.substate:
57 for subpath in ctx.substate:
58 s = ctx.sub(subpath).getfileset(pat)
58 s = ctx.sub(subpath).getfileset(pat)
59 fset.update(subpath + '/' + f for f in s)
59 fset.update(subpath + '/' + f for f in s)
60
60
61 continue
61 continue
62 other.append((kind, pat, source))
62 other.append((kind, pat, source))
63 return fset, other
63 return fset, other
64
64
65 def _expandsubinclude(kindpats, root):
65 def _expandsubinclude(kindpats, root):
66 '''Returns the list of subinclude matcher args and the kindpats without the
66 '''Returns the list of subinclude matcher args and the kindpats without the
67 subincludes in it.'''
67 subincludes in it.'''
68 relmatchers = []
68 relmatchers = []
69 other = []
69 other = []
70
70
71 for kind, pat, source in kindpats:
71 for kind, pat, source in kindpats:
72 if kind == 'subinclude':
72 if kind == 'subinclude':
73 sourceroot = pathutil.dirname(util.normpath(source))
73 sourceroot = pathutil.dirname(util.normpath(source))
74 pat = util.pconvert(pat)
74 pat = util.pconvert(pat)
75 path = pathutil.join(sourceroot, pat)
75 path = pathutil.join(sourceroot, pat)
76
76
77 newroot = pathutil.dirname(path)
77 newroot = pathutil.dirname(path)
78 matcherargs = (newroot, '', [], ['include:%s' % path])
78 matcherargs = (newroot, '', [], ['include:%s' % path])
79
79
80 prefix = pathutil.canonpath(root, root, newroot)
80 prefix = pathutil.canonpath(root, root, newroot)
81 if prefix:
81 if prefix:
82 prefix += '/'
82 prefix += '/'
83 relmatchers.append((prefix, matcherargs))
83 relmatchers.append((prefix, matcherargs))
84 else:
84 else:
85 other.append((kind, pat, source))
85 other.append((kind, pat, source))
86
86
87 return relmatchers, other
87 return relmatchers, other
88
88
89 def _kindpatsalwaysmatch(kindpats):
89 def _kindpatsalwaysmatch(kindpats):
90 """Checks whether the kindpats match everything, as e.g.
90 """Checks whether the kindpats match everything, as e.g.
91 'relpath:.' does.
91 'relpath:.' does.
92 """
92 """
93 for kind, pat, source in kindpats:
93 for kind, pat, source in kindpats:
94 if pat != '' or kind not in ['relpath', 'glob']:
94 if pat != '' or kind not in ['relpath', 'glob']:
95 return False
95 return False
96 return True
96 return True
97
97
98 def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
98 def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
99 exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
99 exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
100 badfn=None, icasefs=False):
100 badfn=None, icasefs=False):
101 """build an object to match a set of file patterns
101 """build an object to match a set of file patterns
102
102
103 arguments:
103 arguments:
104 root - the canonical root of the tree you're matching against
104 root - the canonical root of the tree you're matching against
105 cwd - the current working directory, if relevant
105 cwd - the current working directory, if relevant
106 patterns - patterns to find
106 patterns - patterns to find
107 include - patterns to include (unless they are excluded)
107 include - patterns to include (unless they are excluded)
108 exclude - patterns to exclude (even if they are included)
108 exclude - patterns to exclude (even if they are included)
109 default - if a pattern in patterns has no explicit type, assume this one
109 default - if a pattern in patterns has no explicit type, assume this one
110 exact - patterns are actually filenames (include/exclude still apply)
110 exact - patterns are actually filenames (include/exclude still apply)
111 warn - optional function used for printing warnings
111 warn - optional function used for printing warnings
112 badfn - optional bad() callback for this matcher instead of the default
112 badfn - optional bad() callback for this matcher instead of the default
113 icasefs - make a matcher for wdir on case insensitive filesystems, which
113 icasefs - make a matcher for wdir on case insensitive filesystems, which
114 normalizes the given patterns to the case in the filesystem
114 normalizes the given patterns to the case in the filesystem
115
115
116 a pattern is one of:
116 a pattern is one of:
117 'glob:<glob>' - a glob relative to cwd
117 'glob:<glob>' - a glob relative to cwd
118 're:<regexp>' - a regular expression
118 're:<regexp>' - a regular expression
119 'path:<path>' - a path relative to repository root, which is matched
119 'path:<path>' - a path relative to repository root, which is matched
120 recursively
120 recursively
121 'rootfilesin:<path>' - a path relative to repository root, which is
121 'rootfilesin:<path>' - a path relative to repository root, which is
122 matched non-recursively (will not match subdirectories)
122 matched non-recursively (will not match subdirectories)
123 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
123 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
124 'relpath:<path>' - a path relative to cwd
124 'relpath:<path>' - a path relative to cwd
125 'relre:<regexp>' - a regexp that needn't match the start of a name
125 'relre:<regexp>' - a regexp that needn't match the start of a name
126 'set:<fileset>' - a fileset expression
126 'set:<fileset>' - a fileset expression
127 'include:<path>' - a file of patterns to read and include
127 'include:<path>' - a file of patterns to read and include
128 'subinclude:<path>' - a file of patterns to match against files under
128 'subinclude:<path>' - a file of patterns to match against files under
129 the same directory
129 the same directory
130 '<something>' - a pattern of the specified default type
130 '<something>' - a pattern of the specified default type
131 """
131 """
132 normalize = _donormalize
132 normalize = _donormalize
133 if icasefs:
133 if icasefs:
134 if exact:
134 if exact:
135 raise error.ProgrammingError("a case-insensitive exact matcher "
135 raise error.ProgrammingError("a case-insensitive exact matcher "
136 "doesn't make sense")
136 "doesn't make sense")
137 dirstate = ctx.repo().dirstate
137 dirstate = ctx.repo().dirstate
138 dsnormalize = dirstate.normalize
138 dsnormalize = dirstate.normalize
139
139
140 def normalize(patterns, default, root, cwd, auditor, warn):
140 def normalize(patterns, default, root, cwd, auditor, warn):
141 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
141 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
142 kindpats = []
142 kindpats = []
143 for kind, pats, source in kp:
143 for kind, pats, source in kp:
144 if kind not in ('re', 'relre'): # regex can't be normalized
144 if kind not in ('re', 'relre'): # regex can't be normalized
145 p = pats
145 p = pats
146 pats = dsnormalize(pats)
146 pats = dsnormalize(pats)
147
147
148 # Preserve the original to handle a case only rename.
148 # Preserve the original to handle a case only rename.
149 if p != pats and p in dirstate:
149 if p != pats and p in dirstate:
150 kindpats.append((kind, p, source))
150 kindpats.append((kind, p, source))
151
151
152 kindpats.append((kind, pats, source))
152 kindpats.append((kind, pats, source))
153 return kindpats
153 return kindpats
154
154
155 if exact:
155 if exact:
156 m = exactmatcher(root, cwd, patterns, badfn)
156 m = exactmatcher(root, cwd, patterns, badfn)
157 elif patterns:
157 elif patterns:
158 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
158 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
159 if _kindpatsalwaysmatch(kindpats):
159 if _kindpatsalwaysmatch(kindpats):
160 m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
160 m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
161 else:
161 else:
162 m = patternmatcher(root, cwd, kindpats, ctx=ctx,
162 m = patternmatcher(root, cwd, kindpats, ctx=ctx,
163 listsubrepos=listsubrepos, badfn=badfn)
163 listsubrepos=listsubrepos, badfn=badfn)
164 else:
164 else:
165 # It's a little strange that no patterns means to match everything.
165 # It's a little strange that no patterns means to match everything.
166 # Consider changing this to match nothing (probably using nevermatcher).
166 # Consider changing this to match nothing (probably using nevermatcher).
167 m = alwaysmatcher(root, cwd, badfn)
167 m = alwaysmatcher(root, cwd, badfn)
168
168
169 if include:
169 if include:
170 kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
170 kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
171 im = includematcher(root, cwd, kindpats, ctx=ctx,
171 im = includematcher(root, cwd, kindpats, ctx=ctx,
172 listsubrepos=listsubrepos, badfn=None)
172 listsubrepos=listsubrepos, badfn=None)
173 m = intersectmatchers(m, im)
173 m = intersectmatchers(m, im)
174 if exclude:
174 if exclude:
175 kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
175 kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
176 em = includematcher(root, cwd, kindpats, ctx=ctx,
176 em = includematcher(root, cwd, kindpats, ctx=ctx,
177 listsubrepos=listsubrepos, badfn=None)
177 listsubrepos=listsubrepos, badfn=None)
178 m = differencematcher(m, em)
178 m = differencematcher(m, em)
179 return m
179 return m
180
180
181 def exact(root, cwd, files, badfn=None):
181 def exact(root, cwd, files, badfn=None):
182 return exactmatcher(root, cwd, files, badfn=badfn)
182 return exactmatcher(root, cwd, files, badfn=badfn)
183
183
184 def always(root, cwd):
184 def always(root, cwd):
185 return alwaysmatcher(root, cwd)
185 return alwaysmatcher(root, cwd)
186
186
187 def never(root, cwd):
187 def never(root, cwd):
188 return nevermatcher(root, cwd)
188 return nevermatcher(root, cwd)
189
189
190 def badmatch(match, badfn):
190 def badmatch(match, badfn):
191 """Make a copy of the given matcher, replacing its bad method with the given
191 """Make a copy of the given matcher, replacing its bad method with the given
192 one.
192 one.
193 """
193 """
194 m = copy.copy(match)
194 m = copy.copy(match)
195 m.bad = badfn
195 m.bad = badfn
196 return m
196 return m
197
197
198 def _donormalize(patterns, default, root, cwd, auditor, warn):
198 def _donormalize(patterns, default, root, cwd, auditor, warn):
199 '''Convert 'kind:pat' from the patterns list to tuples with kind and
199 '''Convert 'kind:pat' from the patterns list to tuples with kind and
200 normalized and rooted patterns and with listfiles expanded.'''
200 normalized and rooted patterns and with listfiles expanded.'''
201 kindpats = []
201 kindpats = []
202 for kind, pat in [_patsplit(p, default) for p in patterns]:
202 for kind, pat in [_patsplit(p, default) for p in patterns]:
203 if kind in cwdrelativepatternkinds:
203 if kind in cwdrelativepatternkinds:
204 pat = pathutil.canonpath(root, cwd, pat, auditor)
204 pat = pathutil.canonpath(root, cwd, pat, auditor)
205 elif kind in ('relglob', 'path', 'rootfilesin'):
205 elif kind in ('relglob', 'path', 'rootfilesin'):
206 pat = util.normpath(pat)
206 pat = util.normpath(pat)
207 elif kind in ('listfile', 'listfile0'):
207 elif kind in ('listfile', 'listfile0'):
208 try:
208 try:
209 files = util.readfile(pat)
209 files = util.readfile(pat)
210 if kind == 'listfile0':
210 if kind == 'listfile0':
211 files = files.split('\0')
211 files = files.split('\0')
212 else:
212 else:
213 files = files.splitlines()
213 files = files.splitlines()
214 files = [f for f in files if f]
214 files = [f for f in files if f]
215 except EnvironmentError:
215 except EnvironmentError:
216 raise error.Abort(_("unable to read file list (%s)") % pat)
216 raise error.Abort(_("unable to read file list (%s)") % pat)
217 for k, p, source in _donormalize(files, default, root, cwd,
217 for k, p, source in _donormalize(files, default, root, cwd,
218 auditor, warn):
218 auditor, warn):
219 kindpats.append((k, p, pat))
219 kindpats.append((k, p, pat))
220 continue
220 continue
221 elif kind == 'include':
221 elif kind == 'include':
222 try:
222 try:
223 fullpath = os.path.join(root, util.localpath(pat))
223 fullpath = os.path.join(root, util.localpath(pat))
224 includepats = readpatternfile(fullpath, warn)
224 includepats = readpatternfile(fullpath, warn)
225 for k, p, source in _donormalize(includepats, default,
225 for k, p, source in _donormalize(includepats, default,
226 root, cwd, auditor, warn):
226 root, cwd, auditor, warn):
227 kindpats.append((k, p, source or pat))
227 kindpats.append((k, p, source or pat))
228 except error.Abort as inst:
228 except error.Abort as inst:
229 raise error.Abort('%s: %s' % (pat, inst[0]))
229 raise error.Abort('%s: %s' % (pat, inst[0]))
230 except IOError as inst:
230 except IOError as inst:
231 if warn:
231 if warn:
232 warn(_("skipping unreadable pattern file '%s': %s\n") %
232 warn(_("skipping unreadable pattern file '%s': %s\n") %
233 (pat, stringutil.forcebytestr(inst.strerror)))
233 (pat, stringutil.forcebytestr(inst.strerror)))
234 continue
234 continue
235 # else: re or relre - which cannot be normalized
235 # else: re or relre - which cannot be normalized
236 kindpats.append((kind, pat, ''))
236 kindpats.append((kind, pat, ''))
237 return kindpats
237 return kindpats
238
238
239 class basematcher(object):
239 class basematcher(object):
240
240
241 def __init__(self, root, cwd, badfn=None, relativeuipath=True):
241 def __init__(self, root, cwd, badfn=None, relativeuipath=True):
242 self._root = root
242 self._root = root
243 self._cwd = cwd
243 self._cwd = cwd
244 if badfn is not None:
244 if badfn is not None:
245 self.bad = badfn
245 self.bad = badfn
246 self._relativeuipath = relativeuipath
246 self._relativeuipath = relativeuipath
247
247
248 def __call__(self, fn):
248 def __call__(self, fn):
249 return self.matchfn(fn)
249 return self.matchfn(fn)
250 def __iter__(self):
250 def __iter__(self):
251 for f in self._files:
251 for f in self._files:
252 yield f
252 yield f
253 # Callbacks related to how the matcher is used by dirstate.walk.
253 # Callbacks related to how the matcher is used by dirstate.walk.
254 # Subscribers to these events must monkeypatch the matcher object.
254 # Subscribers to these events must monkeypatch the matcher object.
255 def bad(self, f, msg):
255 def bad(self, f, msg):
256 '''Callback from dirstate.walk for each explicit file that can't be
256 '''Callback from dirstate.walk for each explicit file that can't be
257 found/accessed, with an error message.'''
257 found/accessed, with an error message.'''
258
258
259 # If an explicitdir is set, it will be called when an explicitly listed
259 # If an explicitdir is set, it will be called when an explicitly listed
260 # directory is visited.
260 # directory is visited.
261 explicitdir = None
261 explicitdir = None
262
262
263 # If a traversedir is set, it will be called when a directory discovered
263 # If a traversedir is set, it will be called when a directory discovered
264 # by recursive traversal is visited.
264 # by recursive traversal is visited.
265 traversedir = None
265 traversedir = None
266
266
267 def abs(self, f):
267 def abs(self, f):
268 '''Convert a repo path back to path that is relative to the root of the
268 '''Convert a repo path back to path that is relative to the root of the
269 matcher.'''
269 matcher.'''
270 return f
270 return f
271
271
272 def rel(self, f):
272 def rel(self, f):
273 '''Convert repo path back to path that is relative to cwd of matcher.'''
273 '''Convert repo path back to path that is relative to cwd of matcher.'''
274 return util.pathto(self._root, self._cwd, f)
274 return util.pathto(self._root, self._cwd, f)
275
275
276 def uipath(self, f):
276 def uipath(self, f):
277 '''Convert repo path to a display path. If patterns or -I/-X were used
277 '''Convert repo path to a display path. If patterns or -I/-X were used
278 to create this matcher, the display path will be relative to cwd.
278 to create this matcher, the display path will be relative to cwd.
279 Otherwise it is relative to the root of the repo.'''
279 Otherwise it is relative to the root of the repo.'''
280 return (self._relativeuipath and self.rel(f)) or self.abs(f)
280 return (self._relativeuipath and self.rel(f)) or self.abs(f)
281
281
282 @propertycache
282 @propertycache
283 def _files(self):
283 def _files(self):
284 return []
284 return []
285
285
286 def files(self):
286 def files(self):
287 '''Explicitly listed files or patterns or roots:
287 '''Explicitly listed files or patterns or roots:
288 if no patterns or .always(): empty list,
288 if no patterns or .always(): empty list,
289 if exact: list exact files,
289 if exact: list exact files,
290 if not .anypats(): list all files and dirs,
290 if not .anypats(): list all files and dirs,
291 else: optimal roots'''
291 else: optimal roots'''
292 return self._files
292 return self._files
293
293
294 @propertycache
294 @propertycache
295 def _fileset(self):
295 def _fileset(self):
296 return set(self._files)
296 return set(self._files)
297
297
298 def exact(self, f):
298 def exact(self, f):
299 '''Returns True if f is in .files().'''
299 '''Returns True if f is in .files().'''
300 return f in self._fileset
300 return f in self._fileset
301
301
302 def matchfn(self, f):
302 def matchfn(self, f):
303 return False
303 return False
304
304
305 def visitdir(self, dir):
305 def visitdir(self, dir):
306 '''Decides whether a directory should be visited based on whether it
306 '''Decides whether a directory should be visited based on whether it
307 has potential matches in it or one of its subdirectories. This is
307 has potential matches in it or one of its subdirectories. This is
308 based on the match's primary, included, and excluded patterns.
308 based on the match's primary, included, and excluded patterns.
309
309
310 Returns the string 'all' if the given directory and all subdirectories
310 Returns the string 'all' if the given directory and all subdirectories
311 should be visited. Otherwise returns True or False indicating whether
311 should be visited. Otherwise returns True or False indicating whether
312 the given directory should be visited.
312 the given directory should be visited.
313 '''
313 '''
314 return True
314 return True
315
315
316 def always(self):
316 def always(self):
317 '''Matcher will match everything and .files() will be empty --
317 '''Matcher will match everything and .files() will be empty --
318 optimization might be possible.'''
318 optimization might be possible.'''
319 return False
319 return False
320
320
321 def isexact(self):
321 def isexact(self):
322 '''Matcher will match exactly the list of files in .files() --
322 '''Matcher will match exactly the list of files in .files() --
323 optimization might be possible.'''
323 optimization might be possible.'''
324 return False
324 return False
325
325
326 def prefix(self):
326 def prefix(self):
327 '''Matcher will match the paths in .files() recursively --
327 '''Matcher will match the paths in .files() recursively --
328 optimization might be possible.'''
328 optimization might be possible.'''
329 return False
329 return False
330
330
331 def anypats(self):
331 def anypats(self):
332 '''None of .always(), .isexact(), and .prefix() is true --
332 '''None of .always(), .isexact(), and .prefix() is true --
333 optimizations will be difficult.'''
333 optimizations will be difficult.'''
334 return not self.always() and not self.isexact() and not self.prefix()
334 return not self.always() and not self.isexact() and not self.prefix()
335
335
336 class alwaysmatcher(basematcher):
336 class alwaysmatcher(basematcher):
337 '''Matches everything.'''
337 '''Matches everything.'''
338
338
339 def __init__(self, root, cwd, badfn=None, relativeuipath=False):
339 def __init__(self, root, cwd, badfn=None, relativeuipath=False):
340 super(alwaysmatcher, self).__init__(root, cwd, badfn,
340 super(alwaysmatcher, self).__init__(root, cwd, badfn,
341 relativeuipath=relativeuipath)
341 relativeuipath=relativeuipath)
342
342
343 def always(self):
343 def always(self):
344 return True
344 return True
345
345
346 def matchfn(self, f):
346 def matchfn(self, f):
347 return True
347 return True
348
348
349 def visitdir(self, dir):
349 def visitdir(self, dir):
350 return 'all'
350 return 'all'
351
351
352 def __repr__(self):
352 def __repr__(self):
353 return r'<alwaysmatcher>'
353 return r'<alwaysmatcher>'
354
354
355 class nevermatcher(basematcher):
355 class nevermatcher(basematcher):
356 '''Matches nothing.'''
356 '''Matches nothing.'''
357
357
358 def __init__(self, root, cwd, badfn=None):
358 def __init__(self, root, cwd, badfn=None):
359 super(nevermatcher, self).__init__(root, cwd, badfn)
359 super(nevermatcher, self).__init__(root, cwd, badfn)
360
360
361 # It's a little weird to say that the nevermatcher is an exact matcher
361 # It's a little weird to say that the nevermatcher is an exact matcher
362 # or a prefix matcher, but it seems to make sense to let callers take
362 # or a prefix matcher, but it seems to make sense to let callers take
363 # fast paths based on either. There will be no exact matches, nor any
363 # fast paths based on either. There will be no exact matches, nor any
364 # prefixes (files() returns []), so fast paths iterating over them should
364 # prefixes (files() returns []), so fast paths iterating over them should
365 # be efficient (and correct).
365 # be efficient (and correct).
366 def isexact(self):
366 def isexact(self):
367 return True
367 return True
368
368
369 def prefix(self):
369 def prefix(self):
370 return True
370 return True
371
371
372 def visitdir(self, dir):
372 def visitdir(self, dir):
373 return False
373 return False
374
374
375 def __repr__(self):
375 def __repr__(self):
376 return r'<nevermatcher>'
376 return r'<nevermatcher>'
377
377
378 class predicatematcher(basematcher):
379 """A matcher adapter for a simple boolean function"""
380
381 def __init__(self, root, cwd, predfn, predrepr=None, badfn=None):
382 super(predicatematcher, self).__init__(root, cwd, badfn)
383 self.matchfn = predfn
384 self._predrepr = predrepr
385
386 @encoding.strmethod
387 def __repr__(self):
388 s = (stringutil.buildrepr(self._predrepr)
389 or pycompat.byterepr(self.matchfn))
390 return '<predicatenmatcher pred=%s>' % s
391
378 class patternmatcher(basematcher):
392 class patternmatcher(basematcher):
379
393
380 def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
394 def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
381 badfn=None):
395 badfn=None):
382 super(patternmatcher, self).__init__(root, cwd, badfn)
396 super(patternmatcher, self).__init__(root, cwd, badfn)
383
397
384 self._files = _explicitfiles(kindpats)
398 self._files = _explicitfiles(kindpats)
385 self._prefix = _prefix(kindpats)
399 self._prefix = _prefix(kindpats)
386 self._pats, self.matchfn = _buildmatch(ctx, kindpats, '$', listsubrepos,
400 self._pats, self.matchfn = _buildmatch(ctx, kindpats, '$', listsubrepos,
387 root)
401 root)
388
402
389 @propertycache
403 @propertycache
390 def _dirs(self):
404 def _dirs(self):
391 return set(util.dirs(self._fileset)) | {'.'}
405 return set(util.dirs(self._fileset)) | {'.'}
392
406
393 def visitdir(self, dir):
407 def visitdir(self, dir):
394 if self._prefix and dir in self._fileset:
408 if self._prefix and dir in self._fileset:
395 return 'all'
409 return 'all'
396 return ('.' in self._fileset or
410 return ('.' in self._fileset or
397 dir in self._fileset or
411 dir in self._fileset or
398 dir in self._dirs or
412 dir in self._dirs or
399 any(parentdir in self._fileset
413 any(parentdir in self._fileset
400 for parentdir in util.finddirs(dir)))
414 for parentdir in util.finddirs(dir)))
401
415
402 def prefix(self):
416 def prefix(self):
403 return self._prefix
417 return self._prefix
404
418
405 @encoding.strmethod
419 @encoding.strmethod
406 def __repr__(self):
420 def __repr__(self):
407 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
421 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
408
422
409 class includematcher(basematcher):
423 class includematcher(basematcher):
410
424
411 def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
425 def __init__(self, root, cwd, kindpats, ctx=None, listsubrepos=False,
412 badfn=None):
426 badfn=None):
413 super(includematcher, self).__init__(root, cwd, badfn)
427 super(includematcher, self).__init__(root, cwd, badfn)
414
428
415 self._pats, self.matchfn = _buildmatch(ctx, kindpats, '(?:/|$)',
429 self._pats, self.matchfn = _buildmatch(ctx, kindpats, '(?:/|$)',
416 listsubrepos, root)
430 listsubrepos, root)
417 self._prefix = _prefix(kindpats)
431 self._prefix = _prefix(kindpats)
418 roots, dirs = _rootsanddirs(kindpats)
432 roots, dirs = _rootsanddirs(kindpats)
419 # roots are directories which are recursively included.
433 # roots are directories which are recursively included.
420 self._roots = set(roots)
434 self._roots = set(roots)
421 # dirs are directories which are non-recursively included.
435 # dirs are directories which are non-recursively included.
422 self._dirs = set(dirs)
436 self._dirs = set(dirs)
423
437
424 def visitdir(self, dir):
438 def visitdir(self, dir):
425 if self._prefix and dir in self._roots:
439 if self._prefix and dir in self._roots:
426 return 'all'
440 return 'all'
427 return ('.' in self._roots or
441 return ('.' in self._roots or
428 dir in self._roots or
442 dir in self._roots or
429 dir in self._dirs or
443 dir in self._dirs or
430 any(parentdir in self._roots
444 any(parentdir in self._roots
431 for parentdir in util.finddirs(dir)))
445 for parentdir in util.finddirs(dir)))
432
446
433 @encoding.strmethod
447 @encoding.strmethod
434 def __repr__(self):
448 def __repr__(self):
435 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
449 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
436
450
437 class exactmatcher(basematcher):
451 class exactmatcher(basematcher):
438 '''Matches the input files exactly. They are interpreted as paths, not
452 '''Matches the input files exactly. They are interpreted as paths, not
439 patterns (so no kind-prefixes).
453 patterns (so no kind-prefixes).
440 '''
454 '''
441
455
442 def __init__(self, root, cwd, files, badfn=None):
456 def __init__(self, root, cwd, files, badfn=None):
443 super(exactmatcher, self).__init__(root, cwd, badfn)
457 super(exactmatcher, self).__init__(root, cwd, badfn)
444
458
445 if isinstance(files, list):
459 if isinstance(files, list):
446 self._files = files
460 self._files = files
447 else:
461 else:
448 self._files = list(files)
462 self._files = list(files)
449
463
450 matchfn = basematcher.exact
464 matchfn = basematcher.exact
451
465
452 @propertycache
466 @propertycache
453 def _dirs(self):
467 def _dirs(self):
454 return set(util.dirs(self._fileset)) | {'.'}
468 return set(util.dirs(self._fileset)) | {'.'}
455
469
456 def visitdir(self, dir):
470 def visitdir(self, dir):
457 return dir in self._dirs
471 return dir in self._dirs
458
472
459 def isexact(self):
473 def isexact(self):
460 return True
474 return True
461
475
462 @encoding.strmethod
476 @encoding.strmethod
463 def __repr__(self):
477 def __repr__(self):
464 return ('<exactmatcher files=%r>' % self._files)
478 return ('<exactmatcher files=%r>' % self._files)
465
479
466 class differencematcher(basematcher):
480 class differencematcher(basematcher):
467 '''Composes two matchers by matching if the first matches and the second
481 '''Composes two matchers by matching if the first matches and the second
468 does not.
482 does not.
469
483
470 The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
484 The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
471 traversedir) are ignored.
485 traversedir) are ignored.
472 '''
486 '''
473 def __init__(self, m1, m2):
487 def __init__(self, m1, m2):
474 super(differencematcher, self).__init__(m1._root, m1._cwd)
488 super(differencematcher, self).__init__(m1._root, m1._cwd)
475 self._m1 = m1
489 self._m1 = m1
476 self._m2 = m2
490 self._m2 = m2
477 self.bad = m1.bad
491 self.bad = m1.bad
478 self.explicitdir = m1.explicitdir
492 self.explicitdir = m1.explicitdir
479 self.traversedir = m1.traversedir
493 self.traversedir = m1.traversedir
480
494
481 def matchfn(self, f):
495 def matchfn(self, f):
482 return self._m1(f) and not self._m2(f)
496 return self._m1(f) and not self._m2(f)
483
497
484 @propertycache
498 @propertycache
485 def _files(self):
499 def _files(self):
486 if self.isexact():
500 if self.isexact():
487 return [f for f in self._m1.files() if self(f)]
501 return [f for f in self._m1.files() if self(f)]
488 # If m1 is not an exact matcher, we can't easily figure out the set of
502 # If m1 is not an exact matcher, we can't easily figure out the set of
489 # files, because its files() are not always files. For example, if
503 # files, because its files() are not always files. For example, if
490 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
504 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
491 # want to remove "dir" from the set even though it would match m2,
505 # want to remove "dir" from the set even though it would match m2,
492 # because the "dir" in m1 may not be a file.
506 # because the "dir" in m1 may not be a file.
493 return self._m1.files()
507 return self._m1.files()
494
508
495 def visitdir(self, dir):
509 def visitdir(self, dir):
496 if self._m2.visitdir(dir) == 'all':
510 if self._m2.visitdir(dir) == 'all':
497 return False
511 return False
498 return bool(self._m1.visitdir(dir))
512 return bool(self._m1.visitdir(dir))
499
513
500 def isexact(self):
514 def isexact(self):
501 return self._m1.isexact()
515 return self._m1.isexact()
502
516
503 @encoding.strmethod
517 @encoding.strmethod
504 def __repr__(self):
518 def __repr__(self):
505 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
519 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
506
520
507 def intersectmatchers(m1, m2):
521 def intersectmatchers(m1, m2):
508 '''Composes two matchers by matching if both of them match.
522 '''Composes two matchers by matching if both of them match.
509
523
510 The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
524 The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
511 traversedir) are ignored.
525 traversedir) are ignored.
512 '''
526 '''
513 if m1 is None or m2 is None:
527 if m1 is None or m2 is None:
514 return m1 or m2
528 return m1 or m2
515 if m1.always():
529 if m1.always():
516 m = copy.copy(m2)
530 m = copy.copy(m2)
517 # TODO: Consider encapsulating these things in a class so there's only
531 # TODO: Consider encapsulating these things in a class so there's only
518 # one thing to copy from m1.
532 # one thing to copy from m1.
519 m.bad = m1.bad
533 m.bad = m1.bad
520 m.explicitdir = m1.explicitdir
534 m.explicitdir = m1.explicitdir
521 m.traversedir = m1.traversedir
535 m.traversedir = m1.traversedir
522 m.abs = m1.abs
536 m.abs = m1.abs
523 m.rel = m1.rel
537 m.rel = m1.rel
524 m._relativeuipath |= m1._relativeuipath
538 m._relativeuipath |= m1._relativeuipath
525 return m
539 return m
526 if m2.always():
540 if m2.always():
527 m = copy.copy(m1)
541 m = copy.copy(m1)
528 m._relativeuipath |= m2._relativeuipath
542 m._relativeuipath |= m2._relativeuipath
529 return m
543 return m
530 return intersectionmatcher(m1, m2)
544 return intersectionmatcher(m1, m2)
531
545
532 class intersectionmatcher(basematcher):
546 class intersectionmatcher(basematcher):
533 def __init__(self, m1, m2):
547 def __init__(self, m1, m2):
534 super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
548 super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
535 self._m1 = m1
549 self._m1 = m1
536 self._m2 = m2
550 self._m2 = m2
537 self.bad = m1.bad
551 self.bad = m1.bad
538 self.explicitdir = m1.explicitdir
552 self.explicitdir = m1.explicitdir
539 self.traversedir = m1.traversedir
553 self.traversedir = m1.traversedir
540
554
541 @propertycache
555 @propertycache
542 def _files(self):
556 def _files(self):
543 if self.isexact():
557 if self.isexact():
544 m1, m2 = self._m1, self._m2
558 m1, m2 = self._m1, self._m2
545 if not m1.isexact():
559 if not m1.isexact():
546 m1, m2 = m2, m1
560 m1, m2 = m2, m1
547 return [f for f in m1.files() if m2(f)]
561 return [f for f in m1.files() if m2(f)]
548 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
562 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
549 # the set of files, because their files() are not always files. For
563 # the set of files, because their files() are not always files. For
550 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
564 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
551 # "path:dir2", we don't want to remove "dir2" from the set.
565 # "path:dir2", we don't want to remove "dir2" from the set.
552 return self._m1.files() + self._m2.files()
566 return self._m1.files() + self._m2.files()
553
567
554 def matchfn(self, f):
568 def matchfn(self, f):
555 return self._m1(f) and self._m2(f)
569 return self._m1(f) and self._m2(f)
556
570
557 def visitdir(self, dir):
571 def visitdir(self, dir):
558 visit1 = self._m1.visitdir(dir)
572 visit1 = self._m1.visitdir(dir)
559 if visit1 == 'all':
573 if visit1 == 'all':
560 return self._m2.visitdir(dir)
574 return self._m2.visitdir(dir)
561 # bool() because visit1=True + visit2='all' should not be 'all'
575 # bool() because visit1=True + visit2='all' should not be 'all'
562 return bool(visit1 and self._m2.visitdir(dir))
576 return bool(visit1 and self._m2.visitdir(dir))
563
577
564 def always(self):
578 def always(self):
565 return self._m1.always() and self._m2.always()
579 return self._m1.always() and self._m2.always()
566
580
567 def isexact(self):
581 def isexact(self):
568 return self._m1.isexact() or self._m2.isexact()
582 return self._m1.isexact() or self._m2.isexact()
569
583
570 @encoding.strmethod
584 @encoding.strmethod
571 def __repr__(self):
585 def __repr__(self):
572 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
586 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
573
587
574 class subdirmatcher(basematcher):
588 class subdirmatcher(basematcher):
575 """Adapt a matcher to work on a subdirectory only.
589 """Adapt a matcher to work on a subdirectory only.
576
590
577 The paths are remapped to remove/insert the path as needed:
591 The paths are remapped to remove/insert the path as needed:
578
592
579 >>> from . import pycompat
593 >>> from . import pycompat
580 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
594 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
581 >>> m2 = subdirmatcher(b'sub', m1)
595 >>> m2 = subdirmatcher(b'sub', m1)
582 >>> bool(m2(b'a.txt'))
596 >>> bool(m2(b'a.txt'))
583 False
597 False
584 >>> bool(m2(b'b.txt'))
598 >>> bool(m2(b'b.txt'))
585 True
599 True
586 >>> bool(m2.matchfn(b'a.txt'))
600 >>> bool(m2.matchfn(b'a.txt'))
587 False
601 False
588 >>> bool(m2.matchfn(b'b.txt'))
602 >>> bool(m2.matchfn(b'b.txt'))
589 True
603 True
590 >>> m2.files()
604 >>> m2.files()
591 ['b.txt']
605 ['b.txt']
592 >>> m2.exact(b'b.txt')
606 >>> m2.exact(b'b.txt')
593 True
607 True
594 >>> util.pconvert(m2.rel(b'b.txt'))
608 >>> util.pconvert(m2.rel(b'b.txt'))
595 'sub/b.txt'
609 'sub/b.txt'
596 >>> def bad(f, msg):
610 >>> def bad(f, msg):
597 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
611 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
598 >>> m1.bad = bad
612 >>> m1.bad = bad
599 >>> m2.bad(b'x.txt', b'No such file')
613 >>> m2.bad(b'x.txt', b'No such file')
600 sub/x.txt: No such file
614 sub/x.txt: No such file
601 >>> m2.abs(b'c.txt')
615 >>> m2.abs(b'c.txt')
602 'sub/c.txt'
616 'sub/c.txt'
603 """
617 """
604
618
605 def __init__(self, path, matcher):
619 def __init__(self, path, matcher):
606 super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
620 super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
607 self._path = path
621 self._path = path
608 self._matcher = matcher
622 self._matcher = matcher
609 self._always = matcher.always()
623 self._always = matcher.always()
610
624
611 self._files = [f[len(path) + 1:] for f in matcher._files
625 self._files = [f[len(path) + 1:] for f in matcher._files
612 if f.startswith(path + "/")]
626 if f.startswith(path + "/")]
613
627
614 # If the parent repo had a path to this subrepo and the matcher is
628 # If the parent repo had a path to this subrepo and the matcher is
615 # a prefix matcher, this submatcher always matches.
629 # a prefix matcher, this submatcher always matches.
616 if matcher.prefix():
630 if matcher.prefix():
617 self._always = any(f == path for f in matcher._files)
631 self._always = any(f == path for f in matcher._files)
618
632
619 def bad(self, f, msg):
633 def bad(self, f, msg):
620 self._matcher.bad(self._path + "/" + f, msg)
634 self._matcher.bad(self._path + "/" + f, msg)
621
635
622 def abs(self, f):
636 def abs(self, f):
623 return self._matcher.abs(self._path + "/" + f)
637 return self._matcher.abs(self._path + "/" + f)
624
638
625 def rel(self, f):
639 def rel(self, f):
626 return self._matcher.rel(self._path + "/" + f)
640 return self._matcher.rel(self._path + "/" + f)
627
641
628 def uipath(self, f):
642 def uipath(self, f):
629 return self._matcher.uipath(self._path + "/" + f)
643 return self._matcher.uipath(self._path + "/" + f)
630
644
631 def matchfn(self, f):
645 def matchfn(self, f):
632 # Some information is lost in the superclass's constructor, so we
646 # Some information is lost in the superclass's constructor, so we
633 # can not accurately create the matching function for the subdirectory
647 # can not accurately create the matching function for the subdirectory
634 # from the inputs. Instead, we override matchfn() and visitdir() to
648 # from the inputs. Instead, we override matchfn() and visitdir() to
635 # call the original matcher with the subdirectory path prepended.
649 # call the original matcher with the subdirectory path prepended.
636 return self._matcher.matchfn(self._path + "/" + f)
650 return self._matcher.matchfn(self._path + "/" + f)
637
651
638 def visitdir(self, dir):
652 def visitdir(self, dir):
639 if dir == '.':
653 if dir == '.':
640 dir = self._path
654 dir = self._path
641 else:
655 else:
642 dir = self._path + "/" + dir
656 dir = self._path + "/" + dir
643 return self._matcher.visitdir(dir)
657 return self._matcher.visitdir(dir)
644
658
645 def always(self):
659 def always(self):
646 return self._always
660 return self._always
647
661
648 def prefix(self):
662 def prefix(self):
649 return self._matcher.prefix() and not self._always
663 return self._matcher.prefix() and not self._always
650
664
651 @encoding.strmethod
665 @encoding.strmethod
652 def __repr__(self):
666 def __repr__(self):
653 return ('<subdirmatcher path=%r, matcher=%r>' %
667 return ('<subdirmatcher path=%r, matcher=%r>' %
654 (self._path, self._matcher))
668 (self._path, self._matcher))
655
669
656 class unionmatcher(basematcher):
670 class unionmatcher(basematcher):
657 """A matcher that is the union of several matchers.
671 """A matcher that is the union of several matchers.
658
672
659 The non-matching-attributes (root, cwd, bad, explicitdir, traversedir) are
673 The non-matching-attributes (root, cwd, bad, explicitdir, traversedir) are
660 taken from the first matcher.
674 taken from the first matcher.
661 """
675 """
662
676
663 def __init__(self, matchers):
677 def __init__(self, matchers):
664 m1 = matchers[0]
678 m1 = matchers[0]
665 super(unionmatcher, self).__init__(m1._root, m1._cwd)
679 super(unionmatcher, self).__init__(m1._root, m1._cwd)
666 self.explicitdir = m1.explicitdir
680 self.explicitdir = m1.explicitdir
667 self.traversedir = m1.traversedir
681 self.traversedir = m1.traversedir
668 self._matchers = matchers
682 self._matchers = matchers
669
683
670 def matchfn(self, f):
684 def matchfn(self, f):
671 for match in self._matchers:
685 for match in self._matchers:
672 if match(f):
686 if match(f):
673 return True
687 return True
674 return False
688 return False
675
689
676 def visitdir(self, dir):
690 def visitdir(self, dir):
677 r = False
691 r = False
678 for m in self._matchers:
692 for m in self._matchers:
679 v = m.visitdir(dir)
693 v = m.visitdir(dir)
680 if v == 'all':
694 if v == 'all':
681 return v
695 return v
682 r |= v
696 r |= v
683 return r
697 return r
684
698
685 @encoding.strmethod
699 @encoding.strmethod
686 def __repr__(self):
700 def __repr__(self):
687 return ('<unionmatcher matchers=%r>' % self._matchers)
701 return ('<unionmatcher matchers=%r>' % self._matchers)
688
702
689 def patkind(pattern, default=None):
703 def patkind(pattern, default=None):
690 '''If pattern is 'kind:pat' with a known kind, return kind.'''
704 '''If pattern is 'kind:pat' with a known kind, return kind.'''
691 return _patsplit(pattern, default)[0]
705 return _patsplit(pattern, default)[0]
692
706
693 def _patsplit(pattern, default):
707 def _patsplit(pattern, default):
694 """Split a string into the optional pattern kind prefix and the actual
708 """Split a string into the optional pattern kind prefix and the actual
695 pattern."""
709 pattern."""
696 if ':' in pattern:
710 if ':' in pattern:
697 kind, pat = pattern.split(':', 1)
711 kind, pat = pattern.split(':', 1)
698 if kind in allpatternkinds:
712 if kind in allpatternkinds:
699 return kind, pat
713 return kind, pat
700 return default, pattern
714 return default, pattern
701
715
702 def _globre(pat):
716 def _globre(pat):
703 r'''Convert an extended glob string to a regexp string.
717 r'''Convert an extended glob string to a regexp string.
704
718
705 >>> from . import pycompat
719 >>> from . import pycompat
706 >>> def bprint(s):
720 >>> def bprint(s):
707 ... print(pycompat.sysstr(s))
721 ... print(pycompat.sysstr(s))
708 >>> bprint(_globre(br'?'))
722 >>> bprint(_globre(br'?'))
709 .
723 .
710 >>> bprint(_globre(br'*'))
724 >>> bprint(_globre(br'*'))
711 [^/]*
725 [^/]*
712 >>> bprint(_globre(br'**'))
726 >>> bprint(_globre(br'**'))
713 .*
727 .*
714 >>> bprint(_globre(br'**/a'))
728 >>> bprint(_globre(br'**/a'))
715 (?:.*/)?a
729 (?:.*/)?a
716 >>> bprint(_globre(br'a/**/b'))
730 >>> bprint(_globre(br'a/**/b'))
717 a/(?:.*/)?b
731 a/(?:.*/)?b
718 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
732 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
719 [a*?!^][\^b][^c]
733 [a*?!^][\^b][^c]
720 >>> bprint(_globre(br'{a,b}'))
734 >>> bprint(_globre(br'{a,b}'))
721 (?:a|b)
735 (?:a|b)
722 >>> bprint(_globre(br'.\*\?'))
736 >>> bprint(_globre(br'.\*\?'))
723 \.\*\?
737 \.\*\?
724 '''
738 '''
725 i, n = 0, len(pat)
739 i, n = 0, len(pat)
726 res = ''
740 res = ''
727 group = 0
741 group = 0
728 escape = util.stringutil.reescape
742 escape = util.stringutil.reescape
729 def peek():
743 def peek():
730 return i < n and pat[i:i + 1]
744 return i < n and pat[i:i + 1]
731 while i < n:
745 while i < n:
732 c = pat[i:i + 1]
746 c = pat[i:i + 1]
733 i += 1
747 i += 1
734 if c not in '*?[{},\\':
748 if c not in '*?[{},\\':
735 res += escape(c)
749 res += escape(c)
736 elif c == '*':
750 elif c == '*':
737 if peek() == '*':
751 if peek() == '*':
738 i += 1
752 i += 1
739 if peek() == '/':
753 if peek() == '/':
740 i += 1
754 i += 1
741 res += '(?:.*/)?'
755 res += '(?:.*/)?'
742 else:
756 else:
743 res += '.*'
757 res += '.*'
744 else:
758 else:
745 res += '[^/]*'
759 res += '[^/]*'
746 elif c == '?':
760 elif c == '?':
747 res += '.'
761 res += '.'
748 elif c == '[':
762 elif c == '[':
749 j = i
763 j = i
750 if j < n and pat[j:j + 1] in '!]':
764 if j < n and pat[j:j + 1] in '!]':
751 j += 1
765 j += 1
752 while j < n and pat[j:j + 1] != ']':
766 while j < n and pat[j:j + 1] != ']':
753 j += 1
767 j += 1
754 if j >= n:
768 if j >= n:
755 res += '\\['
769 res += '\\['
756 else:
770 else:
757 stuff = pat[i:j].replace('\\','\\\\')
771 stuff = pat[i:j].replace('\\','\\\\')
758 i = j + 1
772 i = j + 1
759 if stuff[0:1] == '!':
773 if stuff[0:1] == '!':
760 stuff = '^' + stuff[1:]
774 stuff = '^' + stuff[1:]
761 elif stuff[0:1] == '^':
775 elif stuff[0:1] == '^':
762 stuff = '\\' + stuff
776 stuff = '\\' + stuff
763 res = '%s[%s]' % (res, stuff)
777 res = '%s[%s]' % (res, stuff)
764 elif c == '{':
778 elif c == '{':
765 group += 1
779 group += 1
766 res += '(?:'
780 res += '(?:'
767 elif c == '}' and group:
781 elif c == '}' and group:
768 res += ')'
782 res += ')'
769 group -= 1
783 group -= 1
770 elif c == ',' and group:
784 elif c == ',' and group:
771 res += '|'
785 res += '|'
772 elif c == '\\':
786 elif c == '\\':
773 p = peek()
787 p = peek()
774 if p:
788 if p:
775 i += 1
789 i += 1
776 res += escape(p)
790 res += escape(p)
777 else:
791 else:
778 res += escape(c)
792 res += escape(c)
779 else:
793 else:
780 res += escape(c)
794 res += escape(c)
781 return res
795 return res
782
796
783 def _regex(kind, pat, globsuffix):
797 def _regex(kind, pat, globsuffix):
784 '''Convert a (normalized) pattern of any kind into a regular expression.
798 '''Convert a (normalized) pattern of any kind into a regular expression.
785 globsuffix is appended to the regexp of globs.'''
799 globsuffix is appended to the regexp of globs.'''
786 if not pat:
800 if not pat:
787 return ''
801 return ''
788 if kind == 're':
802 if kind == 're':
789 return pat
803 return pat
790 if kind in ('path', 'relpath'):
804 if kind in ('path', 'relpath'):
791 if pat == '.':
805 if pat == '.':
792 return ''
806 return ''
793 return util.stringutil.reescape(pat) + '(?:/|$)'
807 return util.stringutil.reescape(pat) + '(?:/|$)'
794 if kind == 'rootfilesin':
808 if kind == 'rootfilesin':
795 if pat == '.':
809 if pat == '.':
796 escaped = ''
810 escaped = ''
797 else:
811 else:
798 # Pattern is a directory name.
812 # Pattern is a directory name.
799 escaped = util.stringutil.reescape(pat) + '/'
813 escaped = util.stringutil.reescape(pat) + '/'
800 # Anything after the pattern must be a non-directory.
814 # Anything after the pattern must be a non-directory.
801 return escaped + '[^/]+$'
815 return escaped + '[^/]+$'
802 if kind == 'relglob':
816 if kind == 'relglob':
803 return '(?:|.*/)' + _globre(pat) + globsuffix
817 return '(?:|.*/)' + _globre(pat) + globsuffix
804 if kind == 'relre':
818 if kind == 'relre':
805 if pat.startswith('^'):
819 if pat.startswith('^'):
806 return pat
820 return pat
807 return '.*' + pat
821 return '.*' + pat
808 return _globre(pat) + globsuffix
822 return _globre(pat) + globsuffix
809
823
810 def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
824 def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
811 '''Return regexp string and a matcher function for kindpats.
825 '''Return regexp string and a matcher function for kindpats.
812 globsuffix is appended to the regexp of globs.'''
826 globsuffix is appended to the regexp of globs.'''
813 matchfuncs = []
827 matchfuncs = []
814
828
815 subincludes, kindpats = _expandsubinclude(kindpats, root)
829 subincludes, kindpats = _expandsubinclude(kindpats, root)
816 if subincludes:
830 if subincludes:
817 submatchers = {}
831 submatchers = {}
818 def matchsubinclude(f):
832 def matchsubinclude(f):
819 for prefix, matcherargs in subincludes:
833 for prefix, matcherargs in subincludes:
820 if f.startswith(prefix):
834 if f.startswith(prefix):
821 mf = submatchers.get(prefix)
835 mf = submatchers.get(prefix)
822 if mf is None:
836 if mf is None:
823 mf = match(*matcherargs)
837 mf = match(*matcherargs)
824 submatchers[prefix] = mf
838 submatchers[prefix] = mf
825
839
826 if mf(f[len(prefix):]):
840 if mf(f[len(prefix):]):
827 return True
841 return True
828 return False
842 return False
829 matchfuncs.append(matchsubinclude)
843 matchfuncs.append(matchsubinclude)
830
844
831 fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
845 fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
832 if fset:
846 if fset:
833 matchfuncs.append(fset.__contains__)
847 matchfuncs.append(fset.__contains__)
834
848
835 regex = ''
849 regex = ''
836 if kindpats:
850 if kindpats:
837 regex, mf = _buildregexmatch(kindpats, globsuffix)
851 regex, mf = _buildregexmatch(kindpats, globsuffix)
838 matchfuncs.append(mf)
852 matchfuncs.append(mf)
839
853
840 if len(matchfuncs) == 1:
854 if len(matchfuncs) == 1:
841 return regex, matchfuncs[0]
855 return regex, matchfuncs[0]
842 else:
856 else:
843 return regex, lambda f: any(mf(f) for mf in matchfuncs)
857 return regex, lambda f: any(mf(f) for mf in matchfuncs)
844
858
845 def _buildregexmatch(kindpats, globsuffix):
859 def _buildregexmatch(kindpats, globsuffix):
846 """Build a match function from a list of kinds and kindpats,
860 """Build a match function from a list of kinds and kindpats,
847 return regexp string and a matcher function."""
861 return regexp string and a matcher function."""
848 try:
862 try:
849 regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
863 regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
850 for (k, p, s) in kindpats])
864 for (k, p, s) in kindpats])
851 if len(regex) > 20000:
865 if len(regex) > 20000:
852 raise OverflowError
866 raise OverflowError
853 return regex, _rematcher(regex)
867 return regex, _rematcher(regex)
854 except OverflowError:
868 except OverflowError:
855 # We're using a Python with a tiny regex engine and we
869 # We're using a Python with a tiny regex engine and we
856 # made it explode, so we'll divide the pattern list in two
870 # made it explode, so we'll divide the pattern list in two
857 # until it works
871 # until it works
858 l = len(kindpats)
872 l = len(kindpats)
859 if l < 2:
873 if l < 2:
860 raise
874 raise
861 regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
875 regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
862 regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
876 regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
863 return regex, lambda s: a(s) or b(s)
877 return regex, lambda s: a(s) or b(s)
864 except re.error:
878 except re.error:
865 for k, p, s in kindpats:
879 for k, p, s in kindpats:
866 try:
880 try:
867 _rematcher('(?:%s)' % _regex(k, p, globsuffix))
881 _rematcher('(?:%s)' % _regex(k, p, globsuffix))
868 except re.error:
882 except re.error:
869 if s:
883 if s:
870 raise error.Abort(_("%s: invalid pattern (%s): %s") %
884 raise error.Abort(_("%s: invalid pattern (%s): %s") %
871 (s, k, p))
885 (s, k, p))
872 else:
886 else:
873 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
887 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
874 raise error.Abort(_("invalid pattern"))
888 raise error.Abort(_("invalid pattern"))
875
889
876 def _patternrootsanddirs(kindpats):
890 def _patternrootsanddirs(kindpats):
877 '''Returns roots and directories corresponding to each pattern.
891 '''Returns roots and directories corresponding to each pattern.
878
892
879 This calculates the roots and directories exactly matching the patterns and
893 This calculates the roots and directories exactly matching the patterns and
880 returns a tuple of (roots, dirs) for each. It does not return other
894 returns a tuple of (roots, dirs) for each. It does not return other
881 directories which may also need to be considered, like the parent
895 directories which may also need to be considered, like the parent
882 directories.
896 directories.
883 '''
897 '''
884 r = []
898 r = []
885 d = []
899 d = []
886 for kind, pat, source in kindpats:
900 for kind, pat, source in kindpats:
887 if kind == 'glob': # find the non-glob prefix
901 if kind == 'glob': # find the non-glob prefix
888 root = []
902 root = []
889 for p in pat.split('/'):
903 for p in pat.split('/'):
890 if '[' in p or '{' in p or '*' in p or '?' in p:
904 if '[' in p or '{' in p or '*' in p or '?' in p:
891 break
905 break
892 root.append(p)
906 root.append(p)
893 r.append('/'.join(root) or '.')
907 r.append('/'.join(root) or '.')
894 elif kind in ('relpath', 'path'):
908 elif kind in ('relpath', 'path'):
895 r.append(pat or '.')
909 r.append(pat or '.')
896 elif kind in ('rootfilesin',):
910 elif kind in ('rootfilesin',):
897 d.append(pat or '.')
911 d.append(pat or '.')
898 else: # relglob, re, relre
912 else: # relglob, re, relre
899 r.append('.')
913 r.append('.')
900 return r, d
914 return r, d
901
915
902 def _roots(kindpats):
916 def _roots(kindpats):
903 '''Returns root directories to match recursively from the given patterns.'''
917 '''Returns root directories to match recursively from the given patterns.'''
904 roots, dirs = _patternrootsanddirs(kindpats)
918 roots, dirs = _patternrootsanddirs(kindpats)
905 return roots
919 return roots
906
920
907 def _rootsanddirs(kindpats):
921 def _rootsanddirs(kindpats):
908 '''Returns roots and exact directories from patterns.
922 '''Returns roots and exact directories from patterns.
909
923
910 roots are directories to match recursively, whereas exact directories should
924 roots are directories to match recursively, whereas exact directories should
911 be matched non-recursively. The returned (roots, dirs) tuple will also
925 be matched non-recursively. The returned (roots, dirs) tuple will also
912 include directories that need to be implicitly considered as either, such as
926 include directories that need to be implicitly considered as either, such as
913 parent directories.
927 parent directories.
914
928
915 >>> _rootsanddirs(
929 >>> _rootsanddirs(
916 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
930 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
917 ... (b'glob', b'g*', b'')])
931 ... (b'glob', b'g*', b'')])
918 (['g/h', 'g/h', '.'], ['g', '.'])
932 (['g/h', 'g/h', '.'], ['g', '.'])
919 >>> _rootsanddirs(
933 >>> _rootsanddirs(
920 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
934 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
921 ([], ['g/h', '.', 'g', '.'])
935 ([], ['g/h', '.', 'g', '.'])
922 >>> _rootsanddirs(
936 >>> _rootsanddirs(
923 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
937 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
924 ... (b'path', b'', b'')])
938 ... (b'path', b'', b'')])
925 (['r', 'p/p', '.'], ['p', '.'])
939 (['r', 'p/p', '.'], ['p', '.'])
926 >>> _rootsanddirs(
940 >>> _rootsanddirs(
927 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
941 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
928 ... (b'relre', b'rr', b'')])
942 ... (b'relre', b'rr', b'')])
929 (['.', '.', '.'], ['.'])
943 (['.', '.', '.'], ['.'])
930 '''
944 '''
931 r, d = _patternrootsanddirs(kindpats)
945 r, d = _patternrootsanddirs(kindpats)
932
946
933 # Append the parents as non-recursive/exact directories, since they must be
947 # Append the parents as non-recursive/exact directories, since they must be
934 # scanned to get to either the roots or the other exact directories.
948 # scanned to get to either the roots or the other exact directories.
935 d.extend(util.dirs(d))
949 d.extend(util.dirs(d))
936 d.extend(util.dirs(r))
950 d.extend(util.dirs(r))
937 # util.dirs() does not include the root directory, so add it manually
951 # util.dirs() does not include the root directory, so add it manually
938 d.append('.')
952 d.append('.')
939
953
940 return r, d
954 return r, d
941
955
942 def _explicitfiles(kindpats):
956 def _explicitfiles(kindpats):
943 '''Returns the potential explicit filenames from the patterns.
957 '''Returns the potential explicit filenames from the patterns.
944
958
945 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
959 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
946 ['foo/bar']
960 ['foo/bar']
947 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
961 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
948 []
962 []
949 '''
963 '''
950 # Keep only the pattern kinds where one can specify filenames (vs only
964 # Keep only the pattern kinds where one can specify filenames (vs only
951 # directory names).
965 # directory names).
952 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
966 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
953 return _roots(filable)
967 return _roots(filable)
954
968
955 def _prefix(kindpats):
969 def _prefix(kindpats):
956 '''Whether all the patterns match a prefix (i.e. recursively)'''
970 '''Whether all the patterns match a prefix (i.e. recursively)'''
957 for kind, pat, source in kindpats:
971 for kind, pat, source in kindpats:
958 if kind not in ('path', 'relpath'):
972 if kind not in ('path', 'relpath'):
959 return False
973 return False
960 return True
974 return True
961
975
962 _commentre = None
976 _commentre = None
963
977
964 def readpatternfile(filepath, warn, sourceinfo=False):
978 def readpatternfile(filepath, warn, sourceinfo=False):
965 '''parse a pattern file, returning a list of
979 '''parse a pattern file, returning a list of
966 patterns. These patterns should be given to compile()
980 patterns. These patterns should be given to compile()
967 to be validated and converted into a match function.
981 to be validated and converted into a match function.
968
982
969 trailing white space is dropped.
983 trailing white space is dropped.
970 the escape character is backslash.
984 the escape character is backslash.
971 comments start with #.
985 comments start with #.
972 empty lines are skipped.
986 empty lines are skipped.
973
987
974 lines can be of the following formats:
988 lines can be of the following formats:
975
989
976 syntax: regexp # defaults following lines to non-rooted regexps
990 syntax: regexp # defaults following lines to non-rooted regexps
977 syntax: glob # defaults following lines to non-rooted globs
991 syntax: glob # defaults following lines to non-rooted globs
978 re:pattern # non-rooted regular expression
992 re:pattern # non-rooted regular expression
979 glob:pattern # non-rooted glob
993 glob:pattern # non-rooted glob
980 pattern # pattern of the current default type
994 pattern # pattern of the current default type
981
995
982 if sourceinfo is set, returns a list of tuples:
996 if sourceinfo is set, returns a list of tuples:
983 (pattern, lineno, originalline). This is useful to debug ignore patterns.
997 (pattern, lineno, originalline). This is useful to debug ignore patterns.
984 '''
998 '''
985
999
986 syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
1000 syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
987 'include': 'include', 'subinclude': 'subinclude'}
1001 'include': 'include', 'subinclude': 'subinclude'}
988 syntax = 'relre:'
1002 syntax = 'relre:'
989 patterns = []
1003 patterns = []
990
1004
991 fp = open(filepath, 'rb')
1005 fp = open(filepath, 'rb')
992 for lineno, line in enumerate(util.iterfile(fp), start=1):
1006 for lineno, line in enumerate(util.iterfile(fp), start=1):
993 if "#" in line:
1007 if "#" in line:
994 global _commentre
1008 global _commentre
995 if not _commentre:
1009 if not _commentre:
996 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1010 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
997 # remove comments prefixed by an even number of escapes
1011 # remove comments prefixed by an even number of escapes
998 m = _commentre.search(line)
1012 m = _commentre.search(line)
999 if m:
1013 if m:
1000 line = line[:m.end(1)]
1014 line = line[:m.end(1)]
1001 # fixup properly escaped comments that survived the above
1015 # fixup properly escaped comments that survived the above
1002 line = line.replace("\\#", "#")
1016 line = line.replace("\\#", "#")
1003 line = line.rstrip()
1017 line = line.rstrip()
1004 if not line:
1018 if not line:
1005 continue
1019 continue
1006
1020
1007 if line.startswith('syntax:'):
1021 if line.startswith('syntax:'):
1008 s = line[7:].strip()
1022 s = line[7:].strip()
1009 try:
1023 try:
1010 syntax = syntaxes[s]
1024 syntax = syntaxes[s]
1011 except KeyError:
1025 except KeyError:
1012 if warn:
1026 if warn:
1013 warn(_("%s: ignoring invalid syntax '%s'\n") %
1027 warn(_("%s: ignoring invalid syntax '%s'\n") %
1014 (filepath, s))
1028 (filepath, s))
1015 continue
1029 continue
1016
1030
1017 linesyntax = syntax
1031 linesyntax = syntax
1018 for s, rels in syntaxes.iteritems():
1032 for s, rels in syntaxes.iteritems():
1019 if line.startswith(rels):
1033 if line.startswith(rels):
1020 linesyntax = rels
1034 linesyntax = rels
1021 line = line[len(rels):]
1035 line = line[len(rels):]
1022 break
1036 break
1023 elif line.startswith(s+':'):
1037 elif line.startswith(s+':'):
1024 linesyntax = rels
1038 linesyntax = rels
1025 line = line[len(s) + 1:]
1039 line = line[len(s) + 1:]
1026 break
1040 break
1027 if sourceinfo:
1041 if sourceinfo:
1028 patterns.append((linesyntax + line, lineno, line))
1042 patterns.append((linesyntax + line, lineno, line))
1029 else:
1043 else:
1030 patterns.append(linesyntax + line)
1044 patterns.append(linesyntax + line)
1031 fp.close()
1045 fp.close()
1032 return patterns
1046 return patterns
General Comments 0
You need to be logged in to leave comments. Login now