##// END OF EJS Templates
match: allow passing in badfn to always() and never()...
Martin von Zweigbergk -
r41821:c302218a default
parent child Browse files
Show More
@@ -1,1374 +1,1374 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from . import (
16 from . import (
17 encoding,
17 encoding,
18 error,
18 error,
19 pathutil,
19 pathutil,
20 pycompat,
20 pycompat,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
# Pattern kind prefixes (the 'kind' in 'kind:pattern') listed by this module.
allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'rootglob',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
# Kinds whose pattern is canonicalized relative to cwd by _donormalize().
cwdrelativepatternkinds = ('relpath', 'glob')
32
32
# Local alias for util.propertycache; used as a decorator by the matcher
# classes in this module.
propertycache = util.propertycache
34
34
def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The returned callable is the compiled pattern's ``test_match`` method when
    it has one, and its ``match`` method otherwise.'''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match
44
44
45 def _expandsets(root, cwd, kindpats, ctx, listsubrepos, badfn):
45 def _expandsets(root, cwd, kindpats, ctx, listsubrepos, badfn):
46 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
46 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
47 matchers = []
47 matchers = []
48 other = []
48 other = []
49
49
50 for kind, pat, source in kindpats:
50 for kind, pat, source in kindpats:
51 if kind == 'set':
51 if kind == 'set':
52 if ctx is None:
52 if ctx is None:
53 raise error.ProgrammingError("fileset expression with no "
53 raise error.ProgrammingError("fileset expression with no "
54 "context")
54 "context")
55 matchers.append(ctx.matchfileset(pat, badfn=badfn))
55 matchers.append(ctx.matchfileset(pat, badfn=badfn))
56
56
57 if listsubrepos:
57 if listsubrepos:
58 for subpath in ctx.substate:
58 for subpath in ctx.substate:
59 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
59 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
60 pm = prefixdirmatcher(root, cwd, subpath, sm, badfn=badfn)
60 pm = prefixdirmatcher(root, cwd, subpath, sm, badfn=badfn)
61 matchers.append(pm)
61 matchers.append(pm)
62
62
63 continue
63 continue
64 other.append((kind, pat, source))
64 other.append((kind, pat, source))
65 return matchers, other
65 return matchers, other
66
66
67 def _expandsubinclude(kindpats, root):
67 def _expandsubinclude(kindpats, root):
68 '''Returns the list of subinclude matcher args and the kindpats without the
68 '''Returns the list of subinclude matcher args and the kindpats without the
69 subincludes in it.'''
69 subincludes in it.'''
70 relmatchers = []
70 relmatchers = []
71 other = []
71 other = []
72
72
73 for kind, pat, source in kindpats:
73 for kind, pat, source in kindpats:
74 if kind == 'subinclude':
74 if kind == 'subinclude':
75 sourceroot = pathutil.dirname(util.normpath(source))
75 sourceroot = pathutil.dirname(util.normpath(source))
76 pat = util.pconvert(pat)
76 pat = util.pconvert(pat)
77 path = pathutil.join(sourceroot, pat)
77 path = pathutil.join(sourceroot, pat)
78
78
79 newroot = pathutil.dirname(path)
79 newroot = pathutil.dirname(path)
80 matcherargs = (newroot, '', [], ['include:%s' % path])
80 matcherargs = (newroot, '', [], ['include:%s' % path])
81
81
82 prefix = pathutil.canonpath(root, root, newroot)
82 prefix = pathutil.canonpath(root, root, newroot)
83 if prefix:
83 if prefix:
84 prefix += '/'
84 prefix += '/'
85 relmatchers.append((prefix, matcherargs))
85 relmatchers.append((prefix, matcherargs))
86 else:
86 else:
87 other.append((kind, pat, source))
87 other.append((kind, pat, source))
88
88
89 return relmatchers, other
89 return relmatchers, other
90
90
91 def _kindpatsalwaysmatch(kindpats):
91 def _kindpatsalwaysmatch(kindpats):
92 """"Checks whether the kindspats match everything, as e.g.
92 """"Checks whether the kindspats match everything, as e.g.
93 'relpath:.' does.
93 'relpath:.' does.
94 """
94 """
95 for kind, pat, source in kindpats:
95 for kind, pat, source in kindpats:
96 if pat != '' or kind not in ['relpath', 'glob']:
96 if pat != '' or kind not in ['relpath', 'glob']:
97 return False
97 return False
98 return True
98 return True
99
99
def _buildkindpatsmatcher(matchercls, root, cwd, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher for kindpats using matchercls

    'set' patterns are first expanded into their own matchers by
    _expandsets(); the remaining kindpats, if any, are handed to
    ``matchercls(root, cwd, kindpats, badfn=badfn)``.

    Returns a nevermatcher when nothing was built, the matcher itself when
    exactly one was built, and a unionmatcher over all of them otherwise.'''
    matchers = []
    fms, kindpats = _expandsets(root, cwd, kindpats, ctx=ctx,
                                listsubrepos=listsubrepos, badfn=badfn)
    if kindpats:
        m = matchercls(root, cwd, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(root, cwd, badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
115
115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - passed to pathutil.canonpath() when normalizing cwd-relative
              patterns
    ctx - context used to expand 'set:' patterns, and whose repo's dirstate
          is used for normalization when icasefs is set
    listsubrepos - if true, 'set:' patterns are also expanded against each
                   subrepo of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    normalize = _donormalize
    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, cwd, kindpats,
                                      ctx=ctx, listsubrepos=listsubrepos,
                                      badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)

    # NOTE(review): the include/exclude matchers below are built with
    # badfn=None rather than the caller's badfn -- presumably intentional,
    # since only the primary matcher carries the callback; confirm.
    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, cwd, kindpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, cwd, kindpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
193
193
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given files.'''
    return exactmatcher(root, cwd, files, badfn=badfn)
196
196
def always(root, cwd, badfn=None):
    '''Return an alwaysmatcher, optionally with a custom bad() callback.'''
    return alwaysmatcher(root, cwd, badfn=badfn)
199
199
def never(root, cwd, badfn=None):
    '''Return a nevermatcher, optionally with a custom bad() callback.'''
    return nevermatcher(root, cwd, badfn=badfn)
202
202
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy); the matcher passed in is left unchanged.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
210
210
211 def _donormalize(patterns, default, root, cwd, auditor, warn):
211 def _donormalize(patterns, default, root, cwd, auditor, warn):
212 '''Convert 'kind:pat' from the patterns list to tuples with kind and
212 '''Convert 'kind:pat' from the patterns list to tuples with kind and
213 normalized and rooted patterns and with listfiles expanded.'''
213 normalized and rooted patterns and with listfiles expanded.'''
214 kindpats = []
214 kindpats = []
215 for kind, pat in [_patsplit(p, default) for p in patterns]:
215 for kind, pat in [_patsplit(p, default) for p in patterns]:
216 if kind in cwdrelativepatternkinds:
216 if kind in cwdrelativepatternkinds:
217 pat = pathutil.canonpath(root, cwd, pat, auditor)
217 pat = pathutil.canonpath(root, cwd, pat, auditor)
218 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
218 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
219 pat = util.normpath(pat)
219 pat = util.normpath(pat)
220 elif kind in ('listfile', 'listfile0'):
220 elif kind in ('listfile', 'listfile0'):
221 try:
221 try:
222 files = util.readfile(pat)
222 files = util.readfile(pat)
223 if kind == 'listfile0':
223 if kind == 'listfile0':
224 files = files.split('\0')
224 files = files.split('\0')
225 else:
225 else:
226 files = files.splitlines()
226 files = files.splitlines()
227 files = [f for f in files if f]
227 files = [f for f in files if f]
228 except EnvironmentError:
228 except EnvironmentError:
229 raise error.Abort(_("unable to read file list (%s)") % pat)
229 raise error.Abort(_("unable to read file list (%s)") % pat)
230 for k, p, source in _donormalize(files, default, root, cwd,
230 for k, p, source in _donormalize(files, default, root, cwd,
231 auditor, warn):
231 auditor, warn):
232 kindpats.append((k, p, pat))
232 kindpats.append((k, p, pat))
233 continue
233 continue
234 elif kind == 'include':
234 elif kind == 'include':
235 try:
235 try:
236 fullpath = os.path.join(root, util.localpath(pat))
236 fullpath = os.path.join(root, util.localpath(pat))
237 includepats = readpatternfile(fullpath, warn)
237 includepats = readpatternfile(fullpath, warn)
238 for k, p, source in _donormalize(includepats, default,
238 for k, p, source in _donormalize(includepats, default,
239 root, cwd, auditor, warn):
239 root, cwd, auditor, warn):
240 kindpats.append((k, p, source or pat))
240 kindpats.append((k, p, source or pat))
241 except error.Abort as inst:
241 except error.Abort as inst:
242 raise error.Abort('%s: %s' % (pat, inst[0]))
242 raise error.Abort('%s: %s' % (pat, inst[0]))
243 except IOError as inst:
243 except IOError as inst:
244 if warn:
244 if warn:
245 warn(_("skipping unreadable pattern file '%s': %s\n") %
245 warn(_("skipping unreadable pattern file '%s': %s\n") %
246 (pat, stringutil.forcebytestr(inst.strerror)))
246 (pat, stringutil.forcebytestr(inst.strerror)))
247 continue
247 continue
248 # else: re or relre - which cannot be normalized
248 # else: re or relre - which cannot be normalized
249 kindpats.append((kind, pat, ''))
249 kindpats.append((kind, pat, ''))
250 return kindpats
250 return kindpats
251
251
class basematcher(object):
    '''Base class for matchers.

    On its own it matches no file (matchfn() returns False), lists no explicit
    files, and reports none of always()/isexact()/prefix(). Subclasses
    override the pieces they need.'''

    def __init__(self, root, cwd, badfn=None):
        self._root = root
        self._cwd = cwd
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()
375
375
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None):
        super(alwaysmatcher, self).__init__(root, cwd, badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # Every directory, and everything below it, is of interest.
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
396
396
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # No directory can contain a match.
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
422
422
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` is installed as the instance's matchfn. ``predrepr``, when
    given, is used instead of the function itself to build the repr.
    """

    def __init__(self, root, cwd, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(root, cwd, badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        s = (stringutil.buildrepr(self._predrepr)
             or pycompat.byterepr(self.matchfn))
        # NOTE(review): 'predicatenmatcher' looks like a typo for
        # 'predicatematcher', but this string is runtime output and is left
        # unchanged here.
        return '<predicatenmatcher pred=%s>' % s
436
436
class patternmatcher(basematcher):
    '''Matcher built from a list of (kind, pat, source) tuples.'''

    def __init__(self, root, cwd, kindpats, badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # Directories derived from the explicit files, plus the root.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Return 'all', True or False for dir (see basematcher.visitdir).

        'all' is returned only for a prefix matcher when dir itself is one of
        the explicit files.'''
        if self._prefix and dir in self._fileset:
            return 'all'
        return ('.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def visitchildrenset(self, dir):
        '''Translate visitdir()'s answer: True -> 'this', False -> set(),
        'all' -> 'all'.'''
        ret = self.visitdir(dir)
        if ret is True:
            return 'this'
        elif not ret:
            return set()
        assert ret == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
474
474
475 # This is basically a reimplementation of util.dirs that stores the children
475 # This is basically a reimplementation of util.dirs that stores the children
476 # instead of just a count of them, plus a small optional optimization to avoid
476 # instead of just a count of them, plus a small optional optimization to avoid
477 # some directories we don't need.
477 # some directories we don't need.
478 class _dirchildren(object):
478 class _dirchildren(object):
479 def __init__(self, paths, onlyinclude=None):
479 def __init__(self, paths, onlyinclude=None):
480 self._dirs = {}
480 self._dirs = {}
481 self._onlyinclude = onlyinclude or []
481 self._onlyinclude = onlyinclude or []
482 addpath = self.addpath
482 addpath = self.addpath
483 for f in paths:
483 for f in paths:
484 addpath(f)
484 addpath(f)
485
485
486 def addpath(self, path):
486 def addpath(self, path):
487 if path == '.':
487 if path == '.':
488 return
488 return
489 dirs = self._dirs
489 dirs = self._dirs
490 findsplitdirs = _dirchildren._findsplitdirs
490 findsplitdirs = _dirchildren._findsplitdirs
491 for d, b in findsplitdirs(path):
491 for d, b in findsplitdirs(path):
492 if d not in self._onlyinclude:
492 if d not in self._onlyinclude:
493 continue
493 continue
494 dirs.setdefault(d, set()).add(b)
494 dirs.setdefault(d, set()).add(b)
495
495
496 @staticmethod
496 @staticmethod
497 def _findsplitdirs(path):
497 def _findsplitdirs(path):
498 # yields (dirname, basename) tuples, walking back to the root. This is
498 # yields (dirname, basename) tuples, walking back to the root. This is
499 # very similar to util.finddirs, except:
499 # very similar to util.finddirs, except:
500 # - produces a (dirname, basename) tuple, not just 'dirname'
500 # - produces a (dirname, basename) tuple, not just 'dirname'
501 # - includes root dir
501 # - includes root dir
502 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
502 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
503 # slash, and produces '.' for the root instead of ''.
503 # slash, and produces '.' for the root instead of ''.
504 oldpos = len(path)
504 oldpos = len(path)
505 pos = path.rfind('/')
505 pos = path.rfind('/')
506 while pos != -1:
506 while pos != -1:
507 yield path[:pos], path[pos + 1:oldpos]
507 yield path[:pos], path[pos + 1:oldpos]
508 oldpos = pos
508 oldpos = pos
509 pos = path.rfind('/', 0, pos)
509 pos = path.rfind('/', 0, pos)
510 yield '.', path[:oldpos]
510 yield '.', path[:oldpos]
511
511
512 def get(self, path):
512 def get(self, path):
513 return self._dirs.get(path, set())
513 return self._dirs.get(path, set())
514
514
class includematcher(basematcher):
    '''Matcher built from a list of include patterns.

    The patterns are compiled by _buildmatch() using the '(?:/|$)' suffix,
    and _rootsdirsandparents() supplies three directory sets that drive the
    visitdir()/visitchildrenset() answers.
    '''

    def __init__(self, root, cwd, kindpats, badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        pats, matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._pats = pats
        self.matchfn = matchfn
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots: directories included recursively.
        self._roots = set(roots)
        # dirs: directories included non-recursively.
        self._dirs = set(dirs)
        # parents: directories included non-recursively only because they
        # lead to something in _dirs or _roots.
        self._parents = set(parents)

    def visitdir(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots:
            return True
        if dir in self._dirs or dir in self._parents:
            return True
        # Otherwise the directory is only interesting when it lives
        # somewhere below a recursively included root.
        return any(ancestor in roots for ancestor in util.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # dirs, roots and parents are all fed in even though only entries
        # for parents are kept. Consider:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # Asking for the children of 'foo' must answer ['bar'], which is
        # only possible if 'foo/bar' was among the paths that were split.
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        # Unlike visitdir, 'dir in self._parents' is deliberately not part
        # of this test; parents get a precise answer further down.
        if ('.' in roots or dir in roots or dir in self._dirs
            or any(ancestor in roots for ancestor in util.finddirs(dir))):
            return 'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        shown = pycompat.bytestr(self._pats)
        return '<includematcher includes=%r>' % shown
572
572
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # Reuse the caller's list as-is; copy any other iterable into one.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Every directory util.dirs() reports for the files, plus the root.
        alldirs = set(util.dirs(self._fileset))
        alldirs.add('.')
        return alldirs

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        known = self._fileset | (self._dirs - {'.'})
        # self._dirs holds every directory recursively: for
        # foo/bar/baz.txt it contains '.', 'foo' and 'foo/bar'. A remaining
        # name that still has a '/' therefore belongs to a
        # subdir-of-a-subdir and can be dropped, because the immediate
        # subdir is present on its own without a slash.
        if dir == '.':
            children = {name for name in known if '/' not in name}
        else:
            lead = dir + '/'
            skip = len(lead)
            children = {name[skip:] for name in known
                        if name.startswith(lead) and '/' not in name[skip:]}
        # An empty result is not expected: it would mean _dirs lists a
        # directory for which _fileset has no file.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
621
621
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 exactly once. When m2 is itself a composite matcher, asking
        # twice doubles the work at every nesting level.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            # m2 matches everything below dir, so nothing is left for us.
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 matches something below dir, so m1's 'all' must be downgraded
        # to a plain True.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
690
690
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the result is ``m1 or m2``. When one side
    reports always(), a shallow copy of the other side is returned instead of
    building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        # m1 filters nothing: use a copy of m2, but keep m1's callbacks.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        merged = copy.copy(m2)
        merged.bad = m1.bad
        merged.explicitdir = m1.explicitdir
        merged.traversedir = m1.traversedir
        return merged
    if m2.always():
        # m2 filters nothing: a copy of m1 already carries the right callbacks.
        return copy.copy(m1)
    return intersectionmatcher(m1, m2)
711
711
class intersectionmatcher(basematcher):
    '''Matcher that matches a file only when both wrapped matchers match it.

    root, cwd, bad, explicitdir and traversedir are taken from the first
    matcher.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # At least one side is exact: list its files and filter them through
        # the other side.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if not first:
            return False
        second = self._m2.visitdir(dir)
        if first == 'all':
            return second
        # bool() because first=True + second='all' should not be 'all'
        return bool(second)

    def visitchildrenset(self, dir):
        first = self._m1.visitchildrenset(dir)
        if not first:
            return set()
        second = self._m2.visitchildrenset(dir)
        if not second:
            return set()

        # 'all' on one side leaves the decision entirely to the other side.
        if first == 'all':
            return second
        if second == 'all':
            return first

        if 'this' in (first, second):
            return 'this'

        assert isinstance(first, set) and isinstance(second, set)
        return first.intersection(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
772
772
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below ``path`` and
        # strip the "path/" lead from them.
        lead = path + "/"
        self._files = [f[len(lead):] for f in matcher._files
                       if f.startswith(lead)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        # Report through the wrapped matcher, using the un-stripped path.
        self._matcher.bad(self._path + "/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' here is the subdirectory itself as seen by the wrapped matcher.
        outer = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitdir(outer)

    def visitchildrenset(self, dir):
        outer = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitchildrenset(outer)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
848
848
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'root', b'd/e/f', b'd/e', m1)
    >>> bool(m2(b'a.txt'),)
    False
    >>> bool(m2(b'd/e/a.txt'))
    True
    >>> bool(m2(b'd/e/b.txt'))
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, root, cwd, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(root, cwd, badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._path = path
        self._pathprefix = path + '/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        # The wrapped matcher's files, re-rooted below the prefix path.
        lead = self._pathprefix
        return [lead + f for f in self._matcher._files]

    def matchfn(self, f):
        lead = self._pathprefix
        if f.startswith(lead):
            return self._matcher.matchfn(f[len(lead):])
        # Nothing outside the prefix directory can match.
        return False

    @propertycache
    def _pathdirs(self):
        # Directories util.finddirs() reports for the prefix path, plus root.
        ancestors = set(util.finddirs(self._path))
        ancestors.add('.')
        return ancestors

    def visitdir(self, dir):
        inner = self._matcher
        if dir == self._path:
            # The prefix directory is the wrapped matcher's root.
            return inner.visitdir('.')
        lead = self._pathprefix
        if dir.startswith(lead):
            return inner.visitdir(dir[len(lead):])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        inner = self._matcher
        if dir == self._path:
            return inner.visitchildrenset('.')
        lead = self._pathprefix
        if dir.startswith(lead):
            return inner.visitchildrenset(dir[len(lead):])
        if dir not in self._pathdirs:
            return set()
        return 'this'

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return ('<prefixdirmatcher path=%r, matcher=%r>'
                % (pycompat.bytestr(self._path), self._matcher))
929
929
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The root, cwd, explicitdir and traversedir attributes are taken from the
    first matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__(first._root, first._cwd)
        # NOTE(review): ``bad`` is not copied from the first matcher here, so
        # whatever basematcher.__init__ sets up stays in effect -- confirm
        # that this is intended.
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # A file matches as soon as any one of the matchers accepts it.
        for candidate in self._matchers:
            if candidate(f):
                return True
        return False

    def visitdir(self, dir):
        combined = False
        for sub in self._matchers:
            verdict = sub.visitdir(dir)
            if verdict == 'all':
                # One matcher wanting everything settles the question.
                return verdict
            combined = combined | verdict
        return combined

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for sub in self._matchers:
            answer = sub.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if answer == 'this':
                sawthis = True
            if sawthis:
                # Keep looping rather than returning: a later matcher might
                # still answer 'all'.
                continue
            assert isinstance(answer, set)
            children = children.union(answer)
        return 'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
981
981
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned.
    '''
    kind, _rest = _patsplit(pattern, default)
    return kind
985
985
986 def _patsplit(pattern, default):
986 def _patsplit(pattern, default):
987 """Split a string into the optional pattern kind prefix and the actual
987 """Split a string into the optional pattern kind prefix and the actual
988 pattern."""
988 pattern."""
989 if ':' in pattern:
989 if ':' in pattern:
990 kind, pat = pattern.split(':', 1)
990 kind, pat = pattern.split(':', 1)
991 if kind in allpatternkinds:
991 if kind in allpatternkinds:
992 return kind, pat
992 return kind, pat
993 return default, pattern
993 return default, pattern
994
994
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    i, n = 0, len(pat)
    # Regexp fragments, joined once at the end.
    res = []
    # Number of '{' alternations currently open; '}' and ',' are only
    # special while this is non-zero.
    group = 0
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unconsumed character, or False at the end of the pattern.
        return i < n and pat[i:i + 1]

    while i < n:
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            res.append(escape(c, c))
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches zero or more whole directory levels
                    i += 1
                    res.append('(?:.*/)?')
                else:
                    res.append('.*')
            else:
                # a single '*' never crosses a directory separator
                res.append('[^/]*')
        elif c == '?':
            res.append('.')
        elif c == '[':
            # Look for the closing ']'; a leading '!' or ']' is part of the
            # class and must not be taken as its terminator.
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' as a literal
                res.append('\\[')
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation '!' becomes regexp negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a literal leading '^' must not negate the class
                    stuff = '\\' + stuff
                res.append('[%s]' % stuff)
        elif c == '{':
            group += 1
            res.append('(?:')
        elif c == '}' and group:
            res.append(')')
            group -= 1
        elif c == ',' and group:
            res.append('|')
        elif c == '\\':
            p = peek()
            if p:
                # backslash escapes the following character
                i += 1
                res.append(escape(p, p))
            else:
                # trailing backslash is taken literally
                res.append(escape(c, c))
        else:
            # '}' or ',' outside of any '{' group: literal character
            res.append(escape(c, c))
    return ''.join(res)
1075
1075
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pattern yields an empty regexp. Raises error.ProgrammingError
    when ``kind`` is not one of the kinds handled below.
    '''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        # match the path itself or anything below it
        return util.stringutil.reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        if pat == '.':
            escaped = ''
        else:
            # Pattern is a directory name.
            escaped = util.stringutil.reescape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return escaped + '[^/]+$'
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind == 'relre':
        if pat.startswith('^'):
            return pat
        return '.*' + pat
    if kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1104
1104
def _buildmatch(kindpats, globsuffix, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    The returned function accepts a file name and returns a true value when
    any of the subinclude matchers or remaining patterns matches it.
    '''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes are built on first use, keyed by prefix
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if f.startswith(prefix):
                    mf = submatchers.get(prefix)
                    if mf is None:
                        mf = match(*matcherargs)
                        submatchers[prefix] = mf

                    if mf(f[len(prefix):]):
                        return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            # Only 'rootfilesin' patterns: a set lookup on the file's
            # directory replaces the regexp.
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                i = f.rfind('/')
                if i >= 0:
                    dirname = f[:i]
                else:
                    dirname = '.'
                return dirname in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(mf)
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    else:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)
1147
1147
# Upper bound on the length of a single compiled regexp; _buildregexmatch()
# splits longer pattern lists into several regexps and aborts on a single
# pattern above this size.
MAX_RE_SIZE = 20000
1149
1149
1150 def _joinregexes(regexps):
1150 def _joinregexes(regexps):
1151 """gather multiple regular expressions into a single one"""
1151 """gather multiple regular expressions into a single one"""
1152 return '|'.join(regexps)
1152 return '|'.join(regexps)
1153
1153
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a single pattern exceeds MAX_RE_SIZE or when a
    pattern does not compile.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # Partition the regexps into consecutive groups whose joined size
        # stays within MAX_RE_SIZE.
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                # current group is full: close it and start a new one here
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # + 1 accounts for the '|' separator
            groupsize += piecesize + 1

        if startidx == 0:
            # everything fits in a single regexp
            func = _rematcher(fullregexp)
        else:
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Recompile the patterns one by one to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
1204
1204
1205 def _patternrootsanddirs(kindpats):
1205 def _patternrootsanddirs(kindpats):
1206 '''Returns roots and directories corresponding to each pattern.
1206 '''Returns roots and directories corresponding to each pattern.
1207
1207
1208 This calculates the roots and directories exactly matching the patterns and
1208 This calculates the roots and directories exactly matching the patterns and
1209 returns a tuple of (roots, dirs) for each. It does not return other
1209 returns a tuple of (roots, dirs) for each. It does not return other
1210 directories which may also need to be considered, like the parent
1210 directories which may also need to be considered, like the parent
1211 directories.
1211 directories.
1212 '''
1212 '''
1213 r = []
1213 r = []
1214 d = []
1214 d = []
1215 for kind, pat, source in kindpats:
1215 for kind, pat, source in kindpats:
1216 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1216 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1217 root = []
1217 root = []
1218 for p in pat.split('/'):
1218 for p in pat.split('/'):
1219 if '[' in p or '{' in p or '*' in p or '?' in p:
1219 if '[' in p or '{' in p or '*' in p or '?' in p:
1220 break
1220 break
1221 root.append(p)
1221 root.append(p)
1222 r.append('/'.join(root) or '.')
1222 r.append('/'.join(root) or '.')
1223 elif kind in ('relpath', 'path'):
1223 elif kind in ('relpath', 'path'):
1224 r.append(pat or '.')
1224 r.append(pat or '.')
1225 elif kind in ('rootfilesin',):
1225 elif kind in ('rootfilesin',):
1226 d.append(pat or '.')
1226 d.append(pat or '.')
1227 else: # relglob, re, relre
1227 else: # relglob, re, relre
1228 r.append('.')
1228 r.append('.')
1229 return r, d
1229 return r, d
1230
1230
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # only the roots are wanted; the exact-directory list is discarded
    return _patternrootsanddirs(kindpats)[0]
1235
1235
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    r, d = _patternrootsanddirs(kindpats)

    p = []
    # Append the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    p.extend(util.dirs(d))
    p.extend(util.dirs(r))
    # util.dirs() does not include the root directory, so add it manually
    p.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return r, d, p
1276
1276
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
    return _roots(filable)
1289
1289
1290 def _prefix(kindpats):
1290 def _prefix(kindpats):
1291 '''Whether all the patterns match a prefix (i.e. recursively)'''
1291 '''Whether all the patterns match a prefix (i.e. recursively)'''
1292 for kind, pat, source in kindpats:
1292 for kind, pat, source in kindpats:
1293 if kind not in ('path', 'relpath'):
1293 if kind not in ('path', 'relpath'):
1294 return False
1294 return False
1295 return True
1295 return True
1296
1296
1297 _commentre = None
1297 _commentre = None
1298
1298
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.

    warn, when not None/false, is called with a message for each unknown
    "syntax:" line; such lines are otherwise ignored.
    '''
    global _commentre

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # 'with' guarantees the file is closed even if parsing or warn() raises
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line prefix overrides the file's current default syntax.
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s + ':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now