##// END OF EJS Templates
match: complete documentation of match() parameters
Denis Laxalde -
r42252:bee16475 default
parent child Browse files
Show More
@@ -1,1429 +1,1432
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from . import (
16 from . import (
17 encoding,
17 encoding,
18 error,
18 error,
19 pathutil,
19 pathutil,
20 pycompat,
20 pycompat,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
# Names of the pattern kinds listed for this module, i.e. the 'kind' part of
# a 'kind:pattern' string.
allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'rootglob',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
# Kinds whose patterns _donormalize() resolves against the current working
# directory with pathutil.canonpath().
cwdrelativepatternkinds = ('relpath', 'glob')

# Short alias so the matcher classes can be decorated with @propertycache.
propertycache = util.propertycache
34
34
def _rematcher(regex):
    '''Compile regex with util.re and return a callable matching against it.

    The compiled object's test_match attribute is returned when it exists
    (slightly faster, provided by facebook's re2 bindings); otherwise the
    compiled object's match method is returned.'''
    compiled = util.re.compile(regex)
    try:
        fastmatch = compiled.test_match
    except AttributeError:
        # the engine in use does not offer test_match
        return compiled.match
    return fastmatch
44
44
45 def _expandsets(kindpats, ctx, listsubrepos, badfn):
45 def _expandsets(kindpats, ctx, listsubrepos, badfn):
46 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
46 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
47 matchers = []
47 matchers = []
48 other = []
48 other = []
49
49
50 for kind, pat, source in kindpats:
50 for kind, pat, source in kindpats:
51 if kind == 'set':
51 if kind == 'set':
52 if ctx is None:
52 if ctx is None:
53 raise error.ProgrammingError("fileset expression with no "
53 raise error.ProgrammingError("fileset expression with no "
54 "context")
54 "context")
55 matchers.append(ctx.matchfileset(pat, badfn=badfn))
55 matchers.append(ctx.matchfileset(pat, badfn=badfn))
56
56
57 if listsubrepos:
57 if listsubrepos:
58 for subpath in ctx.substate:
58 for subpath in ctx.substate:
59 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
59 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
60 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
60 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
61 matchers.append(pm)
61 matchers.append(pm)
62
62
63 continue
63 continue
64 other.append((kind, pat, source))
64 other.append((kind, pat, source))
65 return matchers, other
65 return matchers, other
66
66
def _expandsubinclude(kindpats, root):
    '''Separate the 'subinclude' entries of kindpats from the others.

    Returns a (relmatchers, other) pair. relmatchers holds one
    (prefix, matcherargs) tuple per subinclude, where matcherargs is
    (newroot, '', [], ['include:<path>']) and prefix is newroot expressed
    relative to root with a trailing '/' (or empty when canonpath() yields
    an empty result). other holds the untouched non-subinclude kindpats.'''
    subincludes = []
    rest = []

    for kind, pat, source in kindpats:
        if kind != 'subinclude':
            rest.append((kind, pat, source))
            continue

        # the pattern file is located relative to the file that named it
        basedir = pathutil.dirname(util.normpath(source))
        patfile = pathutil.join(basedir, util.pconvert(pat))
        subroot = pathutil.dirname(patfile)

        prefix = pathutil.canonpath(root, root, subroot)
        if prefix:
            prefix += '/'
        args = (subroot, '', [], ['include:%s' % patfile])
        subincludes.append((prefix, args))

    return subincludes, rest
90
90
91 def _kindpatsalwaysmatch(kindpats):
91 def _kindpatsalwaysmatch(kindpats):
92 """"Checks whether the kindspats match everything, as e.g.
92 """"Checks whether the kindspats match everything, as e.g.
93 'relpath:.' does.
93 'relpath:.' does.
94 """
94 """
95 for kind, pat, source in kindpats:
95 for kind, pat, source in kindpats:
96 if pat != '' or kind not in ['relpath', 'glob']:
96 if pat != '' or kind not in ['relpath', 'glob']:
97 return False
97 return False
98 return True
98 return True
99
99
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher out of kindpats.

    'set' patterns are expanded by _expandsets(); the remaining kindpats, if
    any, are handed to matchercls(root, kindpats, badfn=badfn). Returns a
    nevermatcher when nothing was built, the lone matcher when exactly one
    was built, and a unionmatcher over all of them otherwise.'''
    filesetmatchers, plainkindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

    parts = []
    if plainkindpats:
        parts.append(matchercls(root, plainkindpats, badfn=badfn))
    parts.extend(filesetmatchers)

    if not parts:
        return nevermatcher(badfn=badfn)
    if len(parts) == 1:
        return parts[0]
    return unionmatcher(parts)
115
115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find; when empty, everything matches
    include - patterns to include (unless they are excluded); a pattern
              without an explicit type is treated as a 'glob'
    exclude - patterns to exclude (even if they are included); a pattern
              without an explicit type is treated as a 'glob'
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext; needed for 'set:' patterns and when
          icasefs is true
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
            (not forwarded to the include/exclude matchers)
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'rootglob:<glob>' - a glob that is not resolved against cwd (presumably
                    relative to repository root)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'listfile:<path>' - a file of patterns, one per line
    'listfile0:<path>' - a file of patterns separated by NUL characters
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                casepat = foldcase(pat)
                # Preserve the original to handle a case only rename.
                if pat != casepat and pat in dirstate:
                    folded.append((kind, pat, source))
                folded.append((kind, casepat, source))
            return folded
    else:
        normalize = _donormalize

    def buildinclude(pats):
        # shared by include and exclude: 'glob' default, no bad() callback
        kp = normalize(pats, 'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(includematcher, root, kp, ctx=ctx,
                                     listsubrepos=listsubrepos, badfn=None)

    kindpats = None
    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)

    if kindpats is None or _kindpatsalwaysmatch(kindpats):
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
                                  listsubrepos=listsubrepos, badfn=badfn)

    if include:
        m = intersectmatchers(m, buildinclude(include))
    if exclude:
        m = differencematcher(m, buildinclude(exclude))
    return m
192
195
def exact(files, badfn=None):
    '''Return an exactmatcher built from files, forwarding badfn.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
195
198
def always(badfn=None):
    '''Return an alwaysmatcher, forwarding badfn.'''
    return alwaysmatcher(badfn=badfn)
198
201
def never(badfn=None):
    '''Return a nevermatcher, forwarding badfn.'''
    return nevermatcher(badfn=badfn)
201
204
def badmatch(match, badfn):
    """Return a shallow copy of match whose bad attribute is badfn.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
209
212
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Each pattern is split with _patsplit(p, default) and then handled by
    kind:

    - kinds in cwdrelativepatternkinds are resolved with
      pathutil.canonpath(root, cwd, pat, auditor)
    - 'relglob', 'path', 'rootfilesin' and 'rootglob' go through
      util.normpath()
    - 'listfile'/'listfile0' read the named file, split it on lines/NULs,
      drop empty entries and normalize the result recursively
    - 'include' reads a pattern file located under root and normalizes its
      patterns recursively
    - anything else ('re', 'relre', ...) is kept as is

    Returns a list of (kind, pat, source) tuples. source is '' for patterns
    given directly, the list file name for patterns read from a listfile,
    and the nested source (or else the include file name) for patterns read
    through 'include'.

    Raises error.Abort when a list file cannot be read, or when processing
    an include file aborts (the message is prefixed with the include
    pattern). An IOError while handling an include is reported through warn
    (when provided) and the include is skipped.'''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # entries coming from a list file report that file as source
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Use inst.args[0] rather than inst[0]: exceptions cannot be
                # indexed on Python 3, which would turn this into a TypeError.
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
250
253
class basematcher(object):
    '''Common base of the matcher classes.

    On its own it matches no file, lists no explicit files and asks for
    every directory to be visited; subclasses override what they need.'''

    def __init__(self, badfn=None):
        # only shadow the class-level bad() when a callback was supplied
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        '''Return matchfn(fn).'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of self._files.'''
        for name in self._files:
            yield name

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        # no explicit files by default
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        # set view of _files, used for membership tests
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        '''Default match function: nothing matches.'''
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
372
375
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def always(self):
        '''This matcher matches every file.'''
        return True

    def matchfn(self, f):
        '''Every file name matches.'''
        return True

    def visitdir(self, dir):
        '''Every directory is visited, along with all its subdirectories.'''
        return 'all'

    def visitchildrenset(self, dir):
        '''Same answer as visitdir(): 'all'.'''
        return 'all'
393
396
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        '''No directory needs to be visited.'''
        return False

    def visitchildrenset(self, dir):
        '''No child of any directory needs to be visited.'''
        return set()
419
422
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    predfn becomes the matcher's matchfn; predrepr, when given, is what
    __repr__ shows in place of the function's own repr.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        desc = stringutil.buildrepr(self._predrepr)
        if not desc:
            # nothing usable from predrepr: fall back to the function itself
            desc = pycompat.byterepr(self.matchfn)
        # NOTE(review): the 'predicatenmatcher' spelling is kept exactly as
        # is; existing output may be relied upon -- confirm before changing.
        return '<predicatenmatcher pred=%s>' % desc
433
436
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     ('re', '.*\.c$', ''),
    ...     ('path', 'foo/a', ''),
    ...     ('relpath', 'b', ''),
    ...     ('glob', '*.h', ''),
    ... ]
    >>> m = patternmatcher('foo', kindpats)
    >>> bool(m('main.c'))  # matches re:.*\.c$
    True
    >>> bool(m('b.txt'))
    False
    >>> bool(m('foo/a'))  # matches path:foo/a
    True
    >>> bool(m('a'))  # does not match path:b, since 'root' is 'foo'
    False
    >>> bool(m('b'))  # matches relpath:b, since 'root' is 'foo'
    True
    >>> bool(m('lib.h'))  # matches glob:*.h
    True

    >>> m.files()
    ['.', 'foo/a', 'b', '.']
    >>> m.exact('foo/a')
    True
    >>> m.exact('b')
    True
    >>> m.exact('lib.h')  # exact matches are for (rel)path kinds
    False
    """
    # The docstring above is a raw string because it contains '\.', which is
    # not a valid escape sequence in a regular string literal. Its value is
    # unchanged.

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        # explicit files/roots, prefix flag and the compiled match function
        # are all derived from kindpats up front
        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # directories of the explicit files as given by util.dirs(), plus '.'
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Return 'all', True or False for dir (see basematcher.visitdir).

        'all' is returned for an explicitly listed dir of a prefix matcher.
        Otherwise dir is visited when '.' is listed, when dir is listed or
        is a directory of a listed file, or when one of the directories
        yielded by util.finddirs(dir) is listed.'''
        fileset = self._fileset
        if self._prefix and dir in fileset:
            return 'all'
        if '.' in fileset or dir in fileset or dir in self._dirs:
            return True
        return any(parentdir in fileset
                   for parentdir in util.finddirs(dir))

    def visitchildrenset(self, dir):
        '''Translate the visitdir() answer: True -> 'this', a false value
        -> set(), 'all' -> 'all'.'''
        visit = self.visitdir(dir)
        if visit is True:
            return 'this'
        if not visit:
            return set()
        assert visit == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
502
505
503 # This is basically a reimplementation of util.dirs that stores the children
506 # This is basically a reimplementation of util.dirs that stores the children
504 # instead of just a count of them, plus a small optional optimization to avoid
507 # instead of just a count of them, plus a small optional optimization to avoid
505 # some directories we don't need.
508 # some directories we don't need.
506 class _dirchildren(object):
509 class _dirchildren(object):
507 def __init__(self, paths, onlyinclude=None):
510 def __init__(self, paths, onlyinclude=None):
508 self._dirs = {}
511 self._dirs = {}
509 self._onlyinclude = onlyinclude or []
512 self._onlyinclude = onlyinclude or []
510 addpath = self.addpath
513 addpath = self.addpath
511 for f in paths:
514 for f in paths:
512 addpath(f)
515 addpath(f)
513
516
514 def addpath(self, path):
517 def addpath(self, path):
515 if path == '.':
518 if path == '.':
516 return
519 return
517 dirs = self._dirs
520 dirs = self._dirs
518 findsplitdirs = _dirchildren._findsplitdirs
521 findsplitdirs = _dirchildren._findsplitdirs
519 for d, b in findsplitdirs(path):
522 for d, b in findsplitdirs(path):
520 if d not in self._onlyinclude:
523 if d not in self._onlyinclude:
521 continue
524 continue
522 dirs.setdefault(d, set()).add(b)
525 dirs.setdefault(d, set()).add(b)
523
526
524 @staticmethod
527 @staticmethod
525 def _findsplitdirs(path):
528 def _findsplitdirs(path):
526 # yields (dirname, basename) tuples, walking back to the root. This is
529 # yields (dirname, basename) tuples, walking back to the root. This is
527 # very similar to util.finddirs, except:
530 # very similar to util.finddirs, except:
528 # - produces a (dirname, basename) tuple, not just 'dirname'
531 # - produces a (dirname, basename) tuple, not just 'dirname'
529 # - includes root dir
532 # - includes root dir
530 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
533 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
531 # slash, and produces '.' for the root instead of ''.
534 # slash, and produces '.' for the root instead of ''.
532 oldpos = len(path)
535 oldpos = len(path)
533 pos = path.rfind('/')
536 pos = path.rfind('/')
534 while pos != -1:
537 while pos != -1:
535 yield path[:pos], path[pos + 1:oldpos]
538 yield path[:pos], path[pos + 1:oldpos]
536 oldpos = pos
539 oldpos = pos
537 pos = path.rfind('/', 0, pos)
540 pos = path.rfind('/', 0, pos)
538 yield '.', path[:oldpos]
541 yield '.', path[:oldpos]
539
542
540 def get(self, path):
543 def get(self, path):
541 return self._dirs.get(path, set())
544 return self._dirs.get(path, set())
542
545
543 class includematcher(basematcher):
546 class includematcher(basematcher):
544
547
545 def __init__(self, root, kindpats, badfn=None):
548 def __init__(self, root, kindpats, badfn=None):
546 super(includematcher, self).__init__(badfn)
549 super(includematcher, self).__init__(badfn)
547
550
548 self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
551 self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
549 self._prefix = _prefix(kindpats)
552 self._prefix = _prefix(kindpats)
550 roots, dirs, parents = _rootsdirsandparents(kindpats)
553 roots, dirs, parents = _rootsdirsandparents(kindpats)
551 # roots are directories which are recursively included.
554 # roots are directories which are recursively included.
552 self._roots = set(roots)
555 self._roots = set(roots)
553 # dirs are directories which are non-recursively included.
556 # dirs are directories which are non-recursively included.
554 self._dirs = set(dirs)
557 self._dirs = set(dirs)
555 # parents are directories which are non-recursively included because
558 # parents are directories which are non-recursively included because
556 # they are needed to get to items in _dirs or _roots.
559 # they are needed to get to items in _dirs or _roots.
557 self._parents = set(parents)
560 self._parents = set(parents)
558
561
559 def visitdir(self, dir):
562 def visitdir(self, dir):
560 if self._prefix and dir in self._roots:
563 if self._prefix and dir in self._roots:
561 return 'all'
564 return 'all'
562 return ('.' in self._roots or
565 return ('.' in self._roots or
563 dir in self._roots or
566 dir in self._roots or
564 dir in self._dirs or
567 dir in self._dirs or
565 dir in self._parents or
568 dir in self._parents or
566 any(parentdir in self._roots
569 any(parentdir in self._roots
567 for parentdir in util.finddirs(dir)))
570 for parentdir in util.finddirs(dir)))
568
571
569 @propertycache
572 @propertycache
570 def _allparentschildren(self):
573 def _allparentschildren(self):
571 # It may seem odd that we add dirs, roots, and parents, and then
574 # It may seem odd that we add dirs, roots, and parents, and then
572 # restrict to only parents. This is to catch the case of:
575 # restrict to only parents. This is to catch the case of:
573 # dirs = ['foo/bar']
576 # dirs = ['foo/bar']
574 # parents = ['foo']
577 # parents = ['foo']
575 # if we asked for the children of 'foo', but had only added
578 # if we asked for the children of 'foo', but had only added
576 # self._parents, we wouldn't be able to respond ['bar'].
579 # self._parents, we wouldn't be able to respond ['bar'].
577 return _dirchildren(
580 return _dirchildren(
578 itertools.chain(self._dirs, self._roots, self._parents),
581 itertools.chain(self._dirs, self._roots, self._parents),
579 onlyinclude=self._parents)
582 onlyinclude=self._parents)
580
583
581 def visitchildrenset(self, dir):
584 def visitchildrenset(self, dir):
582 if self._prefix and dir in self._roots:
585 if self._prefix and dir in self._roots:
583 return 'all'
586 return 'all'
584 # Note: this does *not* include the 'dir in self._parents' case from
587 # Note: this does *not* include the 'dir in self._parents' case from
585 # visitdir, that's handled below.
588 # visitdir, that's handled below.
586 if ('.' in self._roots or
589 if ('.' in self._roots or
587 dir in self._roots or
590 dir in self._roots or
588 dir in self._dirs or
591 dir in self._dirs or
589 any(parentdir in self._roots
592 any(parentdir in self._roots
590 for parentdir in util.finddirs(dir))):
593 for parentdir in util.finddirs(dir))):
591 return 'this'
594 return 'this'
592
595
593 if dir in self._parents:
596 if dir in self._parents:
594 return self._allparentschildren.get(dir) or set()
597 return self._allparentschildren.get(dir) or set()
595 return set()
598 return set()
596
599
597 @encoding.strmethod
600 @encoding.strmethod
598 def __repr__(self):
601 def __repr__(self):
599 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
602 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
600
603
601 class exactmatcher(basematcher):
604 class exactmatcher(basematcher):
602 r'''Matches the input files exactly. They are interpreted as paths, not
605 r'''Matches the input files exactly. They are interpreted as paths, not
603 patterns (so no kind-prefixes).
606 patterns (so no kind-prefixes).
604
607
605 >>> m = exactmatcher(['a.txt', 're:.*\.c$'])
608 >>> m = exactmatcher(['a.txt', 're:.*\.c$'])
606 >>> m('a.txt')
609 >>> m('a.txt')
607 True
610 True
608 >>> m('b.txt')
611 >>> m('b.txt')
609 False
612 False
610
613
611 Input files that would be matched are exactly those returned by .files()
614 Input files that would be matched are exactly those returned by .files()
612 >>> m.files()
615 >>> m.files()
613 ['a.txt', 're:.*\\.c$']
616 ['a.txt', 're:.*\\.c$']
614
617
615 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
618 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
616 >>> m('main.c')
619 >>> m('main.c')
617 False
620 False
618 >>> m('re:.*\.c$')
621 >>> m('re:.*\.c$')
619 True
622 True
620 '''
623 '''
621
624
622 def __init__(self, files, badfn=None):
625 def __init__(self, files, badfn=None):
623 super(exactmatcher, self).__init__(badfn)
626 super(exactmatcher, self).__init__(badfn)
624
627
625 if isinstance(files, list):
628 if isinstance(files, list):
626 self._files = files
629 self._files = files
627 else:
630 else:
628 self._files = list(files)
631 self._files = list(files)
629
632
630 matchfn = basematcher.exact
633 matchfn = basematcher.exact
631
634
632 @propertycache
635 @propertycache
633 def _dirs(self):
636 def _dirs(self):
634 return set(util.dirs(self._fileset)) | {'.'}
637 return set(util.dirs(self._fileset)) | {'.'}
635
638
636 def visitdir(self, dir):
639 def visitdir(self, dir):
637 return dir in self._dirs
640 return dir in self._dirs
638
641
639 def visitchildrenset(self, dir):
642 def visitchildrenset(self, dir):
640 if not self._fileset or dir not in self._dirs:
643 if not self._fileset or dir not in self._dirs:
641 return set()
644 return set()
642
645
643 candidates = self._fileset | self._dirs - {'.'}
646 candidates = self._fileset | self._dirs - {'.'}
644 if dir != '.':
647 if dir != '.':
645 d = dir + '/'
648 d = dir + '/'
646 candidates = set(c[len(d):] for c in candidates if
649 candidates = set(c[len(d):] for c in candidates if
647 c.startswith(d))
650 c.startswith(d))
648 # self._dirs includes all of the directories, recursively, so if
651 # self._dirs includes all of the directories, recursively, so if
649 # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
652 # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
650 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
653 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
651 # '/' in it, indicating a it's for a subdir-of-a-subdir; the
654 # '/' in it, indicating a it's for a subdir-of-a-subdir; the
652 # immediate subdir will be in there without a slash.
655 # immediate subdir will be in there without a slash.
653 ret = {c for c in candidates if '/' not in c}
656 ret = {c for c in candidates if '/' not in c}
654 # We really do not expect ret to be empty, since that would imply that
657 # We really do not expect ret to be empty, since that would imply that
655 # there's something in _dirs that didn't have a file in _fileset.
658 # there's something in _dirs that didn't have a file in _fileset.
656 assert ret
659 assert ret
657 return ret
660 return ret
658
661
659 def isexact(self):
662 def isexact(self):
660 return True
663 return True
661
664
662 @encoding.strmethod
665 @encoding.strmethod
663 def __repr__(self):
666 def __repr__(self):
664 return ('<exactmatcher files=%r>' % self._files)
667 return ('<exactmatcher files=%r>' % self._files)
665
668
666 class differencematcher(basematcher):
669 class differencematcher(basematcher):
667 '''Composes two matchers by matching if the first matches and the second
670 '''Composes two matchers by matching if the first matches and the second
668 does not.
671 does not.
669
672
670 The second matcher's non-matching-attributes (bad, explicitdir,
673 The second matcher's non-matching-attributes (bad, explicitdir,
671 traversedir) are ignored.
674 traversedir) are ignored.
672 '''
675 '''
673 def __init__(self, m1, m2):
676 def __init__(self, m1, m2):
674 super(differencematcher, self).__init__()
677 super(differencematcher, self).__init__()
675 self._m1 = m1
678 self._m1 = m1
676 self._m2 = m2
679 self._m2 = m2
677 self.bad = m1.bad
680 self.bad = m1.bad
678 self.explicitdir = m1.explicitdir
681 self.explicitdir = m1.explicitdir
679 self.traversedir = m1.traversedir
682 self.traversedir = m1.traversedir
680
683
681 def matchfn(self, f):
684 def matchfn(self, f):
682 return self._m1(f) and not self._m2(f)
685 return self._m1(f) and not self._m2(f)
683
686
684 @propertycache
687 @propertycache
685 def _files(self):
688 def _files(self):
686 if self.isexact():
689 if self.isexact():
687 return [f for f in self._m1.files() if self(f)]
690 return [f for f in self._m1.files() if self(f)]
688 # If m1 is not an exact matcher, we can't easily figure out the set of
691 # If m1 is not an exact matcher, we can't easily figure out the set of
689 # files, because its files() are not always files. For example, if
692 # files, because its files() are not always files. For example, if
690 # m1 is "path:dir" and m2 is "rootfileins:.", we don't
693 # m1 is "path:dir" and m2 is "rootfileins:.", we don't
691 # want to remove "dir" from the set even though it would match m2,
694 # want to remove "dir" from the set even though it would match m2,
692 # because the "dir" in m1 may not be a file.
695 # because the "dir" in m1 may not be a file.
693 return self._m1.files()
696 return self._m1.files()
694
697
695 def visitdir(self, dir):
698 def visitdir(self, dir):
696 if self._m2.visitdir(dir) == 'all':
699 if self._m2.visitdir(dir) == 'all':
697 return False
700 return False
698 elif not self._m2.visitdir(dir):
701 elif not self._m2.visitdir(dir):
699 # m2 does not match dir, we can return 'all' here if possible
702 # m2 does not match dir, we can return 'all' here if possible
700 return self._m1.visitdir(dir)
703 return self._m1.visitdir(dir)
701 return bool(self._m1.visitdir(dir))
704 return bool(self._m1.visitdir(dir))
702
705
703 def visitchildrenset(self, dir):
706 def visitchildrenset(self, dir):
704 m2_set = self._m2.visitchildrenset(dir)
707 m2_set = self._m2.visitchildrenset(dir)
705 if m2_set == 'all':
708 if m2_set == 'all':
706 return set()
709 return set()
707 m1_set = self._m1.visitchildrenset(dir)
710 m1_set = self._m1.visitchildrenset(dir)
708 # Possible values for m1: 'all', 'this', set(...), set()
711 # Possible values for m1: 'all', 'this', set(...), set()
709 # Possible values for m2: 'this', set(...), set()
712 # Possible values for m2: 'this', set(...), set()
710 # If m2 has nothing under here that we care about, return m1, even if
713 # If m2 has nothing under here that we care about, return m1, even if
711 # it's 'all'. This is a change in behavior from visitdir, which would
714 # it's 'all'. This is a change in behavior from visitdir, which would
712 # return True, not 'all', for some reason.
715 # return True, not 'all', for some reason.
713 if not m2_set:
716 if not m2_set:
714 return m1_set
717 return m1_set
715 if m1_set in ['all', 'this']:
718 if m1_set in ['all', 'this']:
716 # Never return 'all' here if m2_set is any kind of non-empty (either
719 # Never return 'all' here if m2_set is any kind of non-empty (either
717 # 'this' or set(foo)), since m2 might return set() for a
720 # 'this' or set(foo)), since m2 might return set() for a
718 # subdirectory.
721 # subdirectory.
719 return 'this'
722 return 'this'
720 # Possible values for m1: set(...), set()
723 # Possible values for m1: set(...), set()
721 # Possible values for m2: 'this', set(...)
724 # Possible values for m2: 'this', set(...)
722 # We ignore m2's set results. They're possibly incorrect:
725 # We ignore m2's set results. They're possibly incorrect:
723 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
726 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
724 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
727 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
725 # return set(), which is *not* correct, we still need to visit 'dir'!
728 # return set(), which is *not* correct, we still need to visit 'dir'!
726 return m1_set
729 return m1_set
727
730
728 def isexact(self):
731 def isexact(self):
729 return self._m1.isexact()
732 return self._m1.isexact()
730
733
731 @encoding.strmethod
734 @encoding.strmethod
732 def __repr__(self):
735 def __repr__(self):
733 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
736 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
734
737
735 def intersectmatchers(m1, m2):
738 def intersectmatchers(m1, m2):
736 '''Composes two matchers by matching if both of them match.
739 '''Composes two matchers by matching if both of them match.
737
740
738 The second matcher's non-matching-attributes (bad, explicitdir,
741 The second matcher's non-matching-attributes (bad, explicitdir,
739 traversedir) are ignored.
742 traversedir) are ignored.
740 '''
743 '''
741 if m1 is None or m2 is None:
744 if m1 is None or m2 is None:
742 return m1 or m2
745 return m1 or m2
743 if m1.always():
746 if m1.always():
744 m = copy.copy(m2)
747 m = copy.copy(m2)
745 # TODO: Consider encapsulating these things in a class so there's only
748 # TODO: Consider encapsulating these things in a class so there's only
746 # one thing to copy from m1.
749 # one thing to copy from m1.
747 m.bad = m1.bad
750 m.bad = m1.bad
748 m.explicitdir = m1.explicitdir
751 m.explicitdir = m1.explicitdir
749 m.traversedir = m1.traversedir
752 m.traversedir = m1.traversedir
750 return m
753 return m
751 if m2.always():
754 if m2.always():
752 m = copy.copy(m1)
755 m = copy.copy(m1)
753 return m
756 return m
754 return intersectionmatcher(m1, m2)
757 return intersectionmatcher(m1, m2)
755
758
756 class intersectionmatcher(basematcher):
759 class intersectionmatcher(basematcher):
757 def __init__(self, m1, m2):
760 def __init__(self, m1, m2):
758 super(intersectionmatcher, self).__init__()
761 super(intersectionmatcher, self).__init__()
759 self._m1 = m1
762 self._m1 = m1
760 self._m2 = m2
763 self._m2 = m2
761 self.bad = m1.bad
764 self.bad = m1.bad
762 self.explicitdir = m1.explicitdir
765 self.explicitdir = m1.explicitdir
763 self.traversedir = m1.traversedir
766 self.traversedir = m1.traversedir
764
767
765 @propertycache
768 @propertycache
766 def _files(self):
769 def _files(self):
767 if self.isexact():
770 if self.isexact():
768 m1, m2 = self._m1, self._m2
771 m1, m2 = self._m1, self._m2
769 if not m1.isexact():
772 if not m1.isexact():
770 m1, m2 = m2, m1
773 m1, m2 = m2, m1
771 return [f for f in m1.files() if m2(f)]
774 return [f for f in m1.files() if m2(f)]
772 # It neither m1 nor m2 is an exact matcher, we can't easily intersect
775 # It neither m1 nor m2 is an exact matcher, we can't easily intersect
773 # the set of files, because their files() are not always files. For
776 # the set of files, because their files() are not always files. For
774 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
777 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
775 # "path:dir2", we don't want to remove "dir2" from the set.
778 # "path:dir2", we don't want to remove "dir2" from the set.
776 return self._m1.files() + self._m2.files()
779 return self._m1.files() + self._m2.files()
777
780
778 def matchfn(self, f):
781 def matchfn(self, f):
779 return self._m1(f) and self._m2(f)
782 return self._m1(f) and self._m2(f)
780
783
781 def visitdir(self, dir):
784 def visitdir(self, dir):
782 visit1 = self._m1.visitdir(dir)
785 visit1 = self._m1.visitdir(dir)
783 if visit1 == 'all':
786 if visit1 == 'all':
784 return self._m2.visitdir(dir)
787 return self._m2.visitdir(dir)
785 # bool() because visit1=True + visit2='all' should not be 'all'
788 # bool() because visit1=True + visit2='all' should not be 'all'
786 return bool(visit1 and self._m2.visitdir(dir))
789 return bool(visit1 and self._m2.visitdir(dir))
787
790
788 def visitchildrenset(self, dir):
791 def visitchildrenset(self, dir):
789 m1_set = self._m1.visitchildrenset(dir)
792 m1_set = self._m1.visitchildrenset(dir)
790 if not m1_set:
793 if not m1_set:
791 return set()
794 return set()
792 m2_set = self._m2.visitchildrenset(dir)
795 m2_set = self._m2.visitchildrenset(dir)
793 if not m2_set:
796 if not m2_set:
794 return set()
797 return set()
795
798
796 if m1_set == 'all':
799 if m1_set == 'all':
797 return m2_set
800 return m2_set
798 elif m2_set == 'all':
801 elif m2_set == 'all':
799 return m1_set
802 return m1_set
800
803
801 if m1_set == 'this' or m2_set == 'this':
804 if m1_set == 'this' or m2_set == 'this':
802 return 'this'
805 return 'this'
803
806
804 assert isinstance(m1_set, set) and isinstance(m2_set, set)
807 assert isinstance(m1_set, set) and isinstance(m2_set, set)
805 return m1_set.intersection(m2_set)
808 return m1_set.intersection(m2_set)
806
809
807 def always(self):
810 def always(self):
808 return self._m1.always() and self._m2.always()
811 return self._m1.always() and self._m2.always()
809
812
810 def isexact(self):
813 def isexact(self):
811 return self._m1.isexact() or self._m2.isexact()
814 return self._m1.isexact() or self._m2.isexact()
812
815
813 @encoding.strmethod
816 @encoding.strmethod
814 def __repr__(self):
817 def __repr__(self):
815 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
818 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
816
819
817 class subdirmatcher(basematcher):
820 class subdirmatcher(basematcher):
818 """Adapt a matcher to work on a subdirectory only.
821 """Adapt a matcher to work on a subdirectory only.
819
822
820 The paths are remapped to remove/insert the path as needed:
823 The paths are remapped to remove/insert the path as needed:
821
824
822 >>> from . import pycompat
825 >>> from . import pycompat
823 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
826 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
824 >>> m2 = subdirmatcher(b'sub', m1)
827 >>> m2 = subdirmatcher(b'sub', m1)
825 >>> bool(m2(b'a.txt'))
828 >>> bool(m2(b'a.txt'))
826 False
829 False
827 >>> bool(m2(b'b.txt'))
830 >>> bool(m2(b'b.txt'))
828 True
831 True
829 >>> bool(m2.matchfn(b'a.txt'))
832 >>> bool(m2.matchfn(b'a.txt'))
830 False
833 False
831 >>> bool(m2.matchfn(b'b.txt'))
834 >>> bool(m2.matchfn(b'b.txt'))
832 True
835 True
833 >>> m2.files()
836 >>> m2.files()
834 ['b.txt']
837 ['b.txt']
835 >>> m2.exact(b'b.txt')
838 >>> m2.exact(b'b.txt')
836 True
839 True
837 >>> def bad(f, msg):
840 >>> def bad(f, msg):
838 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
841 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
839 >>> m1.bad = bad
842 >>> m1.bad = bad
840 >>> m2.bad(b'x.txt', b'No such file')
843 >>> m2.bad(b'x.txt', b'No such file')
841 sub/x.txt: No such file
844 sub/x.txt: No such file
842 """
845 """
843
846
844 def __init__(self, path, matcher):
847 def __init__(self, path, matcher):
845 super(subdirmatcher, self).__init__()
848 super(subdirmatcher, self).__init__()
846 self._path = path
849 self._path = path
847 self._matcher = matcher
850 self._matcher = matcher
848 self._always = matcher.always()
851 self._always = matcher.always()
849
852
850 self._files = [f[len(path) + 1:] for f in matcher._files
853 self._files = [f[len(path) + 1:] for f in matcher._files
851 if f.startswith(path + "/")]
854 if f.startswith(path + "/")]
852
855
853 # If the parent repo had a path to this subrepo and the matcher is
856 # If the parent repo had a path to this subrepo and the matcher is
854 # a prefix matcher, this submatcher always matches.
857 # a prefix matcher, this submatcher always matches.
855 if matcher.prefix():
858 if matcher.prefix():
856 self._always = any(f == path for f in matcher._files)
859 self._always = any(f == path for f in matcher._files)
857
860
858 def bad(self, f, msg):
861 def bad(self, f, msg):
859 self._matcher.bad(self._path + "/" + f, msg)
862 self._matcher.bad(self._path + "/" + f, msg)
860
863
861 def matchfn(self, f):
864 def matchfn(self, f):
862 # Some information is lost in the superclass's constructor, so we
865 # Some information is lost in the superclass's constructor, so we
863 # can not accurately create the matching function for the subdirectory
866 # can not accurately create the matching function for the subdirectory
864 # from the inputs. Instead, we override matchfn() and visitdir() to
867 # from the inputs. Instead, we override matchfn() and visitdir() to
865 # call the original matcher with the subdirectory path prepended.
868 # call the original matcher with the subdirectory path prepended.
866 return self._matcher.matchfn(self._path + "/" + f)
869 return self._matcher.matchfn(self._path + "/" + f)
867
870
868 def visitdir(self, dir):
871 def visitdir(self, dir):
869 if dir == '.':
872 if dir == '.':
870 dir = self._path
873 dir = self._path
871 else:
874 else:
872 dir = self._path + "/" + dir
875 dir = self._path + "/" + dir
873 return self._matcher.visitdir(dir)
876 return self._matcher.visitdir(dir)
874
877
875 def visitchildrenset(self, dir):
878 def visitchildrenset(self, dir):
876 if dir == '.':
879 if dir == '.':
877 dir = self._path
880 dir = self._path
878 else:
881 else:
879 dir = self._path + "/" + dir
882 dir = self._path + "/" + dir
880 return self._matcher.visitchildrenset(dir)
883 return self._matcher.visitchildrenset(dir)
881
884
882 def always(self):
885 def always(self):
883 return self._always
886 return self._always
884
887
885 def prefix(self):
888 def prefix(self):
886 return self._matcher.prefix() and not self._always
889 return self._matcher.prefix() and not self._always
887
890
888 @encoding.strmethod
891 @encoding.strmethod
889 def __repr__(self):
892 def __repr__(self):
890 return ('<subdirmatcher path=%r, matcher=%r>' %
893 return ('<subdirmatcher path=%r, matcher=%r>' %
891 (self._path, self._matcher))
894 (self._path, self._matcher))
892
895
893 class prefixdirmatcher(basematcher):
896 class prefixdirmatcher(basematcher):
894 """Adapt a matcher to work on a parent directory.
897 """Adapt a matcher to work on a parent directory.
895
898
896 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
899 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
897 ignored.
900 ignored.
898
901
899 The prefix path should usually be the relative path from the root of
902 The prefix path should usually be the relative path from the root of
900 this matcher to the root of the wrapped matcher.
903 this matcher to the root of the wrapped matcher.
901
904
902 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
905 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
903 >>> m2 = prefixdirmatcher(b'd/e', m1)
906 >>> m2 = prefixdirmatcher(b'd/e', m1)
904 >>> bool(m2(b'a.txt'),)
907 >>> bool(m2(b'a.txt'),)
905 False
908 False
906 >>> bool(m2(b'd/e/a.txt'))
909 >>> bool(m2(b'd/e/a.txt'))
907 True
910 True
908 >>> bool(m2(b'd/e/b.txt'))
911 >>> bool(m2(b'd/e/b.txt'))
909 False
912 False
910 >>> m2.files()
913 >>> m2.files()
911 ['d/e/a.txt', 'd/e/f/b.txt']
914 ['d/e/a.txt', 'd/e/f/b.txt']
912 >>> m2.exact(b'd/e/a.txt')
915 >>> m2.exact(b'd/e/a.txt')
913 True
916 True
914 >>> m2.visitdir(b'd')
917 >>> m2.visitdir(b'd')
915 True
918 True
916 >>> m2.visitdir(b'd/e')
919 >>> m2.visitdir(b'd/e')
917 True
920 True
918 >>> m2.visitdir(b'd/e/f')
921 >>> m2.visitdir(b'd/e/f')
919 True
922 True
920 >>> m2.visitdir(b'd/e/g')
923 >>> m2.visitdir(b'd/e/g')
921 False
924 False
922 >>> m2.visitdir(b'd/ef')
925 >>> m2.visitdir(b'd/ef')
923 False
926 False
924 """
927 """
925
928
926 def __init__(self, path, matcher, badfn=None):
929 def __init__(self, path, matcher, badfn=None):
927 super(prefixdirmatcher, self).__init__(badfn)
930 super(prefixdirmatcher, self).__init__(badfn)
928 if not path:
931 if not path:
929 raise error.ProgrammingError('prefix path must not be empty')
932 raise error.ProgrammingError('prefix path must not be empty')
930 self._path = path
933 self._path = path
931 self._pathprefix = path + '/'
934 self._pathprefix = path + '/'
932 self._matcher = matcher
935 self._matcher = matcher
933
936
934 @propertycache
937 @propertycache
935 def _files(self):
938 def _files(self):
936 return [self._pathprefix + f for f in self._matcher._files]
939 return [self._pathprefix + f for f in self._matcher._files]
937
940
938 def matchfn(self, f):
941 def matchfn(self, f):
939 if not f.startswith(self._pathprefix):
942 if not f.startswith(self._pathprefix):
940 return False
943 return False
941 return self._matcher.matchfn(f[len(self._pathprefix):])
944 return self._matcher.matchfn(f[len(self._pathprefix):])
942
945
943 @propertycache
946 @propertycache
944 def _pathdirs(self):
947 def _pathdirs(self):
945 return set(util.finddirs(self._path)) | {'.'}
948 return set(util.finddirs(self._path)) | {'.'}
946
949
947 def visitdir(self, dir):
950 def visitdir(self, dir):
948 if dir == self._path:
951 if dir == self._path:
949 return self._matcher.visitdir('.')
952 return self._matcher.visitdir('.')
950 if dir.startswith(self._pathprefix):
953 if dir.startswith(self._pathprefix):
951 return self._matcher.visitdir(dir[len(self._pathprefix):])
954 return self._matcher.visitdir(dir[len(self._pathprefix):])
952 return dir in self._pathdirs
955 return dir in self._pathdirs
953
956
954 def visitchildrenset(self, dir):
957 def visitchildrenset(self, dir):
955 if dir == self._path:
958 if dir == self._path:
956 return self._matcher.visitchildrenset('.')
959 return self._matcher.visitchildrenset('.')
957 if dir.startswith(self._pathprefix):
960 if dir.startswith(self._pathprefix):
958 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
961 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
959 if dir in self._pathdirs:
962 if dir in self._pathdirs:
960 return 'this'
963 return 'this'
961 return set()
964 return set()
962
965
963 def isexact(self):
966 def isexact(self):
964 return self._matcher.isexact()
967 return self._matcher.isexact()
965
968
966 def prefix(self):
969 def prefix(self):
967 return self._matcher.prefix()
970 return self._matcher.prefix()
968
971
969 @encoding.strmethod
972 @encoding.strmethod
970 def __repr__(self):
973 def __repr__(self):
971 return ('<prefixdirmatcher path=%r, matcher=%r>'
974 return ('<prefixdirmatcher path=%r, matcher=%r>'
972 % (pycompat.bytestr(self._path), self._matcher))
975 % (pycompat.bytestr(self._path), self._matcher))
973
976
974 class unionmatcher(basematcher):
977 class unionmatcher(basematcher):
975 """A matcher that is the union of several matchers.
978 """A matcher that is the union of several matchers.
976
979
977 The non-matching-attributes (bad, explicitdir, traversedir) are taken from
980 The non-matching-attributes (bad, explicitdir, traversedir) are taken from
978 the first matcher.
981 the first matcher.
979 """
982 """
980
983
981 def __init__(self, matchers):
984 def __init__(self, matchers):
982 m1 = matchers[0]
985 m1 = matchers[0]
983 super(unionmatcher, self).__init__()
986 super(unionmatcher, self).__init__()
984 self.explicitdir = m1.explicitdir
987 self.explicitdir = m1.explicitdir
985 self.traversedir = m1.traversedir
988 self.traversedir = m1.traversedir
986 self._matchers = matchers
989 self._matchers = matchers
987
990
988 def matchfn(self, f):
991 def matchfn(self, f):
989 for match in self._matchers:
992 for match in self._matchers:
990 if match(f):
993 if match(f):
991 return True
994 return True
992 return False
995 return False
993
996
994 def visitdir(self, dir):
997 def visitdir(self, dir):
995 r = False
998 r = False
996 for m in self._matchers:
999 for m in self._matchers:
997 v = m.visitdir(dir)
1000 v = m.visitdir(dir)
998 if v == 'all':
1001 if v == 'all':
999 return v
1002 return v
1000 r |= v
1003 r |= v
1001 return r
1004 return r
1002
1005
def visitchildrenset(self, dir):
    """Combine the ``visitchildrenset`` answers of all wrapped matchers.

    Returns 'all' as soon as any matcher says so, 'this' if any matcher
    said 'this' (and none said 'all'), and otherwise the union of the
    child-name sets returned by the matchers (possibly empty).
    """
    children = set()
    sawthis = False
    for matcher in self._matchers:
        answer = matcher.visitchildrenset(dir)
        if not answer:
            # empty answer: this matcher contributes nothing
            continue
        if answer == 'all':
            return answer
        if sawthis or answer == 'this':
            # Keep looping instead of returning: a later matcher may still
            # answer 'all', which takes precedence.
            sawthis = True
            continue
        assert isinstance(answer, set)
        children = children.union(answer)
    return 'this' if sawthis else children
1021
1024
@encoding.strmethod
def __repr__(self):
    # Debug representation listing the wrapped matchers.
    text = '<unionmatcher matchers=%r>' % self._matchers
    return text
1025
1028
def patkind(pattern, default=None):
    r'''Return the kind of a 'kind:pat' pattern.

    When the pattern carries no known kind prefix, ``default`` is returned
    instead.

    >>> patkind('re:.*\.c$')
    're'
    >>> patkind('glob:*.c')
    'glob'
    >>> patkind('relpath:test.py')
    'relpath'
    >>> patkind('main.py')
    >>> patkind('main.py', default='re')
    're'
    '''
    kind, _pat = _patsplit(pattern, default)
    return kind
1040
1043
def _patsplit(pattern, default):
    """Split ``pattern`` into a ``(kind, pat)`` pair.

    The text before the first ':' is treated as the kind only when it is
    listed in ``allpatternkinds``; otherwise ``(default, pattern)`` is
    returned with the pattern left untouched.
    """
    kind, sep, pat = pattern.partition(':')
    if sep and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1049
1052
def _globre(pat):
    r'''Translate an extended glob into the text of a regular expression.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    pos, end = 0, len(pat)
    parts = []   # regexp fragments, joined once at the end
    depth = 0    # number of currently open '{' groups
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # next unread character, or False once the input is exhausted
        return pos < end and pat[pos:pos + 1]

    while pos < end:
        c = pat[pos:pos + 1]
        pos += 1
        if c not in '*?[{},\\':
            parts.append(escape(c, c))
        elif c == '*':
            if peek() != '*':
                # a single star never crosses a directory separator
                parts.append('[^/]*')
                continue
            pos += 1
            if peek() == '/':
                # '**/' also matches zero directories
                pos += 1
                parts.append('(?:.*/)?')
            else:
                parts.append('.*')
        elif c == '?':
            parts.append('.')
        elif c == '[':
            # look for the closing bracket; a leading '!' or ']' belongs to
            # the character class itself
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat the bracket literally
                parts.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                first = stuff[0:1]
                if first == '!':
                    # glob negation becomes regexp negation
                    stuff = '^' + stuff[1:]
                elif first == '^':
                    # a literal caret must not negate the class
                    stuff = '\\' + stuff
                parts.append('[%s]' % stuff)
        elif c == '{':
            depth += 1
            parts.append('(?:')
        elif c == '}' and depth:
            parts.append(')')
            depth -= 1
        elif c == ',' and depth:
            parts.append('|')
        elif c == '\\':
            nxt = peek()
            if nxt:
                # backslash escapes the following character
                pos += 1
                parts.append(escape(nxt, nxt))
            else:
                # trailing backslash stands for itself
                parts.append(escape(c, c))
        else:
            # '}' or ',' outside of any group
            parts.append(escape(c, c))
    return ''.join(parts)
1130
1133
def _regex(kind, pat, globsuffix):
    '''Turn a (normalized) pattern of the given kind into regexp text.

    ``globsuffix`` is appended to the regexp produced for the glob kinds
    ('glob', 'rootglob' and 'relglob'). An empty pattern yields an empty
    regexp. Kinds that cannot be expressed as a regexp raise
    error.ProgrammingError.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'relre':
        # unanchored unless the pattern anchors itself
        return pat if pat.startswith('^') else '.*' + pat
    if kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        # match the path itself or anything below it
        return util.stringutil.reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # Pattern is a directory name ('.' being the root, which needs no
        # prefix); anything after it must be a non-directory.
        dirprefix = ''
        if pat != '.':
            dirprefix = util.stringutil.reescape(pat) + '/'
        return dirprefix + '[^/]+$'
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1159
1162
def _buildmatch(kindpats, globsuffix, root):
    '''Return a ``(regex, matchfn)`` pair for ``kindpats``.

    ``globsuffix`` is appended to the regexp of globs. The returned regex
    text describes only the non-subinclude patterns; the returned function
    also consults the subinclude matchers.'''
    matchers = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        cache = {}   # prefix -> matcher, filled in on first use

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = cache.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    cache[prefix] = submatch
                # the sub-matcher sees paths relative to its prefix
                if submatch(f[len(prefix):]):
                    return True
            return False

        matchers.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            # Only 'rootfilesin' patterns: a set lookup on the parent
            # directory replaces the regexp entirely.
            dirs = {p for k, p, s in kindpats}

            def matchdir(f):
                slash = f.rfind('/')
                parent = f[:slash] if slash >= 0 else '.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchers.append(matchdir)
        else:
            regex, rematch = _buildregexmatch(kindpats, globsuffix)
            matchers.append(rematch)

    if len(matchers) != 1:
        return regex, lambda f: any(mf(f) for mf in matchers)
    return regex, matchers[0]
1202
1205
# Size limit used by _buildregexmatch(): a single pattern whose regexp is
# longer than this aborts, and larger sets of patterns are split into several
# separately compiled groups.
MAX_RE_SIZE = 20000
1204
1207
1205 def _joinregexes(regexps):
1208 def _joinregexes(regexps):
1206 """gather multiple regular expressions into a single one"""
1209 """gather multiple regular expressions into a single one"""
1207 return '|'.join(regexps)
1210 return '|'.join(regexps)
1208
1211
def _buildregexmatch(kindpats, globsuffix):
    """Compile ``kindpats`` into a match function.

    Returns ``(regexp, matchfn)`` where ``regexp`` is the text of all
    patterns joined together. When that text would be too large, the
    patterns are compiled as several smaller groups and ``matchfn`` tries
    each group in turn. Invalid or over-long patterns raise error.Abort.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        groups = []
        pieces = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(pieces)

        groupstart = 0
        groupsize = 0
        for idx, piece in enumerate(pieces):
            size = len(piece)
            if size > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % size
                raise error.Abort(msg)
            if (groupsize + size) > MAX_RE_SIZE:
                # close the current group and start a new one at this piece
                groups.append(_joinregexes(pieces[groupstart:idx]))
                groupstart = idx
                groupsize = 0
            # +1 accounts for the '|' separator
            groupsize += size + 1

        if groupstart == 0:
            # everything fits into a single regexp
            return fullregexp, _rematcher(fullregexp)

        groups.append(_joinregexes(pieces[groupstart:]))
        allmatchers = [_rematcher(g) for g in groups]
        return fullregexp, lambda s: any(m(s) for m in allmatchers)
    except re.error:
        # Recompile the patterns one at a time to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (s, k, p))
        raise error.Abort(_("invalid pattern"))
1259
1262
1260 def _patternrootsanddirs(kindpats):
1263 def _patternrootsanddirs(kindpats):
1261 '''Returns roots and directories corresponding to each pattern.
1264 '''Returns roots and directories corresponding to each pattern.
1262
1265
1263 This calculates the roots and directories exactly matching the patterns and
1266 This calculates the roots and directories exactly matching the patterns and
1264 returns a tuple of (roots, dirs) for each. It does not return other
1267 returns a tuple of (roots, dirs) for each. It does not return other
1265 directories which may also need to be considered, like the parent
1268 directories which may also need to be considered, like the parent
1266 directories.
1269 directories.
1267 '''
1270 '''
1268 r = []
1271 r = []
1269 d = []
1272 d = []
1270 for kind, pat, source in kindpats:
1273 for kind, pat, source in kindpats:
1271 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1274 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1272 root = []
1275 root = []
1273 for p in pat.split('/'):
1276 for p in pat.split('/'):
1274 if '[' in p or '{' in p or '*' in p or '?' in p:
1277 if '[' in p or '{' in p or '*' in p or '?' in p:
1275 break
1278 break
1276 root.append(p)
1279 root.append(p)
1277 r.append('/'.join(root) or '.')
1280 r.append('/'.join(root) or '.')
1278 elif kind in ('relpath', 'path'):
1281 elif kind in ('relpath', 'path'):
1279 r.append(pat or '.')
1282 r.append(pat or '.')
1280 elif kind in ('rootfilesin',):
1283 elif kind in ('rootfilesin',):
1281 d.append(pat or '.')
1284 d.append(pat or '.')
1282 else: # relglob, re, relre
1285 else: # relglob, re, relre
1283 r.append('.')
1286 r.append('.')
1284 return r, d
1287 return r, d
1285
1288
def _roots(kindpats):
    '''Return the root directories the given patterns match recursively.'''
    # only the first half of the (roots, dirs) pair is of interest here
    return _patternrootsanddirs(kindpats)[0]
1290
1293
def _rootsdirsandparents(kindpats):
    '''Return roots, exact directories and their parents for the patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents are non-recursive/exact directories: they must be scanned
    # to reach either the roots or the other exact directories.
    parents = list(util.dirs(dirs))
    parents.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1331
1334
def _explicitfiles(kindpats):
    '''Return the filenames the patterns may name explicitly.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so it is left
    # out before computing the roots.
    return _roots([kp for kp in kindpats if kp[0] != 'rootfilesin'])
1344
1347
1345 def _prefix(kindpats):
1348 def _prefix(kindpats):
1346 '''Whether all the patterns match a prefix (i.e. recursively)'''
1349 '''Whether all the patterns match a prefix (i.e. recursively)'''
1347 for kind, pat, source in kindpats:
1350 for kind, pat, source in kindpats:
1348 if kind not in ('path', 'relpath'):
1351 if kind not in ('path', 'relpath'):
1349 return False
1352 return False
1350 return True
1353 return True
1351
1354
1352 _commentre = None
1355 _commentre = None
1353
1356
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    filepath is the path of the file to read. warn, when not false, is
    called with a message for each unknown "syntax:" line (which is then
    ignored).

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.
    '''
    global _commentre

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The 'with' block closes the file even when parsing, regexp compilation
    # or the warn() callback raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line prefix (either the short name followed by ':' or
            # the expanded form) overrides the current default syntax.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now