##// END OF EJS Templates
match: add doctest examples for patkind()
Denis Laxalde -
r42251:413a75da default
parent child Browse files
Show More
@@ -1,1418 +1,1429 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from . import (
16 from . import (
17 encoding,
17 encoding,
18 error,
18 error,
19 pathutil,
19 pathutil,
20 pycompat,
20 pycompat,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
27 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
27 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
28 'rootglob',
28 'rootglob',
29 'listfile', 'listfile0', 'set', 'include', 'subinclude',
29 'listfile', 'listfile0', 'set', 'include', 'subinclude',
30 'rootfilesin')
30 'rootfilesin')
31 cwdrelativepatternkinds = ('relpath', 'glob')
31 cwdrelativepatternkinds = ('relpath', 'glob')
32
32
33 propertycache = util.propertycache
33 propertycache = util.propertycache
34
34
def _rematcher(regex):
    '''Compile ``regex`` via ``util.re`` and return a matcher callable.

    The compiled object's ``test_match`` attribute is preferred when it
    exists; otherwise the regular ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        fastmatch = compiled.test_match
    except AttributeError:
        return compiled.match
    else:
        return fastmatch
44
44
45 def _expandsets(kindpats, ctx, listsubrepos, badfn):
45 def _expandsets(kindpats, ctx, listsubrepos, badfn):
46 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
46 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
47 matchers = []
47 matchers = []
48 other = []
48 other = []
49
49
50 for kind, pat, source in kindpats:
50 for kind, pat, source in kindpats:
51 if kind == 'set':
51 if kind == 'set':
52 if ctx is None:
52 if ctx is None:
53 raise error.ProgrammingError("fileset expression with no "
53 raise error.ProgrammingError("fileset expression with no "
54 "context")
54 "context")
55 matchers.append(ctx.matchfileset(pat, badfn=badfn))
55 matchers.append(ctx.matchfileset(pat, badfn=badfn))
56
56
57 if listsubrepos:
57 if listsubrepos:
58 for subpath in ctx.substate:
58 for subpath in ctx.substate:
59 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
59 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
60 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
60 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
61 matchers.append(pm)
61 matchers.append(pm)
62
62
63 continue
63 continue
64 other.append((kind, pat, source))
64 other.append((kind, pat, source))
65 return matchers, other
65 return matchers, other
66
66
67 def _expandsubinclude(kindpats, root):
67 def _expandsubinclude(kindpats, root):
68 '''Returns the list of subinclude matcher args and the kindpats without the
68 '''Returns the list of subinclude matcher args and the kindpats without the
69 subincludes in it.'''
69 subincludes in it.'''
70 relmatchers = []
70 relmatchers = []
71 other = []
71 other = []
72
72
73 for kind, pat, source in kindpats:
73 for kind, pat, source in kindpats:
74 if kind == 'subinclude':
74 if kind == 'subinclude':
75 sourceroot = pathutil.dirname(util.normpath(source))
75 sourceroot = pathutil.dirname(util.normpath(source))
76 pat = util.pconvert(pat)
76 pat = util.pconvert(pat)
77 path = pathutil.join(sourceroot, pat)
77 path = pathutil.join(sourceroot, pat)
78
78
79 newroot = pathutil.dirname(path)
79 newroot = pathutil.dirname(path)
80 matcherargs = (newroot, '', [], ['include:%s' % path])
80 matcherargs = (newroot, '', [], ['include:%s' % path])
81
81
82 prefix = pathutil.canonpath(root, root, newroot)
82 prefix = pathutil.canonpath(root, root, newroot)
83 if prefix:
83 if prefix:
84 prefix += '/'
84 prefix += '/'
85 relmatchers.append((prefix, matcherargs))
85 relmatchers.append((prefix, matcherargs))
86 else:
86 else:
87 other.append((kind, pat, source))
87 other.append((kind, pat, source))
88
88
89 return relmatchers, other
89 return relmatchers, other
90
90
91 def _kindpatsalwaysmatch(kindpats):
91 def _kindpatsalwaysmatch(kindpats):
92 """"Checks whether the kindspats match everything, as e.g.
92 """"Checks whether the kindspats match everything, as e.g.
93 'relpath:.' does.
93 'relpath:.' does.
94 """
94 """
95 for kind, pat, source in kindpats:
95 for kind, pat, source in kindpats:
96 if pat != '' or kind not in ['relpath', 'glob']:
96 if pat != '' or kind not in ['relpath', 'glob']:
97 return False
97 return False
98 return True
98 return True
99
99
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher out of ``kindpats``.

    'set' patterns are expanded to fileset matchers by _expandsets(); the
    remaining patterns (if any) are handed to ``matchercls(root, kindpats,
    badfn=badfn)``. The result is:

    - a nevermatcher when nothing was produced,
    - the sole matcher when exactly one was produced,
    - a unionmatcher over all of them otherwise (the ``matchercls`` matcher
      first, then the fileset matchers).
    '''
    filesetmatchers, plainpats = _expandsets(kindpats, ctx=ctx,
                                             listsubrepos=listsubrepos,
                                             badfn=badfn)
    matchers = []
    if plainpats:
        matchers.append(matchercls(root, plainpats, badfn=badfn))
    matchers.extend(filesetmatchers)

    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
115
115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - passed through to pattern normalization, where it is handed
        to pathutil.canonpath() for cwd-relative patterns
    ctx - context used to expand 'set:' patterns; it is also the source of
        the dirstate when icasefs is true, so it must not be None then
    listsubrepos - when expanding 'set:' patterns, also build matchers for
        the subrepositories listed in ctx.substate
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
        (it is not forwarded to the include/exclude matchers)
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            normalized = _donormalize(patterns, default, root, cwd, auditor,
                                      warn)
            kindpats = []
            for kind, pat, source in normalized:
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    kindpats.append((kind, pat, source))
                    continue

                folded = dsnormalize(pat)

                # Preserve the original to handle a case only rename.
                if pat != folded and pat in dirstate:
                    kindpats.append((kind, pat, source))

                kindpats.append((kind, folded, source))
            return kindpats
    else:
        normalize = _donormalize

    def buildfilter(pats):
        # include and exclude patterns share the same construction: they
        # default to 'glob' and never receive the caller's badfn.
        kindpats = normalize(pats, 'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
                                     listsubrepos=listsubrepos, badfn=None)

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        matcher = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            matcher = alwaysmatcher(badfn)
        else:
            matcher = _buildkindpatsmatcher(patternmatcher, root, kindpats,
                                            ctx=ctx,
                                            listsubrepos=listsubrepos,
                                            badfn=badfn)

    if include:
        matcher = intersectmatchers(matcher, buildfilter(include))
    if exclude:
        matcher = differencematcher(matcher, buildfilter(exclude))
    return matcher
192
192
def exact(files, badfn=None):
    '''Return an exactmatcher built from ``files`` and ``badfn``.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
195
195
def always(badfn=None):
    '''Return an alwaysmatcher, i.e. a matcher that matches everything.'''
    return alwaysmatcher(badfn=badfn)
198
198
def never(badfn=None):
    '''Return a nevermatcher, i.e. a matcher that matches nothing.'''
    return nevermatcher(badfn=badfn)
201
201
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() callback is ``badfn``.

    The matcher passed in is left untouched; only the copy gets the new
    ``bad`` attribute.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
209
209
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Each pattern is split into (kind, pat) by _patsplit(), using ``default``
    when no kind is given, and then handled according to its kind:

    - kinds in cwdrelativepatternkinds are canonicalized against ``root``
      and ``cwd`` (``auditor`` is passed to pathutil.canonpath());
    - 'relglob', 'path', 'rootfilesin' and 'rootglob' go through
      util.normpath();
    - 'listfile'/'listfile0' read the named file, split it on newlines or
      NUL bytes respectively, drop empty entries and normalize the entries
      recursively; the list file name becomes the source of each result;
    - 'include' reads a pattern file located under ``root`` and normalizes
      its patterns recursively; an unreadable file is skipped, with a
      message sent to ``warn`` when it is set;
    - anything else ('re', 'relre', ...) is kept as is.

    Returns a list of (kind, pat, source) tuples.

    Raises error.Abort if a list file cannot be read, or if processing an
    included pattern file aborts (the message is prefixed with the include
    pattern).
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Use .args[0] rather than inst[0]: indexing an exception
                # object only works on Python 2, while .args yields the
                # same value on both Python 2 and Python 3.
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
250
250
class basematcher(object):
    '''Common base of the matcher classes.

    The defaults provided here describe a matcher with no explicit files
    that matches nothing; subclasses override the pieces they need.
    '''

    def __init__(self, badfn=None):
        # Only shadow the bad() method when a replacement was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        '''Calling the matcher is the same as calling matchfn().'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the explicitly listed files.'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        '''Return whether f matches; the base implementation never does.'''
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
372
372
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        '''This matcher matches every file.'''
        return True

    def matchfn(self, f):
        '''Every file matches.'''
        return True

    def visitdir(self, dir):
        '''Every directory is to be visited, along with its subdirectories.'''
        return 'all'

    def visitchildrenset(self, dir):
        '''Counterpart of visitdir(): always 'all'.'''
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
393
393
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        '''Reported as exact so callers may take the exact fast path.'''
        return True

    def prefix(self):
        '''Reported as prefix so callers may take the prefix fast path.'''
        return True

    def visitdir(self, dir):
        '''No directory needs to be visited.'''
        return False

    def visitchildrenset(self, dir):
        '''Counterpart of visitdir(): always the empty set.'''
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
419
419
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` is installed as this instance's matchfn. ``predrepr``, when
    given, is what __repr__ uses to describe the predicate.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Fall back to the repr of the predicate itself when no usable
        # description was supplied.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): 'predicatenmatcher' looks like a typo, but the text
        # is emitted at runtime and is kept verbatim -- confirm before
        # changing it.
        return '<predicatenmatcher pred=%s>' % described
433
433
class patternmatcher(basematcher):
    # The docstring is a raw string because the doctest contains regular
    # expressions with backslashes ('\.'), which would otherwise be invalid
    # escape sequences in a normal string literal.
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     ('re', '.*\.c$', ''),
    ...     ('path', 'foo/a', ''),
    ...     ('relpath', 'b', ''),
    ...     ('glob', '*.h', ''),
    ... ]
    >>> m = patternmatcher('foo', kindpats)
    >>> bool(m('main.c'))  # matches re:.*\.c$
    True
    >>> bool(m('b.txt'))
    False
    >>> bool(m('foo/a'))  # matches path:foo/a
    True
    >>> bool(m('a'))  # does not match path:b, since 'root' is 'foo'
    False
    >>> bool(m('b'))  # matches relpath:b, since 'root' is 'foo'
    True
    >>> bool(m('lib.h'))  # matches glob:*.h
    True

    >>> m.files()
    ['.', 'foo/a', 'b', '.']
    >>> m.exact('foo/a')
    True
    >>> m.exact('b')
    True
    >>> m.exact('lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # Directories derived from the explicit files, plus the root '.'.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Return 'all', True or False for ``dir`` (see basematcher.visitdir).

        'all' is only returned for a prefix matcher whose explicit files
        contain ``dir``. Otherwise the directory is visited when '.' or
        ``dir`` is an explicit file, when ``dir`` is one of the directories
        derived from the explicit files, or when one of the directories
        yielded by util.finddirs(dir) is an explicit file.
        '''
        fileset = self._fileset
        if self._prefix and dir in fileset:
            return 'all'
        if '.' in fileset or dir in fileset or dir in self._dirs:
            return True
        return any(parentdir in fileset
                   for parentdir in util.finddirs(dir))

    def visitchildrenset(self, dir):
        '''Translate the visitdir() answer: True -> 'this', False -> set(),
        'all' -> 'all'.'''
        ret = self.visitdir(dir)
        if ret is True:
            return 'this'
        elif not ret:
            return set()
        assert ret == 'all'
        return 'all'

    def prefix(self):
        '''Return the prefix flag computed from the kindpats by _prefix().'''
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
502
502
503 # This is basically a reimplementation of util.dirs that stores the children
503 # This is basically a reimplementation of util.dirs that stores the children
504 # instead of just a count of them, plus a small optional optimization to avoid
504 # instead of just a count of them, plus a small optional optimization to avoid
505 # some directories we don't need.
505 # some directories we don't need.
class _dirchildren(object):
    """Map each directory to the names of its immediate children.

    For every path added, each ancestor directory (including the root,
    spelled '.') gets the next path component recorded as one of its
    children.

    ``onlyinclude`` optionally restricts which directories are recorded:
    when given, only directories contained in it are tracked. When it is
    None (the default) no restriction is applied and every directory is
    tracked. An explicitly passed empty collection tracks nothing.
    """

    def __init__(self, paths, onlyinclude=None):
        self._dirs = {}
        # Keep None distinct from "empty": None means "no filter", while an
        # empty collection legitimately means "record nothing".
        self._onlyinclude = onlyinclude
        addpath = self.addpath
        for f in paths:
            addpath(f)

    def addpath(self, path):
        """Record ``path`` as a child of each of its ancestor directories.

        The root itself ('.') has no parent and is ignored.
        """
        if path == '.':
            return
        dirs = self._dirs
        onlyinclude = self._onlyinclude
        for d, b in _dirchildren._findsplitdirs(path):
            if onlyinclude is not None and d not in onlyinclude:
                continue
            dirs.setdefault(d, set()).add(b)

    @staticmethod
    def _findsplitdirs(path):
        # yields (dirname, basename) tuples, walking back to the root. This is
        # very similar to util.finddirs, except:
        # - produces a (dirname, basename) tuple, not just 'dirname'
        # - includes root dir
        # Unlike manifest._splittopdir, this does not suffix `dirname` with a
        # slash, and produces '.' for the root instead of ''.
        oldpos = len(path)
        pos = path.rfind('/')
        while pos != -1:
            yield path[:pos], path[pos + 1:oldpos]
            oldpos = pos
            pos = path.rfind('/', 0, pos)
        yield '.', path[:oldpos]

    def get(self, path):
        """Return the set of recorded children of ``path`` (empty if none)."""
        return self._dirs.get(path, set())
542
542
class includematcher(basematcher):
    """Matcher built from a list of (kind, pattern) include entries.

    The compiled match function treats a pattern as matching either the
    path itself or anything below it (regex suffix '(?:/|$)').
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots: directories which are recursively included.
        self._roots = set(roots)
        # dirs: directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents: directories which are non-recursively included only
        # because they lead to something in _dirs or _roots.
        self._parents = set(parents)

    def visitdir(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots:
            return True
        if dir in self._dirs or dir in self._parents:
            return True
        # Anything below a recursively included root must be visited too.
        return any(parentdir in roots for parentdir in util.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # Dirs, roots and parents are all fed in, but only entries keyed by
        # a parent are kept. Feeding only self._parents would lose e.g.:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # where the children of 'foo' must include 'bar'.
        allpaths = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(allpaths, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        # Note: the 'dir in self._parents' case from visitdir is
        # deliberately not part of this test; it is handled separately below.
        visitthis = ('.' in roots or
                     dir in roots or
                     dir in self._dirs or
                     any(parentdir in roots
                         for parentdir in util.finddirs(dir)))
        if visitthis:
            return 'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
600
600
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher(['a.txt', 're:.*\.c$'])
    >>> m('a.txt')
    True
    >>> m('b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m('main.c')
    False
    >>> m('re:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # Reuse the caller's list as-is; copy any other iterable into a list.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Every directory containing one of the files, plus the root.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | (self._dirs - {'.'})
        if dir != '.':
            prefix = dir + '/'
            skip = len(prefix)
            candidates = {c[skip:] for c in candidates
                          if c.startswith(prefix)}
        # self._dirs holds every directory recursively: when matching
        # foo/bar/baz.txt it contains '.', 'foo' and 'foo/bar'. A candidate
        # that still has a '/' therefore belongs to a subdir-of-a-subdir and
        # can be dropped; its immediate subdir is present without a slash.
        children = {c for c in candidates if '/' not in c}
        # An empty result would mean something in _dirs had no file in
        # _fileset, which is not expected.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
665
665
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 only once; its answer drives all three outcomes below.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            # m2 matches everything under dir, so nothing can remain.
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 matches part of dir: never promise 'all', only a plain bool.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
734
734
def intersectmatchers(m1, m2):
    '''Return a matcher that matches only what both m1 and m2 match.

    Either argument may be None, in which case the other one is returned
    unchanged. When one side always matches, a shallow copy of the other
    side is returned instead of building an intersectionmatcher.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 alone decides what matches, but the callbacks still come
        # from m1.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        result = copy.copy(m2)
        result.bad = m1.bad
        result.explicitdir = m1.explicitdir
        result.traversedir = m1.traversedir
        return result

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
755
755
class intersectionmatcher(basematcher):
    '''Matches a file only when both wrapped matchers match it.

    The non-matching-attributes (bad, explicitdir, traversedir) are taken
    from the first matcher.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # At least one side is exact: list its files and filter them
        # through the other side.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == 'all':
            return self._m2.visitdir(dir)
        if not visit1:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side.
        if m1_set == 'all':
            return m2_set
        if m2_set == 'all':
            return m1_set

        if 'this' in (m1_set, m2_set):
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
816
816
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below `path`, and
        # strip the "<path>/" prefix from them.
        pathprefix = path + "/"
        striplen = len(path) + 1
        self._files = [f[striplen:] for f in matcher._files
                       if f.startswith(pathprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def _parentpath(self, name):
        # Translate a path relative to the subdirectory into one relative
        # to the wrapped matcher.
        return self._path + "/" + name

    def _parentdir(self, dir):
        # Same translation for directories, where '.' means the subdirectory
        # itself.
        if dir == '.':
            return self._path
        return self._parentpath(dir)

    def bad(self, f, msg):
        self._matcher.bad(self._parentpath(f), msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._parentpath(f))

    def visitdir(self, dir):
        return self._matcher.visitdir(self._parentdir(dir))

    def visitchildrenset(self, dir):
        return self._matcher.visitchildrenset(self._parentdir(dir))

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
892
892
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> bool(m2(b'a.txt'),)
    False
    >>> bool(m2(b'd/e/a.txt'))
    True
    >>> bool(m2(b'd/e/b.txt'))
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._path = path
        self._pathprefix = path + '/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + f for f in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        # Anything outside the prefix directory can never match.
        return False

    @propertycache
    def _pathdirs(self):
        # The directories returned by util.finddirs for the prefix path,
        # plus the root.
        return set(util.finddirs(self._path)) | {'.'}

    def visitdir(self, dir):
        if dir == self._path:
            return self._matcher.visitdir('.')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix):])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        if dir == self._path:
            return self._matcher.visitchildrenset('.')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix):])
        if dir not in self._pathdirs:
            return set()
        return 'this'

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return ('<prefixdirmatcher path=%r, matcher=%r>'
                % (pycompat.bytestr(self._path), self._matcher))
973
973
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The explicitdir and traversedir attributes are taken from the first
    matcher. Note that ``bad`` is *not* copied from it: this matcher keeps
    whatever basematcher.__init__() sets up when called without a badfn.

    ``matchers`` must be a non-empty sequence.
    """

    def __init__(self, matchers):
        m1 = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # True as soon as any of the wrapped matchers accepts the file.
        for match in self._matchers:
            if match(f):
                return True
        return False

    def visitdir(self, dir):
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == 'all':
                # One matcher wanting everything settles the question.
                return v
            r |= v
        return r

    def visitchildrenset(self, dir):
        r = set()
        this = False
        for m in self._matchers:
            v = m.visitchildrenset(dir)
            if not v:
                continue
            if v == 'all':
                return v
            if this or v == 'this':
                this = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(v, set)
            r = r.union(v)
        if this:
            return 'this'
        return r

    @encoding.strmethod
    def __repr__(self):
        # Wrap in a 1-tuple so that a tuple of matchers is formatted as a
        # single %r argument instead of being unpacked by the % operator.
        return ('<unionmatcher matchers=%r>' % (self._matchers,))
1025
1025
def patkind(pattern, default=None):
    r'''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return ``default``.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    # The docstring is raw so the backslash in the regexp example is not
    # read as a (invalid) string escape; the examples use bytes literals
    # like the other doctests in this module.
    return _patsplit(pattern, default)[0]
1029
1040
def _patsplit(pattern, default):
    """Return a ``(kind, pat)`` pair for ``pattern``.

    When ``pattern`` has the form ``kind:pat`` and ``kind`` is listed in
    ``allpatternkinds``, the prefix is split off at the first colon.
    Otherwise the pair is ``(default, pattern)`` with the pattern left
    untouched."""
    prefix, colon, rest = pattern.partition(':')
    if colon and prefix in allpatternkinds:
        return prefix, rest
    return default, pattern
1038
1049
def _globre(pat):
    r'''Translate an extended glob pattern into regexp source.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    pos, end = 0, len(pat)
    pieces = []     # regexp fragments, concatenated at the end
    depth = 0       # number of currently open '{' groups
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # the next unread character, or a false value at end of pattern
        return pos < end and pat[pos:pos + 1]

    while pos < end:
        c = pat[pos:pos + 1]
        pos += 1
        if c not in '*?[{},\\':
            # ordinary character: emit it, regexp-escaped when needed
            pieces.append(escape(c, c))
        elif c == '*':
            if peek() == '*':
                pos += 1
                if peek() == '/':
                    # '**/' also matches zero directories
                    pos += 1
                    pieces.append('(?:.*/)?')
                else:
                    pieces.append('.*')
            else:
                # a single '*' does not cross a '/'
                pieces.append('[^/]*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # look for the closing ']'; a leading '!' or ']' is part of
            # the class body
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # no closing bracket: treat '[' as a literal
                pieces.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                lead = stuff[0:1]
                if lead == '!':
                    # glob negation becomes regexp negation
                    stuff = '^' + stuff[1:]
                elif lead == '^':
                    # a literal '^' must not negate the regexp class
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        elif c == '{':
            depth += 1
            pieces.append('(?:')
        elif c == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif c == ',' and depth:
            pieces.append('|')
        elif c == '\\':
            nxt = peek()
            if nxt:
                # backslash quotes the following character
                pos += 1
                pieces.append(escape(nxt, nxt))
            else:
                # trailing backslash stands for itself
                pieces.append(escape(c, c))
        else:
            # '}' or ',' outside of any group
            pieces.append(escape(c, c))
    return ''.join(pieces)
1119
1130
def _regex(kind, pat, globsuffix):
    '''Return the regular expression for a (normalized) pattern ``pat`` of
    the given ``kind``.

    ``globsuffix`` is appended to expressions generated from globs. An
    empty ``pat`` yields an empty expression; a kind that cannot be
    expressed as a regexp raises error.ProgrammingError.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'relre':
        # unanchored expressions may match anywhere in the path
        return pat if pat.startswith('^') else '.*' + pat
    if kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        return util.stringutil.reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # '.' is the root; anything else is a directory name.
        dirprefix = '' if pat == '.' else util.stringutil.reescape(pat) + '/'
        # Anything after the directory must be a non-directory.
        return dirprefix + '[^/]+$'
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1148
1159
def _buildmatch(kindpats, globsuffix, root):
    '''Build a matcher for ``kindpats``.

    Returns a ``(regex, matchfn)`` pair, where ``regex`` is a string
    describing the non-subinclude patterns and ``matchfn`` is a function
    taking a file name. ``globsuffix`` is appended to the regexp of globs.'''
    subincludes, kindpats = _expandsubinclude(kindpats, root)
    candidates = []

    if subincludes:
        # sub-matchers are built on first use and cached by prefix
        cache = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                sub = cache.get(prefix)
                if sub is None:
                    sub = match(*matcherargs)
                    cache[prefix] = sub
                # the sub-matcher sees the name relative to its prefix
                if sub(f[len(prefix):]):
                    return True
            return False
        candidates.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            # only 'rootfilesin' patterns: a set lookup on the containing
            # directory replaces the regexp
            dirs = {p for k, p, s in kindpats}
            def matchdirs(f):
                slash = f.rfind('/')
                parent = f[:slash] if slash >= 0 else '.'
                return parent in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            candidates.append(matchdirs)
        else:
            regex, rematch = _buildregexmatch(kindpats, globsuffix)
            candidates.append(rematch)

    if len(candidates) == 1:
        return regex, candidates[0]
    return regex, lambda f: any(fn(f) for fn in candidates)
1191
1202
# Size limit used by _buildregexmatch(): a single pattern whose regexp is
# longer than this is rejected, and the joined alternation is split into
# several groups once the accumulated size would exceed it.
MAX_RE_SIZE = 20000
1193
1204
def _joinregexes(regexps):
    """Combine several regular expressions into a single alternation.

    The expressions are joined with ``|``, in the order given."""
    separator = '|'
    return separator.join(regexps)
1197
1208
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The regexps are joined into one alternation. When that would exceed
    MAX_RE_SIZE they are compiled in several groups and the returned
    function tries each group in turn.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        groups = []
        groupstart = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if (groupsize + piecesize) > MAX_RE_SIZE:
                # close the current group and start a new one at this piece
                groups.append(_joinregexes(regexps[groupstart:idx]))
                groupstart = idx
                groupsize = 0
            # the extra 1 accounts for the '|' separator
            groupsize += piecesize + 1

        if groupstart == 0:
            # everything fits in a single regexp
            return fullregexp, _rematcher(fullregexp)

        groups.append(_joinregexes(regexps[groupstart:]))
        matchers = [_rematcher(g) for g in groups]
        return fullregexp, lambda s: any(m(s) for m in matchers)
    except re.error:
        # Compile the patterns one at a time so the error message can name
        # the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                      (s, k, p))
                raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_("invalid pattern"))
1248
1259
def _patternrootsanddirs(kindpats):
    '''Return the roots and directories corresponding to each pattern.

    The result is a ``(roots, dirs)`` tuple of lists holding the roots and
    directories that exactly match the patterns. Other directories that
    may also need to be considered, such as parent directories, are not
    included.
    '''
    globchars = ('[', '{', '*', '?')
    roots = []
    dirs = []
    for kind, pat, source in kindpats:
        if kind in ('relpath', 'path'):
            roots.append(pat or '.')
        elif kind in ('rootfilesin',):
            dirs.append(pat or '.')
        elif kind in ('glob', 'rootglob'):
            # keep the leading path components free of glob syntax
            prefix = []
            for component in pat.split('/'):
                if any(ch in component for ch in globchars):
                    break
                prefix.append(component)
            roots.append('/'.join(prefix) or '.')
        else:
            # relglob, re, relre
            roots.append('.')
    return roots, dirs
1274
1285
def _roots(kindpats):
    '''Return the root directories to match recursively for the patterns.

    This is the first element of _patternrootsanddirs(); the exact
    directories are dropped.'''
    return _patternrootsanddirs(kindpats)[0]
1279
1290
def _rootsdirsandparents(kindpats):
    '''Return roots, exact directories and their parents for the patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents are collected as non-recursive/exact directories, since
    # they must be scanned to reach the roots or the other exact
    # directories. Parents of `dirs` come first, then parents of `roots`.
    parents = list(util.dirs(dirs))
    parents.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1320
1331
def _explicitfiles(kindpats):
    '''Return the potential explicit file names named by the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so patterns of
    # that kind are left out before computing the roots.
    filable = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(filable)
1333
1344
def _prefix(kindpats):
    '''Tell whether every pattern matches a prefix (i.e. recursively).

    That is the case when all kinds are 'path' or 'relpath'; an empty
    pattern list also yields True.'''
    return all(kind in ('path', 'relpath') for kind, pat, source in kindpats)
1340
1351
# Compiled regexp used by readpatternfile() to strip '#' comments. It is
# compiled the first time a line containing '#' is read and then reused.
_commentre = None
1342
1353
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    filepath is opened in binary mode and is always closed again, even
    when parsing fails part-way through.

    warn, when true, is called with a message for every "syntax:" line
    naming an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.
    '''
    global _commentre

    # Maps a syntax name to the kind prefix put in front of the pattern.
    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The context manager guarantees the file is closed even if reading,
    # parsing or the warn callback raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # switch the default syntax for the following lines
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # A per-line prefix (either the syntax name followed by ':' or
            # the kind prefix itself) overrides the current default.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now