##// END OF EJS Templates
match: avoid translating glob to matcher multiple times for large sets...
Boris Feld -
r40813:3c842749 default
parent child Browse files
Show More
@@ -1,1399 +1,1411
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from . import (
16 from . import (
17 encoding,
17 encoding,
18 error,
18 error,
19 pathutil,
19 pathutil,
20 pycompat,
20 pycompat,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
# Every pattern kind prefix ('kind:pattern') recognised by this module.
allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
# Pattern kinds that _donormalize() canonicalizes relative to cwd.
cwdrelativepatternkinds = ('relpath', 'glob')

propertycache = util.propertycache
33
33
def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The returned callable is the compiled pattern's ``test_match`` method when
    the engine provides one, otherwise its ``match`` method.'''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match
43
43
44 def _expandsets(root, cwd, kindpats, ctx, listsubrepos, badfn):
44 def _expandsets(root, cwd, kindpats, ctx, listsubrepos, badfn):
45 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
45 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
46 matchers = []
46 matchers = []
47 other = []
47 other = []
48
48
49 for kind, pat, source in kindpats:
49 for kind, pat, source in kindpats:
50 if kind == 'set':
50 if kind == 'set':
51 if not ctx:
51 if not ctx:
52 raise error.ProgrammingError("fileset expression with no "
52 raise error.ProgrammingError("fileset expression with no "
53 "context")
53 "context")
54 matchers.append(ctx.matchfileset(pat, badfn=badfn))
54 matchers.append(ctx.matchfileset(pat, badfn=badfn))
55
55
56 if listsubrepos:
56 if listsubrepos:
57 for subpath in ctx.substate:
57 for subpath in ctx.substate:
58 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
58 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
59 pm = prefixdirmatcher(root, cwd, subpath, sm, badfn=badfn)
59 pm = prefixdirmatcher(root, cwd, subpath, sm, badfn=badfn)
60 matchers.append(pm)
60 matchers.append(pm)
61
61
62 continue
62 continue
63 other.append((kind, pat, source))
63 other.append((kind, pat, source))
64 return matchers, other
64 return matchers, other
65
65
66 def _expandsubinclude(kindpats, root):
66 def _expandsubinclude(kindpats, root):
67 '''Returns the list of subinclude matcher args and the kindpats without the
67 '''Returns the list of subinclude matcher args and the kindpats without the
68 subincludes in it.'''
68 subincludes in it.'''
69 relmatchers = []
69 relmatchers = []
70 other = []
70 other = []
71
71
72 for kind, pat, source in kindpats:
72 for kind, pat, source in kindpats:
73 if kind == 'subinclude':
73 if kind == 'subinclude':
74 sourceroot = pathutil.dirname(util.normpath(source))
74 sourceroot = pathutil.dirname(util.normpath(source))
75 pat = util.pconvert(pat)
75 pat = util.pconvert(pat)
76 path = pathutil.join(sourceroot, pat)
76 path = pathutil.join(sourceroot, pat)
77
77
78 newroot = pathutil.dirname(path)
78 newroot = pathutil.dirname(path)
79 matcherargs = (newroot, '', [], ['include:%s' % path])
79 matcherargs = (newroot, '', [], ['include:%s' % path])
80
80
81 prefix = pathutil.canonpath(root, root, newroot)
81 prefix = pathutil.canonpath(root, root, newroot)
82 if prefix:
82 if prefix:
83 prefix += '/'
83 prefix += '/'
84 relmatchers.append((prefix, matcherargs))
84 relmatchers.append((prefix, matcherargs))
85 else:
85 else:
86 other.append((kind, pat, source))
86 other.append((kind, pat, source))
87
87
88 return relmatchers, other
88 return relmatchers, other
89
89
90 def _kindpatsalwaysmatch(kindpats):
90 def _kindpatsalwaysmatch(kindpats):
91 """"Checks whether the kindspats match everything, as e.g.
91 """"Checks whether the kindspats match everything, as e.g.
92 'relpath:.' does.
92 'relpath:.' does.
93 """
93 """
94 for kind, pat, source in kindpats:
94 for kind, pat, source in kindpats:
95 if pat != '' or kind not in ['relpath', 'glob']:
95 if pat != '' or kind not in ['relpath', 'glob']:
96 return False
96 return False
97 return True
97 return True
98
98
def _buildkindpatsmatcher(matchercls, root, cwd, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher for kindpats.

    'set' patterns are expanded to fileset matchers first; the remaining
    patterns (if any) are handed to ``matchercls``. Returns a nevermatcher
    when nothing is left to match, the sole matcher when there is exactly
    one, and a unionmatcher over all of them otherwise.'''
    matchers = []
    fms, kindpats = _expandsets(root, cwd, kindpats, ctx=ctx,
                                listsubrepos=listsubrepos, badfn=badfn)
    if kindpats:
        m = matchercls(root, cwd, kindpats, listsubrepos=listsubrepos,
                       badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(root, cwd, badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
115
115
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    auditor - passed through to pattern normalization
    ctx - context used to evaluate 'set:' patterns; also used to reach the
          dirstate when icasefs is true
    listsubrepos - passed through to the matchers that are built
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    raises error.ProgrammingError if both icasefs and exact are requested
    """
    normalize = _donormalize
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(root, cwd, badfn, relativeuipath=True)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, cwd, kindpats,
                                      ctx=ctx, listsubrepos=listsubrepos,
                                      badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(root, cwd, badfn)

    # include/exclude matchers are built without the caller's badfn; only the
    # primary matcher above receives it
    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, cwd, kindpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, cwd, kindpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
199
199
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given list of files.'''
    return exactmatcher(root, cwd, files, badfn=badfn)
202
202
def always(root, cwd):
    '''Return an alwaysmatcher for the given root and cwd.'''
    return alwaysmatcher(root, cwd)
205
205
def never(root, cwd):
    '''Return a nevermatcher for the given root and cwd.'''
    return nevermatcher(root, cwd)
208
208
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy) and the original matcher is left
    untouched.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
216
216
217 def _donormalize(patterns, default, root, cwd, auditor, warn):
217 def _donormalize(patterns, default, root, cwd, auditor, warn):
218 '''Convert 'kind:pat' from the patterns list to tuples with kind and
218 '''Convert 'kind:pat' from the patterns list to tuples with kind and
219 normalized and rooted patterns and with listfiles expanded.'''
219 normalized and rooted patterns and with listfiles expanded.'''
220 kindpats = []
220 kindpats = []
221 for kind, pat in [_patsplit(p, default) for p in patterns]:
221 for kind, pat in [_patsplit(p, default) for p in patterns]:
222 if kind in cwdrelativepatternkinds:
222 if kind in cwdrelativepatternkinds:
223 pat = pathutil.canonpath(root, cwd, pat, auditor)
223 pat = pathutil.canonpath(root, cwd, pat, auditor)
224 elif kind in ('relglob', 'path', 'rootfilesin'):
224 elif kind in ('relglob', 'path', 'rootfilesin'):
225 pat = util.normpath(pat)
225 pat = util.normpath(pat)
226 elif kind in ('listfile', 'listfile0'):
226 elif kind in ('listfile', 'listfile0'):
227 try:
227 try:
228 files = util.readfile(pat)
228 files = util.readfile(pat)
229 if kind == 'listfile0':
229 if kind == 'listfile0':
230 files = files.split('\0')
230 files = files.split('\0')
231 else:
231 else:
232 files = files.splitlines()
232 files = files.splitlines()
233 files = [f for f in files if f]
233 files = [f for f in files if f]
234 except EnvironmentError:
234 except EnvironmentError:
235 raise error.Abort(_("unable to read file list (%s)") % pat)
235 raise error.Abort(_("unable to read file list (%s)") % pat)
236 for k, p, source in _donormalize(files, default, root, cwd,
236 for k, p, source in _donormalize(files, default, root, cwd,
237 auditor, warn):
237 auditor, warn):
238 kindpats.append((k, p, pat))
238 kindpats.append((k, p, pat))
239 continue
239 continue
240 elif kind == 'include':
240 elif kind == 'include':
241 try:
241 try:
242 fullpath = os.path.join(root, util.localpath(pat))
242 fullpath = os.path.join(root, util.localpath(pat))
243 includepats = readpatternfile(fullpath, warn)
243 includepats = readpatternfile(fullpath, warn)
244 for k, p, source in _donormalize(includepats, default,
244 for k, p, source in _donormalize(includepats, default,
245 root, cwd, auditor, warn):
245 root, cwd, auditor, warn):
246 kindpats.append((k, p, source or pat))
246 kindpats.append((k, p, source or pat))
247 except error.Abort as inst:
247 except error.Abort as inst:
248 raise error.Abort('%s: %s' % (pat, inst[0]))
248 raise error.Abort('%s: %s' % (pat, inst[0]))
249 except IOError as inst:
249 except IOError as inst:
250 if warn:
250 if warn:
251 warn(_("skipping unreadable pattern file '%s': %s\n") %
251 warn(_("skipping unreadable pattern file '%s': %s\n") %
252 (pat, stringutil.forcebytestr(inst.strerror)))
252 (pat, stringutil.forcebytestr(inst.strerror)))
253 continue
253 continue
254 # else: re or relre - which cannot be normalized
254 # else: re or relre - which cannot be normalized
255 kindpats.append((kind, pat, ''))
255 kindpats.append((kind, pat, ''))
256 return kindpats
256 return kindpats
257
257
class basematcher(object):
    '''Base class for matchers.

    On its own it matches nothing (matchfn() returns False) and lists no
    files; subclasses override the methods below to describe what they
    match.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        # only replace the bad() method when a callback was actually supplied
        if badfn is not None:
            self.bad = badfn
        self._relativeuipath = relativeuipath

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        return (self._relativeuipath and self.rel(f)) or self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()
397
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None, relativeuipath=False):
        # unlike basematcher, relativeuipath defaults to False here
        super(alwaysmatcher, self).__init__(root, cwd, badfn,
                                            relativeuipath=relativeuipath)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
419
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, root, cwd, badfn=None):
        super(nevermatcher, self).__init__(root, cwd, badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
445
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, root, cwd, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(root, cwd, badfn)
        # the predicate itself serves as matchfn
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        s = (stringutil.buildrepr(self._predrepr)
             or pycompat.byterepr(self.matchfn))
        # NOTE(review): 'predicatenmatcher' looks like a misspelling, but the
        # repr is runtime output, so it is kept byte-for-byte here
        return '<predicatenmatcher pred=%s>' % s
459
class patternmatcher(basematcher):
    '''Matcher built from a list of (kind, pat, source) patterns.'''

    def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', listsubrepos,
                                               root)

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        '''Return 'all' when this is a prefix matcher and dir is one of its
        explicit files; otherwise return whether dir is, contains, or lies
        under one of them.'''
        if self._prefix and dir in self._fileset:
            return 'all'
        return ('.' in self._fileset or
                dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def visitchildrenset(self, dir):
        '''Translate the visitdir() answer: True -> 'this', falsy -> set(),
        'all' -> 'all'.'''
        ret = self.visitdir(dir)
        if ret is True:
            return 'this'
        elif not ret:
            return set()
        assert ret == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
498
# This is basically a reimplementation of util.dirs that stores the children
# instead of just a count of them, plus a small optional optimization to avoid
# some directories we don't need.
class _dirchildren(object):
    """Map directories to the names of their immediate children.

    ``paths`` is an iterable of '/'-separated paths; every path contributes
    one (directory, child name) pair per ancestor directory, the root being
    spelled '.'.

    ``onlyinclude`` optionally restricts which directories are recorded:
    - None (the default) records every directory;
    - any other container records only the directories it contains, so an
      empty container records nothing.
    """

    def __init__(self, paths, onlyinclude=None):
        self._dirs = {}
        # Keep None as is: collapsing it to an empty container would make the
        # filter in addpath() reject every directory.
        self._onlyinclude = onlyinclude
        addpath = self.addpath
        for f in paths:
            addpath(f)

    def addpath(self, path):
        """Record ``path`` as a child of each of its ancestor directories."""
        if path == '.':
            # the root is nobody's child
            return
        dirs = self._dirs
        onlyinclude = self._onlyinclude
        for d, b in _dirchildren._findsplitdirs(path):
            if onlyinclude is not None and d not in onlyinclude:
                continue
            dirs.setdefault(d, set()).add(b)

    @staticmethod
    def _findsplitdirs(path):
        # yields (dirname, basename) tuples, walking back to the root. This is
        # very similar to util.finddirs, except:
        #  - produces a (dirname, basename) tuple, not just 'dirname'
        #  - includes root dir
        # Unlike manifest._splittopdir, this does not suffix `dirname` with a
        # slash, and produces '.' for the root instead of ''.
        oldpos = len(path)
        pos = path.rfind('/')
        while pos != -1:
            yield path[:pos], path[pos + 1:oldpos]
            oldpos = pos
            pos = path.rfind('/', 0, pos)
        yield '.', path[:oldpos]

    def get(self, path):
        """Return the recorded children of ``path`` (empty set if none)."""
        return self._dirs.get(path, set())
538
538
class includematcher(basematcher):
    """Matcher built from include patterns.

    Besides the compiled match function, three directory sets are kept to
    answer visitdir() and visitchildrenset(): ``_roots``, ``_dirs`` and
    ``_parents`` (described where they are assigned).
    """

    def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        built = _buildmatch(kindpats, '(?:/|$)', listsubrepos, root)
        self._pats, self.matchfn = built
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def visitdir(self, dir):
        """Return 'all', True or False for whether ``dir`` needs visiting."""
        roots = self._roots
        if dir in roots:
            return 'all' if self._prefix else True
        if '.' in roots or dir in self._dirs or dir in self._parents:
            return True
        # dir may live below a recursively included root
        return any(parent in roots for parent in util.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Return 'all', 'this' or the set of children of ``dir`` to visit."""
        roots = self._roots
        if dir in roots:
            return 'all' if self._prefix else 'this'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('.' in roots or
            dir in self._dirs or
            any(parent in roots for parent in util.finddirs(dir))):
            return 'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        pats = pycompat.bytestr(self._pats)
        return '<includematcher includes=%r>' % pats
597
597
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)

        # a list is stored as is; any other iterable is copied into a list
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # directories reported by util.dirs for the file set, plus '.'
        found = set(util.dirs(self._fileset))
        found.add('.')
        return found

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        """Return the names directly below ``dir`` that need visiting."""
        if not self._fileset or dir not in self._dirs:
            return set()

        known = self._fileset | (self._dirs - {'.'})
        if dir != '.':
            # keep only what lives below dir, expressed relative to dir
            below = dir + '/'
            skip = len(below)
            known = {name[skip:] for name in known if name.startswith(below)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {name for name in known if '/' not in name}
        # We really do not expect this to be empty, since that would imply
        # that there's something in _dirs that didn't have a file in _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
646
646
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        # callbacks are taken from the first matcher only
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        wanted = self._m1(f)
        return wanted and not self._m2(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        excluded = self._m2.visitdir(dir)
        if excluded == 'all':
            # m2 covers everything below dir, so nothing can be left
            return False
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        excluded = self._m2.visitchildrenset(dir)
        if excluded == 'all':
            return set()
        included = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, m1's answer is
        # returned below, even if it's 'all'. This is a change in behavior
        # from visitdir, which would return True, not 'all', for some reason.
        if excluded and (included == 'all' or included == 'this'):
            # Never return 'all' here if m2's answer is any kind of non-empty
            # (either 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Remaining case with a non-empty m2 answer:
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit
        #    'dir'!
        return included

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
712
712
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the other one is returned. When one of the
    matchers always matches, a shallow copy of the other one is returned
    instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 alone decides what matches, but m1 remains the source of the
        # non-matching attributes.
        result = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        result.bad = m1.bad
        result.explicitdir = m1.explicitdir
        result.traversedir = m1.traversedir
        result.abs = m1.abs
        result.rel = m1.rel
        result._relativeuipath |= m1._relativeuipath
        return result

    if not m2.always():
        return intersectionmatcher(m1, m2)

    result = copy.copy(m1)
    result._relativeuipath |= m2._relativeuipath
    return result
737
737
class intersectionmatcher(basematcher):
    """Matcher that matches a file only if both wrapped matchers match it.

    root, cwd, bad, explicitdir and traversedir are taken from ``m1``.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # filter the files of an exact matcher through the other matcher
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if not first:
            return False
        second = self._m2.visitdir(dir)
        if first == 'all':
            return second
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(second)

    def visitchildrenset(self, dir):
        first = self._m1.visitchildrenset(dir)
        if not first:
            return set()
        second = self._m2.visitchildrenset(dir)
        if not second:
            return set()

        # 'all' on one side: the other side alone decides
        if first == 'all':
            return second
        if second == 'all':
            return first

        if first == 'this' or second == 'this':
            return 'this'

        assert isinstance(first, set) and isinstance(second, set)
        return first.intersection(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
798
798
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> util.pconvert(m2.rel(b'b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    >>> m2.abs(b'c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # keep the wrapped matcher's files that live below path, expressed
        # relative to path
        below = path + "/"
        skip = len(below)
        self._files = [f[skip:] for f in matcher._files
                       if f.startswith(below)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def bad(self, f, msg):
        full = self._path + "/" + f
        self._matcher.bad(full, msg)

    def abs(self, f):
        full = self._path + "/" + f
        return self._matcher.abs(full)

    def rel(self, f):
        full = self._path + "/" + f
        return self._matcher.rel(full)

    def uipath(self, f):
        full = self._path + "/" + f
        return self._matcher.uipath(full)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        full = self._path + "/" + f
        return self._matcher.matchfn(full)

    def visitdir(self, dir):
        # '.' is the subdirectory itself as seen by the wrapped matcher
        full = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitdir(full)

    def visitchildrenset(self, dir):
        # '.' is the subdirectory itself as seen by the wrapped matcher
        full = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitchildrenset(full)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        details = (self._path, self._matcher)
        return '<subdirmatcher path=%r, matcher=%r>' % details
887
887
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'root', b'd/e/f', b'd/e', m1)
    >>> bool(m2(b'a.txt'),)
    False
    >>> bool(m2(b'd/e/a.txt'))
    True
    >>> bool(m2(b'd/e/b.txt'))
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, root, cwd, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(root, cwd, badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._path = path
        self._pathprefix = path + '/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + f for f in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        # anything outside of the prefix directory never matches
        return False

    @propertycache
    def _pathdirs(self):
        # directories reported by util.finddirs for the prefix, plus '.'
        found = set(util.finddirs(self._path))
        found.add('.')
        return found

    def visitdir(self, dir):
        if dir == self._path:
            inner = '.'
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix):]
        else:
            # outside of the prefix: only the directories leading to it count
            return dir in self._pathdirs
        return self._matcher.visitdir(inner)

    def visitchildrenset(self, dir):
        if dir == self._path:
            inner = '.'
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix):]
        elif dir in self._pathdirs:
            return 'this'
        else:
            return set()
        return self._matcher.visitchildrenset(inner)

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        details = (pycompat.bytestr(self._path), self._matcher)
        return '<prefixdirmatcher path=%r, matcher=%r>' % details
968
968
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (root, cwd, bad, explicitdir, traversedir) are
    taken from the first matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__(first._root, first._cwd)
        # NOTE(review): unlike explicitdir and traversedir, `bad` is not
        # copied from the first matcher here.
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        return any(match(f) for match in self._matchers)

    def visitdir(self, dir):
        combined = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                # nothing can beat 'all': stop looking
                return answer
            combined |= answer
        return combined

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if sawthis or answer == 'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children.update(answer)
        if sawthis:
            return 'this'
        return children

    @encoding.strmethod
    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
1020
1020
def patkind(pattern, default=None):
    '''Return the kind part of a 'kind:pat' pattern.

    The pattern is split with _patsplit(); when it carries no known kind
    prefix, ``default`` is returned instead.'''
    kind, _pat = _patsplit(pattern, default)
    return kind
1024
1024
def _patsplit(pattern, default):
    """Split a pattern string into a (kind, pattern) pair.

    When the text before the first ':' is one of allpatternkinds, that
    kind and the remainder are returned. Otherwise the result is
    (default, pattern) with the pattern left untouched."""
    kind, sep, rest = pattern.partition(':')
    if sep and kind in allpatternkinds:
        return kind, rest
    return default, pattern
1033
1033
def _globre(pat):
    r'''Translate an extended glob into the text of a regular expression.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    pos, end = 0, len(pat)
    pieces = []
    # number of '{' groups currently open; '}' and ',' are only special
    # while this is non-zero
    depth = 0
    escape = util.stringutil.regexbytesescapemap.get
    while pos < end:
        ch = pat[pos:pos + 1]
        pos += 1
        # character following ch; an empty slice once the end is reached
        following = pat[pos:pos + 1]
        if ch not in '*?[{},\\':
            pieces.append(escape(ch, ch))
        elif ch == '*':
            if following != '*':
                pieces.append('[^/]*')
            else:
                pos += 1
                if pat[pos:pos + 1] == '/':
                    # '**/' also matches zero directories
                    pos += 1
                    pieces.append('(?:.*/)?')
                else:
                    pieces.append('.*')
        elif ch == '?':
            pieces.append('.')
        elif ch == '[':
            close = pos
            # a leading '!' or ']' belongs to the class itself
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: the '[' is a literal character
                pieces.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                lead = stuff[0:1]
                if lead == '!':
                    stuff = '^' + stuff[1:]
                elif lead == '^':
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        elif ch == '{':
            depth += 1
            pieces.append('(?:')
        elif ch == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif ch == ',' and depth:
            pieces.append('|')
        elif ch == '\\':
            if following:
                # the backslash quotes the next character
                pos += 1
                pieces.append(escape(following, following))
            else:
                pieces.append(escape(ch, ch))
        else:
            pieces.append(escape(ch, ch))
    return ''.join(pieces)
1114
1114
def _regex(kind, pat, globsuffix):
    '''Turn a (normalized) pattern of the given kind into regexp text.

    globsuffix is appended to the regexp built for 'glob' and 'relglob'
    patterns. An empty pattern gives an empty regexp whatever the kind.
    Raises error.ProgrammingError for a kind that has no regexp form.'''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind in ('path', 'relpath'):
        return '' if pat == '.' else util.stringutil.reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # '.' means the root, otherwise the pattern is a directory name
        dirprefix = '' if pat == '.' else util.stringutil.reescape(pat) + '/'
        # whatever follows the directory must not contain another '/'
        return dirprefix + '[^/]+$'
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind == 'relre':
        return pat if pat.startswith('^') else '.*' + pat
    if kind == 'glob':
        return _globre(pat) + globsuffix
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1143
1143
def _buildmatch(kindpats, globsuffix, listsubrepos, root):
    '''Build a (regexp string, match function) pair for kindpats.

    globsuffix is appended to the regexp of globs. Subinclude entries
    are split off with _expandsubinclude() and get their own matcher,
    created on first use for each prefix. When every remaining pattern
    is a 'rootfilesin' one, a set lookup on the file's directory is used
    instead of a regexp. listsubrepos is not used by this function.'''
    candidates = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        cache = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = cache.get(prefix)
                if submatch is None:
                    submatch = cache[prefix] = match(*matcherargs)
                if submatch(f[len(prefix):]):
                    return True
            return False
        candidates.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}
            def matchrootfiles(f):
                head, sep, tail = f.rpartition('/')
                # a name without any '/' lives in the root directory
                return (head if sep else '.') in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            candidates.append(matchrootfiles)
        else:
            regex, matchregex = _buildregexmatch(kindpats, globsuffix)
            candidates.append(matchregex)

    if len(candidates) == 1:
        return regex, candidates[0]
    return regex, lambda f: any(mf(f) for mf in candidates)
1186
1186
# Longest regular expression, in characters, that _buildregexmatch() will
# compile as one unit; longer pattern sets are split into several groups.
MAX_RE_SIZE = 20000
# Starting size used when measuring a group: the '(?:)' wrapper, minus
# one because the accounting adds one separator for every piece while a
# group of k pieces only contains k - 1 '|' characters.
_BASE_SIZE = len('(?:)') - 1
1188
1189
def _joinregexes(regexps):
    """Combine several regexps into one non-capturing alternation."""
    alternatives = '|'.join(regexps)
    return '(?:' + alternatives + ')'
1192
1193
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of (kind, pat, source) entries.

    Returns the full regexp string together with a matcher function.
    Each pattern is translated to a regexp exactly once. When the joined
    regexp would be longer than MAX_RE_SIZE, the pieces are packed into
    several groups that each stay within the limit, and the matcher
    tries the groups in turn. The returned string is always the full
    joined regexp.

    Raises OverflowError when a single pattern is too large on its own,
    and error.Abort when a pattern is not a valid regexp.

    Test too large input
    >>> _buildregexmatch([
    ...     ('relglob', '?' * MAX_RE_SIZE, '')
    ... ], '$')
    Traceback (most recent call last):
    ...
    OverflowError
    """
    try:
        pieces = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        whole = _joinregexes(pieces)

        chunks = []
        chunkstart = 0
        chunksize = _BASE_SIZE
        for pos, piece in enumerate(pieces):
            piecelen = len(piece)
            if (piecelen + 4) > MAX_RE_SIZE:
                # even alone inside '(?:...)' this piece is too long
                raise OverflowError
            if (chunksize + 1 + piecelen) > MAX_RE_SIZE:
                # close the current group and start a new one at this piece
                chunks.append(_joinregexes(pieces[chunkstart:pos]))
                chunkstart = pos
                chunksize = _BASE_SIZE
            chunksize += piecelen + 1

        if chunkstart == 0:
            # everything fits into a single regexp
            return whole, _rematcher(whole)

        chunks.append(_joinregexes(pieces[chunkstart:]))
        matchers = [_rematcher(chunk) for chunk in chunks]
        return whole, lambda s: any(m(s) for m in matchers)
    except re.error:
        # compile the patterns one at a time to find which one is invalid
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _("%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _("invalid pattern (%s): %s") % (k, p)
                raise error.Abort(msg)
        raise error.Abort(_("invalid pattern"))
1231
1243
def _patternrootsanddirs(kindpats):
    '''Compute the roots and directories matched exactly by the patterns.

    Returns a (roots, dirs) tuple of lists. Only the locations named by
    the patterns themselves are reported; other directories that may
    need to be considered, such as their parents, are not included.
    '''
    roots = []
    dirs = []
    globchars = ('[', '{', '*', '?')
    for kind, pat, source in kindpats:
        if kind == 'glob':
            # keep the leading path components that contain no glob syntax
            prefix = []
            for component in pat.split('/'):
                if any(ch in component for ch in globchars):
                    break
                prefix.append(component)
            roots.append('/'.join(prefix) or '.')
        elif kind in ('relpath', 'path'):
            roots.append(pat or '.')
        elif kind == 'rootfilesin':
            dirs.append(pat or '.')
        else:
            # relglob, re, relre
            roots.append('.')
    return roots, dirs
1257
1269
def _roots(kindpats):
    '''Return the roots part of _patternrootsanddirs(kindpats).'''
    return _patternrootsanddirs(kindpats)[0]
1262
1274
def _rootsdirsandparents(kindpats):
    '''Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents are collected as non-recursive/exact directories, since
    # they must be scanned to reach the roots or the other exact
    # directories.
    parents = list(util.dirs(dirs))
    parents.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1303
1315
def _explicitfiles(kindpats):
    '''Return the potential explicit filenames named by the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' patterns can only name directories, never files, so
    # they are dropped before the roots are computed.
    filable = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(filable)
1316
1328
def _prefix(kindpats):
    '''Tell whether every pattern is of kind 'path' or 'relpath'.

    Those are the kinds that match a prefix (i.e. recursively). An empty
    pattern list gives True.'''
    return all(kind in ('path', 'relpath') for kind, pat, source in kindpats)
1323
1335
# Compiled comment-stripping regexp; readpatternfile() fills it in the
# first time it meets a line containing '#'.
_commentre = None
1325
1337
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    warn, when true, is called with a message for every "syntax:" line
    that names an unknown syntax; such a line is otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line) where line is the text of the line once
    comments, trailing white space and any syntax prefix have been
    removed. This is useful to debug ignore patterns.

    The file is closed before returning, including when an error is
    raised while it is being read.
    '''
    global _commentre

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # "with" guarantees the handle is released even if reading the file
    # or calling warn() raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now