##// END OF EJS Templates
match: drop support for passing '.' for root dir to visit*() methods...
Martin von Zweigbergk -
r43992:ecd11c4d default
parent child Browse files
Show More
@@ -1,1638 +1,1621 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import open
16 from .pycompat import open
17 from . import (
17 from . import (
18 encoding,
18 encoding,
19 error,
19 error,
20 pathutil,
20 pathutil,
21 pathutil,
21 pathutil,
22 policy,
22 policy,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 )
25 )
26 from .utils import stringutil
26 from .utils import stringutil
27
27
# Result of asking the policy module for the 'filepatterns' Rust module.
# NOTE(review): presumably None/falsy when no Rust build is available --
# confirm against policy.importrust().
rustmod = policy.importrust('filepatterns')

# Every pattern kind prefix ('kind:pattern') known to this module.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose patterns are interpreted relative to the current working
# directory rather than the repository root.
cwdrelativepatternkinds = (b'relpath', b'glob')

# Local alias so classes below can write '@propertycache'.
propertycache = util.propertycache
48
48
49
49
def _rematcher(regex):
    '''Compile the regexp with the best available regexp engine and return a
    matcher function.

    The compiled object's ``test_match`` attribute is returned when it
    exists; otherwise the regular ``match`` bound method is returned.
    '''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        return m.match
59
59
60
60
61 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
61 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
62 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
63 matchers = []
63 matchers = []
64 other = []
64 other = []
65
65
66 for kind, pat, source in kindpats:
66 for kind, pat, source in kindpats:
67 if kind == b'set':
67 if kind == b'set':
68 if ctx is None:
68 if ctx is None:
69 raise error.ProgrammingError(
69 raise error.ProgrammingError(
70 b"fileset expression with no context"
70 b"fileset expression with no context"
71 )
71 )
72 matchers.append(ctx.matchfileset(pat, badfn=badfn))
72 matchers.append(ctx.matchfileset(pat, badfn=badfn))
73
73
74 if listsubrepos:
74 if listsubrepos:
75 for subpath in ctx.substate:
75 for subpath in ctx.substate:
76 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
76 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
77 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
78 matchers.append(pm)
78 matchers.append(pm)
79
79
80 continue
80 continue
81 other.append((kind, pat, source))
81 other.append((kind, pat, source))
82 return matchers, other
82 return matchers, other
83
83
84
84
85 def _expandsubinclude(kindpats, root):
85 def _expandsubinclude(kindpats, root):
86 '''Returns the list of subinclude matcher args and the kindpats without the
86 '''Returns the list of subinclude matcher args and the kindpats without the
87 subincludes in it.'''
87 subincludes in it.'''
88 relmatchers = []
88 relmatchers = []
89 other = []
89 other = []
90
90
91 for kind, pat, source in kindpats:
91 for kind, pat, source in kindpats:
92 if kind == b'subinclude':
92 if kind == b'subinclude':
93 sourceroot = pathutil.dirname(util.normpath(source))
93 sourceroot = pathutil.dirname(util.normpath(source))
94 pat = util.pconvert(pat)
94 pat = util.pconvert(pat)
95 path = pathutil.join(sourceroot, pat)
95 path = pathutil.join(sourceroot, pat)
96
96
97 newroot = pathutil.dirname(path)
97 newroot = pathutil.dirname(path)
98 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98 matcherargs = (newroot, b'', [], [b'include:%s' % path])
99
99
100 prefix = pathutil.canonpath(root, root, newroot)
100 prefix = pathutil.canonpath(root, root, newroot)
101 if prefix:
101 if prefix:
102 prefix += b'/'
102 prefix += b'/'
103 relmatchers.append((prefix, matcherargs))
103 relmatchers.append((prefix, matcherargs))
104 else:
104 else:
105 other.append((kind, pat, source))
105 other.append((kind, pat, source))
106
106
107 return relmatchers, other
107 return relmatchers, other
108
108
109
109
110 def _kindpatsalwaysmatch(kindpats):
110 def _kindpatsalwaysmatch(kindpats):
111 """"Checks whether the kindspats match everything, as e.g.
111 """"Checks whether the kindspats match everything, as e.g.
112 'relpath:.' does.
112 'relpath:.' does.
113 """
113 """
114 for kind, pat, source in kindpats:
114 for kind, pat, source in kindpats:
115 if pat != b'' or kind not in [b'relpath', b'glob']:
115 if pat != b'' or kind not in [b'relpath', b'glob']:
116 return False
116 return False
117 return True
117 return True
118
118
119
119
def _buildkindpatsmatcher(
    matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
):
    '''Build a single matcher for kindpats using matchercls.

    'set' patterns are expanded to their own matchers first (see
    _expandsets()); the remaining patterns, if any, are handed to
    ``matchercls(root, kindpats, badfn=badfn)``.

    Returns a nevermatcher when nothing was built, the sole matcher when
    exactly one was built, and a unionmatcher over all of them otherwise.
    '''
    matchers = []
    fms, kindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
    )
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
137
137
138
138
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    normalize = _donormalize
    if icasefs:
        # Case-insensitive filesystem: additionally fold each non-regex
        # pattern through the dirstate's normalization.
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    # Note that the include/exclude matchers are built with badfn=None; only
    # the primary matcher above receives the caller's badfn.
    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m
293
293
294
294
def exact(files, badfn=None):
    '''Return an exactmatcher built from files, with the optional badfn.'''
    return exactmatcher(files, badfn=badfn)
297
297
298
298
def always(badfn=None):
    '''Return an alwaysmatcher (matches everything) with the optional badfn.'''
    return alwaysmatcher(badfn)
301
301
302
302
def never(badfn=None):
    '''Return a nevermatcher (matches nothing) with the optional badfn.'''
    return nevermatcher(badfn)
305
305
306
306
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy), so the original matcher keeps its own
    ``bad`` while other attributes are shared between the two objects.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
314
314
315
315
316 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
316 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
317 '''Convert 'kind:pat' from the patterns list to tuples with kind and
317 '''Convert 'kind:pat' from the patterns list to tuples with kind and
318 normalized and rooted patterns and with listfiles expanded.'''
318 normalized and rooted patterns and with listfiles expanded.'''
319 kindpats = []
319 kindpats = []
320 for kind, pat in [_patsplit(p, default) for p in patterns]:
320 for kind, pat in [_patsplit(p, default) for p in patterns]:
321 if kind in cwdrelativepatternkinds:
321 if kind in cwdrelativepatternkinds:
322 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
322 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
323 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
323 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
324 pat = util.normpath(pat)
324 pat = util.normpath(pat)
325 elif kind in (b'listfile', b'listfile0'):
325 elif kind in (b'listfile', b'listfile0'):
326 try:
326 try:
327 files = util.readfile(pat)
327 files = util.readfile(pat)
328 if kind == b'listfile0':
328 if kind == b'listfile0':
329 files = files.split(b'\0')
329 files = files.split(b'\0')
330 else:
330 else:
331 files = files.splitlines()
331 files = files.splitlines()
332 files = [f for f in files if f]
332 files = [f for f in files if f]
333 except EnvironmentError:
333 except EnvironmentError:
334 raise error.Abort(_(b"unable to read file list (%s)") % pat)
334 raise error.Abort(_(b"unable to read file list (%s)") % pat)
335 for k, p, source in _donormalize(
335 for k, p, source in _donormalize(
336 files, default, root, cwd, auditor, warn
336 files, default, root, cwd, auditor, warn
337 ):
337 ):
338 kindpats.append((k, p, pat))
338 kindpats.append((k, p, pat))
339 continue
339 continue
340 elif kind == b'include':
340 elif kind == b'include':
341 try:
341 try:
342 fullpath = os.path.join(root, util.localpath(pat))
342 fullpath = os.path.join(root, util.localpath(pat))
343 includepats = readpatternfile(fullpath, warn)
343 includepats = readpatternfile(fullpath, warn)
344 for k, p, source in _donormalize(
344 for k, p, source in _donormalize(
345 includepats, default, root, cwd, auditor, warn
345 includepats, default, root, cwd, auditor, warn
346 ):
346 ):
347 kindpats.append((k, p, source or pat))
347 kindpats.append((k, p, source or pat))
348 except error.Abort as inst:
348 except error.Abort as inst:
349 raise error.Abort(
349 raise error.Abort(
350 b'%s: %s'
350 b'%s: %s'
351 % (pat, inst[0]) # pytype: disable=unsupported-operands
351 % (pat, inst[0]) # pytype: disable=unsupported-operands
352 )
352 )
353 except IOError as inst:
353 except IOError as inst:
354 if warn:
354 if warn:
355 warn(
355 warn(
356 _(b"skipping unreadable pattern file '%s': %s\n")
356 _(b"skipping unreadable pattern file '%s': %s\n")
357 % (pat, stringutil.forcebytestr(inst.strerror))
357 % (pat, stringutil.forcebytestr(inst.strerror))
358 )
358 )
359 continue
359 continue
360 # else: re or relre - which cannot be normalized
360 # else: re or relre - which cannot be normalized
361 kindpats.append((kind, pat, b''))
361 kindpats.append((kind, pat, b''))
362 return kindpats
362 return kindpats
363
363
364
364
class basematcher(object):
    '''Base class for matchers.

    The defaults defined here match nothing (matchfn() returns False), list
    no files, and report that every directory should be visited; subclasses
    override whichever pieces they can answer more precisely.
    '''

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()
483
483
484
484
class alwaysmatcher(basematcher):
    '''Matches everything.

    always() and matchfn() return True, and both visitdir() and
    visitchildrenset() answer b'all' for any directory.
    '''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
505
505
506
506
class nevermatcher(basematcher):
    '''Matches nothing.

    visitdir() returns False and visitchildrenset() returns an empty set for
    any directory; matchfn() is inherited and returns False.
    '''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
532
532
533
533
534 class predicatematcher(basematcher):
534 class predicatematcher(basematcher):
535 """A matcher adapter for a simple boolean function"""
535 """A matcher adapter for a simple boolean function"""
536
536
537 def __init__(self, predfn, predrepr=None, badfn=None):
537 def __init__(self, predfn, predrepr=None, badfn=None):
538 super(predicatematcher, self).__init__(badfn)
538 super(predicatematcher, self).__init__(badfn)
539 self.matchfn = predfn
539 self.matchfn = predfn
540 self._predrepr = predrepr
540 self._predrepr = predrepr
541
541
542 @encoding.strmethod
542 @encoding.strmethod
543 def __repr__(self):
543 def __repr__(self):
544 s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(
544 s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(
545 self.matchfn
545 self.matchfn
546 )
546 )
547 return b'<predicatenmatcher pred=%s>' % s
547 return b'<predicatenmatcher pred=%s>' % s
548
548
549
549
550 def normalizerootdir(dir, funcname):
551 if dir == b'.':
552 util.nouideprecwarn(
553 b"match.%s() no longer accepts '.', use '' instead." % funcname,
554 b'5.1',
555 )
556 return b''
557 return dir
558
559
560 class patternmatcher(basematcher):
550 class patternmatcher(basematcher):
561 """Matches a set of (kind, pat, source) against a 'root' directory.
551 """Matches a set of (kind, pat, source) against a 'root' directory.
562
552
563 >>> kindpats = [
553 >>> kindpats = [
564 ... (b're', br'.*\.c$', b''),
554 ... (b're', br'.*\.c$', b''),
565 ... (b'path', b'foo/a', b''),
555 ... (b'path', b'foo/a', b''),
566 ... (b'relpath', b'b', b''),
556 ... (b'relpath', b'b', b''),
567 ... (b'glob', b'*.h', b''),
557 ... (b'glob', b'*.h', b''),
568 ... ]
558 ... ]
569 >>> m = patternmatcher(b'foo', kindpats)
559 >>> m = patternmatcher(b'foo', kindpats)
570 >>> m(b'main.c') # matches re:.*\.c$
560 >>> m(b'main.c') # matches re:.*\.c$
571 True
561 True
572 >>> m(b'b.txt')
562 >>> m(b'b.txt')
573 False
563 False
574 >>> m(b'foo/a') # matches path:foo/a
564 >>> m(b'foo/a') # matches path:foo/a
575 True
565 True
576 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
566 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
577 False
567 False
578 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
568 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
579 True
569 True
580 >>> m(b'lib.h') # matches glob:*.h
570 >>> m(b'lib.h') # matches glob:*.h
581 True
571 True
582
572
583 >>> m.files()
573 >>> m.files()
584 ['', 'foo/a', 'b', '']
574 ['', 'foo/a', 'b', '']
585 >>> m.exact(b'foo/a')
575 >>> m.exact(b'foo/a')
586 True
576 True
587 >>> m.exact(b'b')
577 >>> m.exact(b'b')
588 True
578 True
589 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
579 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
590 False
580 False
591 """
581 """
592
582
593 def __init__(self, root, kindpats, badfn=None):
583 def __init__(self, root, kindpats, badfn=None):
594 super(patternmatcher, self).__init__(badfn)
584 super(patternmatcher, self).__init__(badfn)
595
585
596 self._files = _explicitfiles(kindpats)
586 self._files = _explicitfiles(kindpats)
597 self._prefix = _prefix(kindpats)
587 self._prefix = _prefix(kindpats)
598 self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)
588 self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)
599
589
600 @propertycache
590 @propertycache
601 def _dirs(self):
591 def _dirs(self):
602 return set(pathutil.dirs(self._fileset))
592 return set(pathutil.dirs(self._fileset))
603
593
604 def visitdir(self, dir):
594 def visitdir(self, dir):
605 dir = normalizerootdir(dir, b'visitdir')
606 if self._prefix and dir in self._fileset:
595 if self._prefix and dir in self._fileset:
607 return b'all'
596 return b'all'
608 return (
597 return (
609 dir in self._fileset
598 dir in self._fileset
610 or dir in self._dirs
599 or dir in self._dirs
611 or any(
600 or any(
612 parentdir in self._fileset for parentdir in util.finddirs(dir)
601 parentdir in self._fileset for parentdir in util.finddirs(dir)
613 )
602 )
614 )
603 )
615
604
616 def visitchildrenset(self, dir):
605 def visitchildrenset(self, dir):
617 ret = self.visitdir(dir)
606 ret = self.visitdir(dir)
618 if ret is True:
607 if ret is True:
619 return b'this'
608 return b'this'
620 elif not ret:
609 elif not ret:
621 return set()
610 return set()
622 assert ret == b'all'
611 assert ret == b'all'
623 return b'all'
612 return b'all'
624
613
625 def prefix(self):
614 def prefix(self):
626 return self._prefix
615 return self._prefix
627
616
628 @encoding.strmethod
617 @encoding.strmethod
629 def __repr__(self):
618 def __repr__(self):
630 return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
619 return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
631
620
632
621
633 # This is basically a reimplementation of pathutil.dirs that stores the
622 # This is basically a reimplementation of pathutil.dirs that stores the
634 # children instead of just a count of them, plus a small optional optimization
623 # children instead of just a count of them, plus a small optional optimization
635 # to avoid some directories we don't need.
624 # to avoid some directories we don't need.
636 class _dirchildren(object):
625 class _dirchildren(object):
637 def __init__(self, paths, onlyinclude=None):
626 def __init__(self, paths, onlyinclude=None):
638 self._dirs = {}
627 self._dirs = {}
639 self._onlyinclude = onlyinclude or []
628 self._onlyinclude = onlyinclude or []
640 addpath = self.addpath
629 addpath = self.addpath
641 for f in paths:
630 for f in paths:
642 addpath(f)
631 addpath(f)
643
632
644 def addpath(self, path):
633 def addpath(self, path):
645 if path == b'':
634 if path == b'':
646 return
635 return
647 dirs = self._dirs
636 dirs = self._dirs
648 findsplitdirs = _dirchildren._findsplitdirs
637 findsplitdirs = _dirchildren._findsplitdirs
649 for d, b in findsplitdirs(path):
638 for d, b in findsplitdirs(path):
650 if d not in self._onlyinclude:
639 if d not in self._onlyinclude:
651 continue
640 continue
652 dirs.setdefault(d, set()).add(b)
641 dirs.setdefault(d, set()).add(b)
653
642
654 @staticmethod
643 @staticmethod
655 def _findsplitdirs(path):
644 def _findsplitdirs(path):
656 # yields (dirname, basename) tuples, walking back to the root. This is
645 # yields (dirname, basename) tuples, walking back to the root. This is
657 # very similar to util.finddirs, except:
646 # very similar to util.finddirs, except:
658 # - produces a (dirname, basename) tuple, not just 'dirname'
647 # - produces a (dirname, basename) tuple, not just 'dirname'
659 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
648 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
660 # slash.
649 # slash.
661 oldpos = len(path)
650 oldpos = len(path)
662 pos = path.rfind(b'/')
651 pos = path.rfind(b'/')
663 while pos != -1:
652 while pos != -1:
664 yield path[:pos], path[pos + 1 : oldpos]
653 yield path[:pos], path[pos + 1 : oldpos]
665 oldpos = pos
654 oldpos = pos
666 pos = path.rfind(b'/', 0, pos)
655 pos = path.rfind(b'/', 0, pos)
667 yield b'', path[:oldpos]
656 yield b'', path[:oldpos]
668
657
669 def get(self, path):
658 def get(self, path):
670 return self._dirs.get(path, set())
659 return self._dirs.get(path, set())
671
660
672
661
673 class includematcher(basematcher):
662 class includematcher(basematcher):
674 def __init__(self, root, kindpats, badfn=None):
663 def __init__(self, root, kindpats, badfn=None):
675 super(includematcher, self).__init__(badfn)
664 super(includematcher, self).__init__(badfn)
676
665
677 self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
666 self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
678 self._prefix = _prefix(kindpats)
667 self._prefix = _prefix(kindpats)
679 roots, dirs, parents = _rootsdirsandparents(kindpats)
668 roots, dirs, parents = _rootsdirsandparents(kindpats)
680 # roots are directories which are recursively included.
669 # roots are directories which are recursively included.
681 self._roots = set(roots)
670 self._roots = set(roots)
682 # dirs are directories which are non-recursively included.
671 # dirs are directories which are non-recursively included.
683 self._dirs = set(dirs)
672 self._dirs = set(dirs)
684 # parents are directories which are non-recursively included because
673 # parents are directories which are non-recursively included because
685 # they are needed to get to items in _dirs or _roots.
674 # they are needed to get to items in _dirs or _roots.
686 self._parents = parents
675 self._parents = parents
687
676
688 def visitdir(self, dir):
677 def visitdir(self, dir):
689 dir = normalizerootdir(dir, b'visitdir')
690 if self._prefix and dir in self._roots:
678 if self._prefix and dir in self._roots:
691 return b'all'
679 return b'all'
692 return (
680 return (
693 dir in self._roots
681 dir in self._roots
694 or dir in self._dirs
682 or dir in self._dirs
695 or dir in self._parents
683 or dir in self._parents
696 or any(parentdir in self._roots for parentdir in util.finddirs(dir))
684 or any(parentdir in self._roots for parentdir in util.finddirs(dir))
697 )
685 )
698
686
699 @propertycache
687 @propertycache
700 def _allparentschildren(self):
688 def _allparentschildren(self):
701 # It may seem odd that we add dirs, roots, and parents, and then
689 # It may seem odd that we add dirs, roots, and parents, and then
702 # restrict to only parents. This is to catch the case of:
690 # restrict to only parents. This is to catch the case of:
703 # dirs = ['foo/bar']
691 # dirs = ['foo/bar']
704 # parents = ['foo']
692 # parents = ['foo']
705 # if we asked for the children of 'foo', but had only added
693 # if we asked for the children of 'foo', but had only added
706 # self._parents, we wouldn't be able to respond ['bar'].
694 # self._parents, we wouldn't be able to respond ['bar'].
707 return _dirchildren(
695 return _dirchildren(
708 itertools.chain(self._dirs, self._roots, self._parents),
696 itertools.chain(self._dirs, self._roots, self._parents),
709 onlyinclude=self._parents,
697 onlyinclude=self._parents,
710 )
698 )
711
699
712 def visitchildrenset(self, dir):
700 def visitchildrenset(self, dir):
713 if self._prefix and dir in self._roots:
701 if self._prefix and dir in self._roots:
714 return b'all'
702 return b'all'
715 # Note: this does *not* include the 'dir in self._parents' case from
703 # Note: this does *not* include the 'dir in self._parents' case from
716 # visitdir, that's handled below.
704 # visitdir, that's handled below.
717 if (
705 if (
718 b'' in self._roots
706 b'' in self._roots
719 or dir in self._roots
707 or dir in self._roots
720 or dir in self._dirs
708 or dir in self._dirs
721 or any(parentdir in self._roots for parentdir in util.finddirs(dir))
709 or any(parentdir in self._roots for parentdir in util.finddirs(dir))
722 ):
710 ):
723 return b'this'
711 return b'this'
724
712
725 if dir in self._parents:
713 if dir in self._parents:
726 return self._allparentschildren.get(dir) or set()
714 return self._allparentschildren.get(dir) or set()
727 return set()
715 return set()
728
716
729 @encoding.strmethod
717 @encoding.strmethod
730 def __repr__(self):
718 def __repr__(self):
731 return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
719 return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
732
720
733
721
734 class exactmatcher(basematcher):
722 class exactmatcher(basematcher):
735 r'''Matches the input files exactly. They are interpreted as paths, not
723 r'''Matches the input files exactly. They are interpreted as paths, not
736 patterns (so no kind-prefixes).
724 patterns (so no kind-prefixes).
737
725
738 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
726 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
739 >>> m(b'a.txt')
727 >>> m(b'a.txt')
740 True
728 True
741 >>> m(b'b.txt')
729 >>> m(b'b.txt')
742 False
730 False
743
731
744 Input files that would be matched are exactly those returned by .files()
732 Input files that would be matched are exactly those returned by .files()
745 >>> m.files()
733 >>> m.files()
746 ['a.txt', 're:.*\\.c$']
734 ['a.txt', 're:.*\\.c$']
747
735
748 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
736 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
749 >>> m(b'main.c')
737 >>> m(b'main.c')
750 False
738 False
751 >>> m(br're:.*\.c$')
739 >>> m(br're:.*\.c$')
752 True
740 True
753 '''
741 '''
754
742
755 def __init__(self, files, badfn=None):
743 def __init__(self, files, badfn=None):
756 super(exactmatcher, self).__init__(badfn)
744 super(exactmatcher, self).__init__(badfn)
757
745
758 if isinstance(files, list):
746 if isinstance(files, list):
759 self._files = files
747 self._files = files
760 else:
748 else:
761 self._files = list(files)
749 self._files = list(files)
762
750
763 matchfn = basematcher.exact
751 matchfn = basematcher.exact
764
752
765 @propertycache
753 @propertycache
766 def _dirs(self):
754 def _dirs(self):
767 return set(pathutil.dirs(self._fileset))
755 return set(pathutil.dirs(self._fileset))
768
756
769 def visitdir(self, dir):
757 def visitdir(self, dir):
770 dir = normalizerootdir(dir, b'visitdir')
771 return dir in self._dirs
758 return dir in self._dirs
772
759
773 def visitchildrenset(self, dir):
760 def visitchildrenset(self, dir):
774 dir = normalizerootdir(dir, b'visitchildrenset')
775
776 if not self._fileset or dir not in self._dirs:
761 if not self._fileset or dir not in self._dirs:
777 return set()
762 return set()
778
763
779 candidates = self._fileset | self._dirs - {b''}
764 candidates = self._fileset | self._dirs - {b''}
780 if dir != b'':
765 if dir != b'':
781 d = dir + b'/'
766 d = dir + b'/'
782 candidates = set(c[len(d) :] for c in candidates if c.startswith(d))
767 candidates = set(c[len(d) :] for c in candidates if c.startswith(d))
783 # self._dirs includes all of the directories, recursively, so if
768 # self._dirs includes all of the directories, recursively, so if
784 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
769 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
785 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
770 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
786 # '/' in it, indicating a it's for a subdir-of-a-subdir; the
771 # '/' in it, indicating a it's for a subdir-of-a-subdir; the
787 # immediate subdir will be in there without a slash.
772 # immediate subdir will be in there without a slash.
788 ret = {c for c in candidates if b'/' not in c}
773 ret = {c for c in candidates if b'/' not in c}
789 # We really do not expect ret to be empty, since that would imply that
774 # We really do not expect ret to be empty, since that would imply that
790 # there's something in _dirs that didn't have a file in _fileset.
775 # there's something in _dirs that didn't have a file in _fileset.
791 assert ret
776 assert ret
792 return ret
777 return ret
793
778
794 def isexact(self):
779 def isexact(self):
795 return True
780 return True
796
781
797 @encoding.strmethod
782 @encoding.strmethod
798 def __repr__(self):
783 def __repr__(self):
799 return b'<exactmatcher files=%r>' % self._files
784 return b'<exactmatcher files=%r>' % self._files
800
785
801
786
802 class differencematcher(basematcher):
787 class differencematcher(basematcher):
803 '''Composes two matchers by matching if the first matches and the second
788 '''Composes two matchers by matching if the first matches and the second
804 does not.
789 does not.
805
790
806 The second matcher's non-matching-attributes (bad, explicitdir,
791 The second matcher's non-matching-attributes (bad, explicitdir,
807 traversedir) are ignored.
792 traversedir) are ignored.
808 '''
793 '''
809
794
810 def __init__(self, m1, m2):
795 def __init__(self, m1, m2):
811 super(differencematcher, self).__init__()
796 super(differencematcher, self).__init__()
812 self._m1 = m1
797 self._m1 = m1
813 self._m2 = m2
798 self._m2 = m2
814 self.bad = m1.bad
799 self.bad = m1.bad
815 self.explicitdir = m1.explicitdir
800 self.explicitdir = m1.explicitdir
816 self.traversedir = m1.traversedir
801 self.traversedir = m1.traversedir
817
802
818 def matchfn(self, f):
803 def matchfn(self, f):
819 return self._m1(f) and not self._m2(f)
804 return self._m1(f) and not self._m2(f)
820
805
821 @propertycache
806 @propertycache
822 def _files(self):
807 def _files(self):
823 if self.isexact():
808 if self.isexact():
824 return [f for f in self._m1.files() if self(f)]
809 return [f for f in self._m1.files() if self(f)]
825 # If m1 is not an exact matcher, we can't easily figure out the set of
810 # If m1 is not an exact matcher, we can't easily figure out the set of
826 # files, because its files() are not always files. For example, if
811 # files, because its files() are not always files. For example, if
827 # m1 is "path:dir" and m2 is "rootfileins:.", we don't
812 # m1 is "path:dir" and m2 is "rootfileins:.", we don't
828 # want to remove "dir" from the set even though it would match m2,
813 # want to remove "dir" from the set even though it would match m2,
829 # because the "dir" in m1 may not be a file.
814 # because the "dir" in m1 may not be a file.
830 return self._m1.files()
815 return self._m1.files()
831
816
832 def visitdir(self, dir):
817 def visitdir(self, dir):
833 if self._m2.visitdir(dir) == b'all':
818 if self._m2.visitdir(dir) == b'all':
834 return False
819 return False
835 elif not self._m2.visitdir(dir):
820 elif not self._m2.visitdir(dir):
836 # m2 does not match dir, we can return 'all' here if possible
821 # m2 does not match dir, we can return 'all' here if possible
837 return self._m1.visitdir(dir)
822 return self._m1.visitdir(dir)
838 return bool(self._m1.visitdir(dir))
823 return bool(self._m1.visitdir(dir))
839
824
840 def visitchildrenset(self, dir):
825 def visitchildrenset(self, dir):
841 m2_set = self._m2.visitchildrenset(dir)
826 m2_set = self._m2.visitchildrenset(dir)
842 if m2_set == b'all':
827 if m2_set == b'all':
843 return set()
828 return set()
844 m1_set = self._m1.visitchildrenset(dir)
829 m1_set = self._m1.visitchildrenset(dir)
845 # Possible values for m1: 'all', 'this', set(...), set()
830 # Possible values for m1: 'all', 'this', set(...), set()
846 # Possible values for m2: 'this', set(...), set()
831 # Possible values for m2: 'this', set(...), set()
847 # If m2 has nothing under here that we care about, return m1, even if
832 # If m2 has nothing under here that we care about, return m1, even if
848 # it's 'all'. This is a change in behavior from visitdir, which would
833 # it's 'all'. This is a change in behavior from visitdir, which would
849 # return True, not 'all', for some reason.
834 # return True, not 'all', for some reason.
850 if not m2_set:
835 if not m2_set:
851 return m1_set
836 return m1_set
852 if m1_set in [b'all', b'this']:
837 if m1_set in [b'all', b'this']:
853 # Never return 'all' here if m2_set is any kind of non-empty (either
838 # Never return 'all' here if m2_set is any kind of non-empty (either
854 # 'this' or set(foo)), since m2 might return set() for a
839 # 'this' or set(foo)), since m2 might return set() for a
855 # subdirectory.
840 # subdirectory.
856 return b'this'
841 return b'this'
857 # Possible values for m1: set(...), set()
842 # Possible values for m1: set(...), set()
858 # Possible values for m2: 'this', set(...)
843 # Possible values for m2: 'this', set(...)
859 # We ignore m2's set results. They're possibly incorrect:
844 # We ignore m2's set results. They're possibly incorrect:
860 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
845 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
861 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
846 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
862 # return set(), which is *not* correct, we still need to visit 'dir'!
847 # return set(), which is *not* correct, we still need to visit 'dir'!
863 return m1_set
848 return m1_set
864
849
865 def isexact(self):
850 def isexact(self):
866 return self._m1.isexact()
851 return self._m1.isexact()
867
852
868 @encoding.strmethod
853 @encoding.strmethod
869 def __repr__(self):
854 def __repr__(self):
870 return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
855 return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
871
856
872
857
873 def intersectmatchers(m1, m2):
858 def intersectmatchers(m1, m2):
874 '''Composes two matchers by matching if both of them match.
859 '''Composes two matchers by matching if both of them match.
875
860
876 The second matcher's non-matching-attributes (bad, explicitdir,
861 The second matcher's non-matching-attributes (bad, explicitdir,
877 traversedir) are ignored.
862 traversedir) are ignored.
878 '''
863 '''
879 if m1 is None or m2 is None:
864 if m1 is None or m2 is None:
880 return m1 or m2
865 return m1 or m2
881 if m1.always():
866 if m1.always():
882 m = copy.copy(m2)
867 m = copy.copy(m2)
883 # TODO: Consider encapsulating these things in a class so there's only
868 # TODO: Consider encapsulating these things in a class so there's only
884 # one thing to copy from m1.
869 # one thing to copy from m1.
885 m.bad = m1.bad
870 m.bad = m1.bad
886 m.explicitdir = m1.explicitdir
871 m.explicitdir = m1.explicitdir
887 m.traversedir = m1.traversedir
872 m.traversedir = m1.traversedir
888 return m
873 return m
889 if m2.always():
874 if m2.always():
890 m = copy.copy(m1)
875 m = copy.copy(m1)
891 return m
876 return m
892 return intersectionmatcher(m1, m2)
877 return intersectionmatcher(m1, m2)
893
878
894
879
895 class intersectionmatcher(basematcher):
880 class intersectionmatcher(basematcher):
896 def __init__(self, m1, m2):
881 def __init__(self, m1, m2):
897 super(intersectionmatcher, self).__init__()
882 super(intersectionmatcher, self).__init__()
898 self._m1 = m1
883 self._m1 = m1
899 self._m2 = m2
884 self._m2 = m2
900 self.bad = m1.bad
885 self.bad = m1.bad
901 self.explicitdir = m1.explicitdir
886 self.explicitdir = m1.explicitdir
902 self.traversedir = m1.traversedir
887 self.traversedir = m1.traversedir
903
888
904 @propertycache
889 @propertycache
905 def _files(self):
890 def _files(self):
906 if self.isexact():
891 if self.isexact():
907 m1, m2 = self._m1, self._m2
892 m1, m2 = self._m1, self._m2
908 if not m1.isexact():
893 if not m1.isexact():
909 m1, m2 = m2, m1
894 m1, m2 = m2, m1
910 return [f for f in m1.files() if m2(f)]
895 return [f for f in m1.files() if m2(f)]
911 # It neither m1 nor m2 is an exact matcher, we can't easily intersect
896 # It neither m1 nor m2 is an exact matcher, we can't easily intersect
912 # the set of files, because their files() are not always files. For
897 # the set of files, because their files() are not always files. For
913 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
898 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
914 # "path:dir2", we don't want to remove "dir2" from the set.
899 # "path:dir2", we don't want to remove "dir2" from the set.
915 return self._m1.files() + self._m2.files()
900 return self._m1.files() + self._m2.files()
916
901
917 def matchfn(self, f):
902 def matchfn(self, f):
918 return self._m1(f) and self._m2(f)
903 return self._m1(f) and self._m2(f)
919
904
920 def visitdir(self, dir):
905 def visitdir(self, dir):
921 visit1 = self._m1.visitdir(dir)
906 visit1 = self._m1.visitdir(dir)
922 if visit1 == b'all':
907 if visit1 == b'all':
923 return self._m2.visitdir(dir)
908 return self._m2.visitdir(dir)
924 # bool() because visit1=True + visit2='all' should not be 'all'
909 # bool() because visit1=True + visit2='all' should not be 'all'
925 return bool(visit1 and self._m2.visitdir(dir))
910 return bool(visit1 and self._m2.visitdir(dir))
926
911
927 def visitchildrenset(self, dir):
912 def visitchildrenset(self, dir):
928 m1_set = self._m1.visitchildrenset(dir)
913 m1_set = self._m1.visitchildrenset(dir)
929 if not m1_set:
914 if not m1_set:
930 return set()
915 return set()
931 m2_set = self._m2.visitchildrenset(dir)
916 m2_set = self._m2.visitchildrenset(dir)
932 if not m2_set:
917 if not m2_set:
933 return set()
918 return set()
934
919
935 if m1_set == b'all':
920 if m1_set == b'all':
936 return m2_set
921 return m2_set
937 elif m2_set == b'all':
922 elif m2_set == b'all':
938 return m1_set
923 return m1_set
939
924
940 if m1_set == b'this' or m2_set == b'this':
925 if m1_set == b'this' or m2_set == b'this':
941 return b'this'
926 return b'this'
942
927
943 assert isinstance(m1_set, set) and isinstance(m2_set, set)
928 assert isinstance(m1_set, set) and isinstance(m2_set, set)
944 return m1_set.intersection(m2_set)
929 return m1_set.intersection(m2_set)
945
930
946 def always(self):
931 def always(self):
947 return self._m1.always() and self._m2.always()
932 return self._m1.always() and self._m2.always()
948
933
949 def isexact(self):
934 def isexact(self):
950 return self._m1.isexact() or self._m2.isexact()
935 return self._m1.isexact() or self._m2.isexact()
951
936
952 @encoding.strmethod
937 @encoding.strmethod
953 def __repr__(self):
938 def __repr__(self):
954 return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
939 return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
955
940
956
941
957 class subdirmatcher(basematcher):
942 class subdirmatcher(basematcher):
958 """Adapt a matcher to work on a subdirectory only.
943 """Adapt a matcher to work on a subdirectory only.
959
944
960 The paths are remapped to remove/insert the path as needed:
945 The paths are remapped to remove/insert the path as needed:
961
946
962 >>> from . import pycompat
947 >>> from . import pycompat
963 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
948 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
964 >>> m2 = subdirmatcher(b'sub', m1)
949 >>> m2 = subdirmatcher(b'sub', m1)
965 >>> m2(b'a.txt')
950 >>> m2(b'a.txt')
966 False
951 False
967 >>> m2(b'b.txt')
952 >>> m2(b'b.txt')
968 True
953 True
969 >>> m2.matchfn(b'a.txt')
954 >>> m2.matchfn(b'a.txt')
970 False
955 False
971 >>> m2.matchfn(b'b.txt')
956 >>> m2.matchfn(b'b.txt')
972 True
957 True
973 >>> m2.files()
958 >>> m2.files()
974 ['b.txt']
959 ['b.txt']
975 >>> m2.exact(b'b.txt')
960 >>> m2.exact(b'b.txt')
976 True
961 True
977 >>> def bad(f, msg):
962 >>> def bad(f, msg):
978 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
963 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
979 >>> m1.bad = bad
964 >>> m1.bad = bad
980 >>> m2.bad(b'x.txt', b'No such file')
965 >>> m2.bad(b'x.txt', b'No such file')
981 sub/x.txt: No such file
966 sub/x.txt: No such file
982 """
967 """
983
968
984 def __init__(self, path, matcher):
969 def __init__(self, path, matcher):
985 super(subdirmatcher, self).__init__()
970 super(subdirmatcher, self).__init__()
986 self._path = path
971 self._path = path
987 self._matcher = matcher
972 self._matcher = matcher
988 self._always = matcher.always()
973 self._always = matcher.always()
989
974
990 self._files = [
975 self._files = [
991 f[len(path) + 1 :]
976 f[len(path) + 1 :]
992 for f in matcher._files
977 for f in matcher._files
993 if f.startswith(path + b"/")
978 if f.startswith(path + b"/")
994 ]
979 ]
995
980
996 # If the parent repo had a path to this subrepo and the matcher is
981 # If the parent repo had a path to this subrepo and the matcher is
997 # a prefix matcher, this submatcher always matches.
982 # a prefix matcher, this submatcher always matches.
998 if matcher.prefix():
983 if matcher.prefix():
999 self._always = any(f == path for f in matcher._files)
984 self._always = any(f == path for f in matcher._files)
1000
985
1001 def bad(self, f, msg):
986 def bad(self, f, msg):
1002 self._matcher.bad(self._path + b"/" + f, msg)
987 self._matcher.bad(self._path + b"/" + f, msg)
1003
988
1004 def matchfn(self, f):
989 def matchfn(self, f):
1005 # Some information is lost in the superclass's constructor, so we
990 # Some information is lost in the superclass's constructor, so we
1006 # can not accurately create the matching function for the subdirectory
991 # can not accurately create the matching function for the subdirectory
1007 # from the inputs. Instead, we override matchfn() and visitdir() to
992 # from the inputs. Instead, we override matchfn() and visitdir() to
1008 # call the original matcher with the subdirectory path prepended.
993 # call the original matcher with the subdirectory path prepended.
1009 return self._matcher.matchfn(self._path + b"/" + f)
994 return self._matcher.matchfn(self._path + b"/" + f)
1010
995
1011 def visitdir(self, dir):
996 def visitdir(self, dir):
1012 dir = normalizerootdir(dir, b'visitdir')
1013 if dir == b'':
997 if dir == b'':
1014 dir = self._path
998 dir = self._path
1015 else:
999 else:
1016 dir = self._path + b"/" + dir
1000 dir = self._path + b"/" + dir
1017 return self._matcher.visitdir(dir)
1001 return self._matcher.visitdir(dir)
1018
1002
1019 def visitchildrenset(self, dir):
1003 def visitchildrenset(self, dir):
1020 dir = normalizerootdir(dir, b'visitchildrenset')
1021 if dir == b'':
1004 if dir == b'':
1022 dir = self._path
1005 dir = self._path
1023 else:
1006 else:
1024 dir = self._path + b"/" + dir
1007 dir = self._path + b"/" + dir
1025 return self._matcher.visitchildrenset(dir)
1008 return self._matcher.visitchildrenset(dir)
1026
1009
1027 def always(self):
1010 def always(self):
1028 return self._always
1011 return self._always
1029
1012
1030 def prefix(self):
1013 def prefix(self):
1031 return self._matcher.prefix() and not self._always
1014 return self._matcher.prefix() and not self._always
1032
1015
1033 @encoding.strmethod
1016 @encoding.strmethod
1034 def __repr__(self):
1017 def __repr__(self):
1035 return b'<subdirmatcher path=%r, matcher=%r>' % (
1018 return b'<subdirmatcher path=%r, matcher=%r>' % (
1036 self._path,
1019 self._path,
1037 self._matcher,
1020 self._matcher,
1038 )
1021 )
1039
1022
1040
1023
class prefixdirmatcher(basematcher):
    """Make a matcher usable from a directory above its own root.

    Paths handed to this matcher are expected to start with ``path + '/'``;
    that prefix is removed before the wrapped matcher is consulted, and
    anything outside the prefix does not match.

    The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        # Keep the trailing slash so that b'd/ef' is not mistaken for a
        # child of b'd/e'.
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        # Values yielded by util.finddirs() for the prefix; these are the
        # directories answered directly by visitdir()/visitchildrenset().
        return set(util.finddirs(self._path))

    def visitdir(self, dir):
        if dir == self._path:
            # The prefix itself is the root of the wrapped matcher.
            return self._matcher.visitdir(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix) :])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        if dir == self._path:
            # The prefix itself is the root of the wrapped matcher.
            return self._matcher.visitchildrenset(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix) :])
        return b'this' if dir in self._pathdirs else set()

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        shown = (pycompat.bytestr(self._path), self._matcher)
        return b'<prefixdirmatcher path=%r, matcher=%r>' % shown
1123
1106
1124
1107
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    A file matches when at least one of the wrapped matchers matches it.

    The ``explicitdir`` and ``traversedir`` attributes are copied from the
    first matcher; ``matchers`` must therefore contain at least one element.
    """

    def __init__(self, matchers):
        m1 = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True if any of the wrapped matchers matches ``f``."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """Combine the visitdir() answers of all wrapped matchers.

        b'all' from any matcher wins immediately; otherwise the individual
        answers are or-ed together.
        """
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == b'all':
                return v
            r |= v
        return r

    def visitchildrenset(self, dir):
        """Combine the visitchildrenset() answers of all wrapped matchers.

        Returns b'all' as soon as one matcher says so, b'this' if any matcher
        answered b'this', and otherwise the union of the returned sets.
        """
        children = set()
        this = False
        for m in self._matchers:
            v = m.visitchildrenset(dir)
            if not v:
                continue
            if v == b'all':
                return v
            if this or v == b'this':
                this = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(v, set)
            children = children.union(v)
        return b'this' if this else children

    @encoding.strmethod
    def __repr__(self):
        # Wrap in a 1-tuple: a bare tuple of matchers on the right-hand side
        # of % would be unpacked as separate format arguments and raise
        # TypeError.
        return b'<unionmatcher matchers=%r>' % (self._matchers,)
1176
1159
1177
1160
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern whose kind is known.

    When the pattern carries no known kind prefix, ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, unusedpat = _patsplit(pattern, default)
    return kind
1192
1175
1193
1176
def _patsplit(pattern, default):
    """Separate a pattern into its kind prefix (if any) and the pattern proper.

    Returns ``(kind, pat)`` when the text before the first colon is one of
    ``allpatternkinds``; otherwise ``(default, pattern)`` with the pattern
    left untouched.
    """
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1202
1185
1203
1186
def _globre(pat):
    r'''Translate an extended glob into the equivalent regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    escape = util.stringutil.regexbytesescapemap.get
    special = b'*?[{},\\'
    pieces = []  # regexp fragments, joined once at the end
    depth = 0  # number of currently open '{' groups
    pos, end = 0, len(pat)

    while pos < end:
        c = pat[pos : pos + 1]
        pos += 1
        if c not in special:
            pieces.append(escape(c, c))
        elif c == b'*':
            # Slicing past the end yields b'', which compares unequal to
            # every one-byte token, so no explicit bounds check is needed.
            if pat[pos : pos + 1] != b'*':
                pieces.append(b'[^/]*')
                continue
            pos += 1
            if pat[pos : pos + 1] == b'/':
                pos += 1
                pieces.append(b'(?:.*/)?')
            else:
                pieces.append(b'.*')
        elif c == b'?':
            pieces.append(b'.')
        elif c == b'[':
            # Look for the closing bracket; a leading '!' or ']' is part of
            # the class and cannot terminate it.
            close = pos
            if close < end and pat[close : close + 1] in b'!]':
                close += 1
            while close < end and pat[close : close + 1] != b']':
                close += 1
            if close >= end:
                # unterminated class: treat the '[' literally
                pieces.append(b'\\[')
            else:
                stuff = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                lead = stuff[0:1]
                if lead == b'!':
                    stuff = b'^' + stuff[1:]
                elif lead == b'^':
                    stuff = b'\\' + stuff
                pieces.append(b'[' + stuff + b']')
        elif c == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif c == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif c == b',' and depth:
            pieces.append(b'|')
        elif c == b'\\':
            following = pat[pos : pos + 1]
            if following:
                pos += 1
                pieces.append(escape(following, following))
            else:
                pieces.append(escape(c, c))
        else:
            # '}' or ',' outside of any group
            pieces.append(escape(c, c))
    return b''.join(pieces)
1286
1269
1287
1270
def _regex(kind, pat, globsuffix):
    '''Build the regular expression for one (normalized) pattern.

    ``globsuffix`` is appended to the regexp produced for glob-style kinds.
    Raises error.ProgrammingError for kinds that cannot be expressed as a
    regular expression.'''

    if rustmod is not None:
        try:
            return rustmod.build_single_regex(kind, pat, globsuffix)
        except rustmod.PatternError:
            raise error.ProgrammingError(
                b'not a regex pattern: %s:%s' % (kind, pat)
            )

    if kind in (b'glob', b'relpath') and not pat:
        return b''
    elif kind == b're':
        return pat
    elif kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    elif kind == b'rootfilesin':
        # Pattern is a directory name ('.' being the root); anything after
        # it must be a non-directory.
        if pat == b'.':
            dirprefix = b''
        else:
            dirprefix = util.stringutil.reescape(pat) + b'/'
        return dirprefix + b'[^/]+$'
    elif kind == b'relglob':
        anyname = b'[^/]*'
        body = _globre(pat)
        if body.startswith(anyname):
            # When pat has the form *XYZ (common), make the returned regex
            # more legible by returning the regex for **XYZ instead of
            # **/*XYZ.
            return b'.*' + body[len(anyname) :] + globsuffix
        return b'(?:|.*/)' + body + globsuffix
    elif kind == b'relre':
        return pat if pat.startswith(b'^') else b'.*' + pat
    elif kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1331
1314
1332
1315
def _buildmatch(kindpats, globsuffix, root):
    '''Return a regexp string and a match function for kindpats.

    globsuffix is appended to the regexp of globs. Patterns pulled out by
    _expandsubinclude() are handled by matchers built on demand; the
    remaining patterns are matched either by a directory lookup (when they
    are all of kind rootfilesin) or through _buildregexmatch().'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    # built the first time a file under this prefix is seen
                    submatch = submatchers[prefix] = match(*matcherargs)
                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                slash = f.rfind(b'/')
                parent = f[:slash] if slash >= 0 else b'.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(mf)
        else:
            regex, remf = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(remf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(fn(f) for fn in matchfuncs)
1379
1362
1380
1363
# Largest size, in bytes, of a single regular expression (or of a group of
# joined ones) that _buildregexmatch() will hand to the regexp compiler.
MAX_RE_SIZE = 20000
1382
1365
1383
1366
1384 def _joinregexes(regexps):
1367 def _joinregexes(regexps):
1385 """gather multiple regular expressions into a single one"""
1368 """gather multiple regular expressions into a single one"""
1386 return b'|'.join(regexps)
1369 return b'|'.join(regexps)
1387
1370
1388
1371
def _buildregexmatch(kindpats, globsuffix):
    """Turn a list of (kind, pat, source) into a regexp and a match function.

    Returns the full regexp string together with a function telling whether
    a path matches. When the pieces do not fit in one regexp of at most
    MAX_RE_SIZE bytes, they are compiled in several groups and a path
    matches if any group matches.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        allgroups = []
        startidx = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                raise error.Abort(
                    _(b"matcher pattern is too long (%d bytes)") % piecesize
                )
            if groupsize + piecesize > MAX_RE_SIZE:
                # close the current group and start a new one at this piece
                allgroups.append(_joinregexes(regexps[startidx:idx]))
                startidx = idx
                groupsize = 0
            # the extra byte accounts for the '|' joining the pieces
            groupsize += piecesize + 1

        if startidx == 0:
            # everything fitted in a single group
            matcher = _rematcher(fullregexp)
            return fullregexp, lambda s: bool(matcher(s))

        allgroups.append(_joinregexes(regexps[startidx:]))
        allmatchers = [_rematcher(g) for g in allgroups]
        return fullregexp, lambda s: any(m(s) for m in allmatchers)
    except re.error:
        # Compile the patterns one by one to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _(b"invalid pattern (%s): %s") % (k, p)
                raise error.Abort(msg)
        raise error.Abort(_(b"invalid pattern"))
1441
1424
1442
1425
1443 def _patternrootsanddirs(kindpats):
1426 def _patternrootsanddirs(kindpats):
1444 '''Returns roots and directories corresponding to each pattern.
1427 '''Returns roots and directories corresponding to each pattern.
1445
1428
1446 This calculates the roots and directories exactly matching the patterns and
1429 This calculates the roots and directories exactly matching the patterns and
1447 returns a tuple of (roots, dirs) for each. It does not return other
1430 returns a tuple of (roots, dirs) for each. It does not return other
1448 directories which may also need to be considered, like the parent
1431 directories which may also need to be considered, like the parent
1449 directories.
1432 directories.
1450 '''
1433 '''
1451 r = []
1434 r = []
1452 d = []
1435 d = []
1453 for kind, pat, source in kindpats:
1436 for kind, pat, source in kindpats:
1454 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1437 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1455 root = []
1438 root = []
1456 for p in pat.split(b'/'):
1439 for p in pat.split(b'/'):
1457 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1440 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1458 break
1441 break
1459 root.append(p)
1442 root.append(p)
1460 r.append(b'/'.join(root))
1443 r.append(b'/'.join(root))
1461 elif kind in (b'relpath', b'path'):
1444 elif kind in (b'relpath', b'path'):
1462 if pat == b'.':
1445 if pat == b'.':
1463 pat = b''
1446 pat = b''
1464 r.append(pat)
1447 r.append(pat)
1465 elif kind in (b'rootfilesin',):
1448 elif kind in (b'rootfilesin',):
1466 if pat == b'.':
1449 if pat == b'.':
1467 pat = b''
1450 pat = b''
1468 d.append(pat)
1451 d.append(pat)
1469 else: # relglob, re, relre
1452 else: # relglob, re, relre
1470 r.append(b'')
1453 r.append(b'')
1471 return r, d
1454 return r, d
1472
1455
1473
1456
def _roots(kindpats):
    '''Return the root directories the given patterns match recursively.'''
    return _patternrootsanddirs(kindpats)[0]
1478
1461
1479
1462
def _rootsdirsandparents(kindpats):
    '''Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set(pathutil.dirs(dirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1522
1505
1523
1506
def _explicitfiles(kindpats):
    '''Return the filenames the patterns may explicitly name.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # rootfilesin can only name directories, never files, so it is left out.
    return _roots([kp for kp in kindpats if kp[0] != b'rootfilesin'])
1536
1519
1537
1520
1538 def _prefix(kindpats):
1521 def _prefix(kindpats):
1539 '''Whether all the patterns match a prefix (i.e. recursively)'''
1522 '''Whether all the patterns match a prefix (i.e. recursively)'''
1540 for kind, pat, source in kindpats:
1523 for kind, pat, source in kindpats:
1541 if kind not in (b'path', b'relpath'):
1524 if kind not in (b'path', b'relpath'):
1542 return False
1525 return False
1543 return True
1526 return True
1544
1527
1545
1528
1546 _commentre = None
1529 _commentre = None
1547
1530
1548
1531
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    filepath is the file to read. warn, when true, is called with a message
    for every "syntax:" line naming an unknown syntax; such lines are
    otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline), where originalline is the line with
    comments, trailing white space and any syntax prefix removed.
    This is useful to debug ignore patterns.
    '''
    global _commentre

    if rustmod is not None:
        result, warnings = rustmod.read_pattern_file(
            filepath, bool(warn), sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    # Maps each "syntax:" name to the kind prefix put in front of patterns.
    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The with block guarantees the file is closed even when reading or
    # parsing raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A line may override the current default with its own prefix,
            # spelled either as the kind prefix or as "<syntax name>:".
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now