##// END OF EJS Templates
match: don't util.normpath() cwd...
Matt Harbison -
r44417:8a81fa44 default
parent child Browse files
Show More
@@ -1,1619 +1,1619 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import open
16 from .pycompat import open
17 from . import (
17 from . import (
18 encoding,
18 encoding,
19 error,
19 error,
20 pathutil,
20 pathutil,
21 policy,
21 policy,
22 pycompat,
22 pycompat,
23 util,
23 util,
24 )
24 )
25 from .utils import stringutil
25 from .utils import stringutil
26
26
27 rustmod = policy.importrust('filepatterns')
27 rustmod = policy.importrust('filepatterns')
28
28
29 allpatternkinds = (
29 allpatternkinds = (
30 b're',
30 b're',
31 b'glob',
31 b'glob',
32 b'path',
32 b'path',
33 b'relglob',
33 b'relglob',
34 b'relpath',
34 b'relpath',
35 b'relre',
35 b'relre',
36 b'rootglob',
36 b'rootglob',
37 b'listfile',
37 b'listfile',
38 b'listfile0',
38 b'listfile0',
39 b'set',
39 b'set',
40 b'include',
40 b'include',
41 b'subinclude',
41 b'subinclude',
42 b'rootfilesin',
42 b'rootfilesin',
43 )
43 )
44 cwdrelativepatternkinds = (b'relpath', b'glob')
44 cwdrelativepatternkinds = (b'relpath', b'glob')
45
45
46 propertycache = util.propertycache
46 propertycache = util.propertycache
47
47
48
48
def _rematcher(regex):
    '''Compile ``regex`` with ``util.re`` and return a matching callable.

    The compiled object's ``test_match`` method is returned when it exists;
    otherwise its ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matchfn = compiled.test_match
    except AttributeError:
        matchfn = compiled.match
    return matchfn
58
58
59
59
def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
    '''Split ``kindpats`` into fileset matchers and the remaining patterns.

    Every ``set`` entry is turned into a matcher via ``ctx.matchfileset()``.
    With ``listsubrepos`` set, the same fileset is also evaluated in each
    entry of ``ctx.substate`` and wrapped in a ``prefixdirmatcher``.

    Returns a ``(matchers, other)`` pair, where ``other`` holds the
    non-``set`` ``(kind, pat, source)`` tuples in their original order.

    Raises error.ProgrammingError if a ``set`` pattern is seen without a
    ``ctx``.
    '''
    filesetmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != b'set':
            remaining.append((kind, pat, source))
            continue

        if ctx is None:
            raise error.ProgrammingError(
                b"fileset expression with no context"
            )
        filesetmatchers.append(ctx.matchfileset(pat, badfn=badfn))

        if not listsubrepos:
            continue
        for subpath in ctx.substate:
            submatcher = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
            filesetmatchers.append(
                prefixdirmatcher(subpath, submatcher, badfn=badfn)
            )

    return filesetmatchers, remaining
82
82
83
83
def _expandsubinclude(kindpats, root):
    '''Separate ``subinclude`` entries from the other kindpats.

    Returns ``(relmatchers, other)``. ``relmatchers`` is a list of
    ``(prefix, matcherargs)`` pairs, one per ``subinclude`` entry, where
    ``matcherargs`` is ``(newroot, b'', [], [b'include:<path>'])`` and
    ``prefix`` is ``newroot`` canonicalized against ``root`` with a trailing
    slash appended when it is non-empty. ``other`` holds every
    non-``subinclude`` ``(kind, pat, source)`` tuple.
    '''
    subincludes = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != b'subinclude':
            remaining.append((kind, pat, source))
            continue

        # The pattern is joined onto the directory part of its source.
        sourcedir = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourcedir, util.pconvert(pat))
        newroot = pathutil.dirname(path)

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += b'/'
        subincludes.append(
            (prefix, (newroot, b'', [], [b'include:%s' % path]))
        )

    return subincludes, remaining
107
107
108
108
def _kindpatsalwaysmatch(kindpats):
    """Check whether the kindpats match everything, as e.g. 'relpath:.' does.

    Returns True only when every entry has an empty pattern and a kind of
    ``relpath`` or ``glob``; an empty ``kindpats`` therefore also yields
    True. The ``source`` element of each tuple is ignored.
    """
    return all(
        pat == b'' and kind in (b'relpath', b'glob')
        for kind, pat, _source in kindpats
    )
117
117
118
118
def _buildkindpatsmatcher(
    matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
):
    '''Build one matcher out of ``kindpats``.

    ``set`` patterns are expanded through ``_expandsets()``; the remaining
    patterns, if any, are handed to ``matchercls(root, kindpats,
    badfn=badfn)``. The result is a nevermatcher when nothing was built, the
    single matcher when exactly one was built, and a unionmatcher otherwise.
    '''
    filesetmatchers, plainpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
    )

    combined = []
    if plainpats:
        combined.append(matchercls(root, plainpats, badfn=badfn))
    combined.extend(filesetmatchers)

    if not combined:
        return nevermatcher(badfn=badfn)
    if len(combined) == 1:
        return combined[0]
    return unionmatcher(combined)
136
136
137
137
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))

    if not icasefs:
        normalize = _donormalize
    else:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # Same as _donormalize(), but every non-regex pattern is also
            # passed through the dirstate's normalize().
            casefolded = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                if kind in (b're', b'relre'):
                    # regex can't be normalized
                    casefolded.append((kind, pat, source))
                    continue

                normalized = dsnormalize(pat)

                # Preserve the original to handle a case only rename.
                if pat != normalized and pat in dirstate:
                    casefolded.append((kind, pat, source))

                casefolded.append((kind, normalized, source))
            return casefolded

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )

    def buildfilter(pats):
        # include/exclude lists always default to 'glob' and are built
        # without the caller's badfn.
        return _buildkindpatsmatcher(
            includematcher,
            root,
            normalize(pats, b'glob', root, cwd, auditor, warn),
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )

    if include:
        m = intersectmatchers(m, buildfilter(include))
    if exclude:
        m = differencematcher(m, buildfilter(exclude))
    return m
297
297
298
298
def exact(files, badfn=None):
    '''Return an exactmatcher built from ``files`` and ``badfn``.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
301
301
302
302
def always(badfn=None):
    '''Return an alwaysmatcher using ``badfn`` as its bad() callback.'''
    return alwaysmatcher(badfn=badfn)
305
305
306
306
def never(badfn=None):
    '''Return a nevermatcher using ``badfn`` as its bad() callback.'''
    return nevermatcher(badfn=badfn)
309
309
310
310
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is ``badfn``.

    The matcher passed in is left untouched; only the copy gets the new
    callback.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
318
318
319
319
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Each pattern is split with ``_patsplit(p, default)`` and handled by kind:

    - cwd-relative kinds are canonicalized against ``root`` and ``cwd``
    - relglob, path, rootfilesin and rootglob are run through
      ``util.normpath()``
    - listfile/listfile0 are read and their entries normalized recursively;
      the list file's path is recorded as the source of each entry
    - include files are read with ``readpatternfile()`` and normalized
      recursively; an unreadable file is reported through ``warn`` (when
      given) and skipped
    - any other kind (e.g. re or relre) is kept as-is

    Returns a list of ``(kind, pat, source)`` tuples.

    Raises error.Abort when a list file cannot be read, or when processing
    an include file aborts (the message is prefixed with the include path).
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                # drop empty entries
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions cannot be indexed on Python 3; args[0] is the
                # value that inst[0] produced on Python 2.
                # NOTE(review): assumes error.Abort keeps its message as the
                # first positional argument - confirm against error.py.
                raise error.Abort(b'%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats
367
367
368
368
class basematcher(object):
    '''Base class for matchers; on its own it matches no file.

    The defaults here are conservative: matchfn() returns False, files() is
    empty, and every directory is reported as worth visiting.
    '''

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
483
483
484
484
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        # every file name is accepted
        return True

    def visitdir(self, dir):
        # the directory and all of its subdirectories should be visited
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
505
505
506
506
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # no directory is worth visiting
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
532
532
533
533
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        # The predicate itself is installed as the per-file match function.
        self.matchfn = predfn
        # Optional description of the predicate, used only by __repr__.
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Prefer the caller-supplied description; fall back to the repr of
        # the predicate function when that description renders as empty.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): the 'predicatenmatcher' spelling is kept as-is since
        # it is emitted at runtime; confirm nothing depends on it before
        # changing it.
        return b'<predicatenmatcher pred=%s>' % described
548
548
549
549
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # '$' anchors every generated pattern at the end of the file name.
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        # Every directory leading to one of the explicitly listed files.
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        listed = self._fileset
        if dir in listed:
            # An explicitly listed directory: with only prefix patterns the
            # whole subtree is accepted, otherwise it merely has to be
            # visited.
            return b'all' if self._prefix else True
        if dir in self._dirs:
            return True
        # Finally, visit anything located below an explicitly listed path.
        return any(ancestor in listed for ancestor in pathutil.finddirs(dir))

    def visitchildrenset(self, dir):
        # Translate the visitdir() answer into visitchildrenset() vocabulary.
        verdict = self.visitdir(dir)
        if verdict is True:
            return b'this'
        if not verdict:
            return set()
        assert verdict == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
621
621
622
622
623 # This is basically a reimplementation of pathutil.dirs that stores the
623 # This is basically a reimplementation of pathutil.dirs that stores the
624 # children instead of just a count of them, plus a small optional optimization
624 # children instead of just a count of them, plus a small optional optimization
625 # to avoid some directories we don't need.
625 # to avoid some directories we don't need.
class _dirchildren(object):
    """Map each directory to the set of its immediate child names.

    ``paths`` is an iterable of slash-separated byte paths. For every path,
    each ancestor directory (down to the root, spelled ``b''``) records the
    name of the component directly below it.

    ``onlyinclude`` optionally restricts which directories are recorded:
    when given, only directories contained in it get an entry. When omitted
    (``None``), every directory is recorded. An explicitly passed empty
    collection still means "record nothing".
    """

    def __init__(self, paths, onlyinclude=None):
        self._dirs = {}
        # None means "no restriction". The previous ``onlyinclude or []``
        # fallback turned the default into "record nothing at all", which
        # made the optional argument effectively mandatory. A falsy but
        # non-None value keeps its old meaning via the same ``or []``.
        if onlyinclude is None:
            self._onlyinclude = None
        else:
            self._onlyinclude = onlyinclude or []
        addpath = self.addpath
        for f in paths:
            addpath(f)

    def addpath(self, path):
        """Record ``path`` under each of its (allowed) ancestor directories.

        The empty path (the root itself) has no parent and is ignored.
        """
        if path == b'':
            return
        dirs = self._dirs
        allowed = self._onlyinclude
        for d, b in _dirchildren._findsplitdirs(path):
            if allowed is not None and d not in allowed:
                continue
            dirs.setdefault(d, set()).add(b)

    @staticmethod
    def _findsplitdirs(path):
        # yields (dirname, basename) tuples, walking back to the root. This is
        # very similar to pathutil.finddirs, except:
        #  - produces a (dirname, basename) tuple, not just 'dirname'
        # Unlike manifest._splittopdir, this does not suffix `dirname` with a
        # slash.
        oldpos = len(path)
        pos = path.rfind(b'/')
        while pos != -1:
            yield path[:pos], path[pos + 1 : oldpos]
            oldpos = pos
            pos = path.rfind(b'/', 0, pos)
        # The last step pairs the root (b'') with the top-level component.
        yield b'', path[:oldpos]

    def get(self, path):
        """Return the recorded child names of ``path`` (empty set if none)."""
        return self._dirs.get(path, set())
661
661
662
662
class includematcher(basematcher):
    """Matcher built from include-style (kind, pat, source) patterns.

    Besides the per-file match function, it keeps three directory
    collections (roots, dirs, parents) that drive visitdir() and
    visitchildrenset().
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        # The b'(?:/|$)' suffix lets a pattern match either a whole name or
        # a directory prefix of it.
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        roots = self._roots
        if dir in roots:
            # A root is fully accepted only when all patterns are prefixes.
            return b'all' if self._prefix else True
        if dir in self._dirs or dir in self._parents:
            return True
        # Anything nested below a recursively included root is visited too.
        return any(ancestor in roots for ancestor in pathutil.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        roots = self._roots
        if dir in roots:
            return b'all' if self._prefix else b'this'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in roots
            or dir in self._dirs
            or any(ancestor in roots for ancestor in pathutil.finddirs(dir))
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        # A pure "parent" directory: only the children leading towards an
        # included directory need to be visited.
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
725
725
726
726
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # Keep a caller-provided list as-is; copy any other iterable.
        self._files = files if isinstance(files, list) else list(files)

    # Matching a file is the same thing as the file being listed exactly.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # All directories (recursively) that contain one of the files.
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        # Files plus every non-root directory leading to them.
        candidates = self._fileset | (self._dirs - {b''})
        if dir != b'':
            # Keep only entries below ``dir`` and make them relative to it.
            below = dir + b'/'
            cut = len(below)
            candidates = {c[cut:] for c in candidates if c.startswith(below)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {c for c in candidates if b'/' not in c}
        # We really do not expect the result to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
790
790
791
791
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # Only the first matcher's callbacks are kept.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 once and reuse the answer for both checks below.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # m2 swallows the whole subtree, so nothing is left to match.
            return False
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 may exclude something below dir: never report 'all'.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
859
859
860
860
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    When either argument is None, the other one is handed back unchanged.
    When one side always matches, a shallow copy of the other side is
    returned instead of building a full intersection matcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        # m2 alone decides what matches, but m1's callbacks must survive.
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined.bad, combined.traversedir = m1.bad, m1.traversedir
        return combined
    if m2.always():
        # m1 already carries the right callbacks; a plain copy is enough.
        return copy.copy(m1)
    return intersectionmatcher(m1, m2)
879
879
880
880
class intersectionmatcher(basematcher):
    """Matches a file only when both wrapped matchers match it.

    The bad and traversedir attributes are taken from the first matcher.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # At least one side is exact: filter its file list through the other.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if not first:
            # m1 rejects the directory; m2 is not even consulted.
            return False
        second = self._m2.visitdir(dir)
        if first == b'all':
            return second
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(second)

    def visitchildrenset(self, dir):
        first = self._m1.visitchildrenset(dir)
        if not first:
            return set()
        second = self._m2.visitchildrenset(dir)
        if not second:
            return set()

        # 'all' on one side defers entirely to the other side.
        if first == b'all':
            return second
        if second == b'all':
            return first

        if b'this' in (first, second):
            return b'this'

        assert isinstance(first, set) and isinstance(second, set)
        return first.intersection(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
940
940
941
941
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files located below ``path`` and
        # strip the "<path>/" prefix from them.
        below = path + b"/"
        self._files = [
            name[len(path) + 1 :]
            for name in matcher._files
            if name.startswith(below)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def bad(self, f, msg):
        # Report the failure using the path as the wrapped matcher sees it.
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        # b'' denotes the subdirectory itself; anything else lives below it.
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(target)

    def visitchildrenset(self, dir):
        # Same path remapping as in visitdir().
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(target)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
1021 )
1021 )
1022
1022
1023
1023
1024 class prefixdirmatcher(basematcher):
1024 class prefixdirmatcher(basematcher):
1025 """Adapt a matcher to work on a parent directory.
1025 """Adapt a matcher to work on a parent directory.
1026
1026
1027 The matcher's non-matching-attributes (bad, traversedir) are ignored.
1027 The matcher's non-matching-attributes (bad, traversedir) are ignored.
1028
1028
1029 The prefix path should usually be the relative path from the root of
1029 The prefix path should usually be the relative path from the root of
1030 this matcher to the root of the wrapped matcher.
1030 this matcher to the root of the wrapped matcher.
1031
1031
1032 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
1032 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
1033 >>> m2 = prefixdirmatcher(b'd/e', m1)
1033 >>> m2 = prefixdirmatcher(b'd/e', m1)
1034 >>> m2(b'a.txt')
1034 >>> m2(b'a.txt')
1035 False
1035 False
1036 >>> m2(b'd/e/a.txt')
1036 >>> m2(b'd/e/a.txt')
1037 True
1037 True
1038 >>> m2(b'd/e/b.txt')
1038 >>> m2(b'd/e/b.txt')
1039 False
1039 False
1040 >>> m2.files()
1040 >>> m2.files()
1041 ['d/e/a.txt', 'd/e/f/b.txt']
1041 ['d/e/a.txt', 'd/e/f/b.txt']
1042 >>> m2.exact(b'd/e/a.txt')
1042 >>> m2.exact(b'd/e/a.txt')
1043 True
1043 True
1044 >>> m2.visitdir(b'd')
1044 >>> m2.visitdir(b'd')
1045 True
1045 True
1046 >>> m2.visitdir(b'd/e')
1046 >>> m2.visitdir(b'd/e')
1047 True
1047 True
1048 >>> m2.visitdir(b'd/e/f')
1048 >>> m2.visitdir(b'd/e/f')
1049 True
1049 True
1050 >>> m2.visitdir(b'd/e/g')
1050 >>> m2.visitdir(b'd/e/g')
1051 False
1051 False
1052 >>> m2.visitdir(b'd/ef')
1052 >>> m2.visitdir(b'd/ef')
1053 False
1053 False
1054 """
1054 """
1055
1055
def __init__(self, path, matcher, badfn=None):
    """Wrap ``matcher`` so that it applies below the directory ``path``.

    ``badfn`` is forwarded to the base class initializer. An empty
    ``path`` is rejected with ``error.ProgrammingError``.
    """
    super(prefixdirmatcher, self).__init__(badfn)
    if not path:
        raise error.ProgrammingError(b'prefix path must not be empty')
    self._matcher = matcher
    self._path = path
    # "<path>/" is what gets stripped from names before they are handed to
    # the wrapped matcher.
    self._pathprefix = path + b'/'
1063
1063
@propertycache
def _files(self):
    """The wrapped matcher's ``_files``, each with ``<path>/`` prepended."""
    return [self._pathprefix + name for name in self._matcher._files]
1067
1067
def matchfn(self, f):
    """Match ``f`` against the wrapped matcher, relative to the prefix.

    Names that do not start with ``<path>/`` never match; otherwise the
    prefix is stripped and the remainder is given to the wrapped matcher.
    """
    prefix = self._pathprefix
    if f.startswith(prefix):
        return self._matcher.matchfn(f[len(prefix) :])
    return False
1072
1072
@propertycache
def _pathdirs(self):
    """Set of the entries yielded by ``pathutil.finddirs()`` for the prefix
    path."""
    found = pathutil.finddirs(self._path)
    return set(found)
1076
1076
def visitdir(self, dir):
    """Decide whether ``dir`` needs to be visited.

    The prefix directory itself and anything below it are delegated to the
    wrapped matcher (with the prefix stripped). Any other directory is
    visited only if it is one of the entries in ``_pathdirs``.
    """
    if dir == self._path:
        relative = b''
    elif dir.startswith(self._pathprefix):
        relative = dir[len(self._pathprefix) :]
    else:
        return dir in self._pathdirs
    return self._matcher.visitdir(relative)
1083
1083
def visitchildrenset(self, dir):
    """Return which children of ``dir`` need to be visited.

    For the prefix directory and everything below it the answer comes from
    the wrapped matcher (with the prefix stripped). For an entry of
    ``_pathdirs`` the answer is ``b'this'``; for anything else it is an
    empty set.
    """
    if dir == self._path:
        relative = b''
    elif dir.startswith(self._pathprefix):
        relative = dir[len(self._pathprefix) :]
    elif dir in self._pathdirs:
        return b'this'
    else:
        return set()
    return self._matcher.visitchildrenset(relative)
1092
1092
def isexact(self):
    """Report whatever the wrapped matcher's ``isexact()`` reports."""
    wrapped = self._matcher
    return wrapped.isexact()
1095
1095
def prefix(self):
    """Report whatever the wrapped matcher's ``prefix()`` reports."""
    wrapped = self._matcher
    return wrapped.prefix()
1098
1098
@encoding.strmethod
def __repr__(self):
    """Bytes representation naming the prefix path and the wrapped
    matcher."""
    path = pycompat.bytestr(self._path)
    return b'<prefixdirmatcher path=%r, matcher=%r>' % (path, self._matcher)
1105
1105
1106
1106
class unionmatcher(basematcher):
    """A matcher that matches whatever any of several matchers matches.

    ``traversedir`` is copied from the first matcher in the list; the base
    class is initialized without a ``badfn``.
    """

    def __init__(self, matchers):
        """``matchers`` is a non-empty sequence of matchers to combine."""
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as one of the matchers accepts ``f``."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """Combine the ``visitdir()`` answers of all matchers.

        ``b'all'`` from any matcher wins immediately; otherwise the answers
        are or-ed together.
        """
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == b'all':
                return answer
            visit |= answer
        return visit

    def visitchildrenset(self, dir):
        """Combine the ``visitchildrenset()`` answers of all matchers.

        ``b'all'`` from any matcher wins immediately. Otherwise ``b'this'``
        from any matcher yields ``b'this'``, and failing that the result is
        the union of the returned sets.
        """
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if answer == b'this':
                # Keep looping: a later matcher may still answer 'all'.
                sawthis = True
            elif not sawthis:
                assert isinstance(answer, set)
                children.update(answer)
        if sawthis:
            return b'this'
        return children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1157
1157
1158
1158
def patkind(pattern, default=None):
    r'''Return the kind of a 'kind:pat' pattern, or ``default``.

    ``default`` is returned when the pattern carries no known kind prefix.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, _pat = _patsplit(pattern, default)
    return kind
1173
1173
1174
1174
def _patsplit(pattern, default):
    """Separate an optional known kind prefix from the pattern proper.

    Returns ``(kind, pat)`` when the text before the first colon is one of
    ``allpatternkinds``; otherwise ``(default, pattern)`` with the pattern
    left untouched."""
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1183
1183
1184
1184
def _globre(pat):
    r'''Translate an extended glob into the text of a regular expression.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    escape = util.stringutil.regexbytesescapemap.get
    end = len(pat)
    pos = 0
    depth = 0  # number of '{' groups currently open
    out = b''

    while pos < end:
        ch = pat[pos : pos + 1]
        pos += 1
        if ch == b'*':
            if pat[pos : pos + 1] != b'*':
                # a single star never crosses a directory separator
                out += b'[^/]*'
                continue
            pos += 1
            if pat[pos : pos + 1] == b'/':
                pos += 1
                out += b'(?:.*/)?'
            else:
                out += b'.*'
        elif ch == b'?':
            out += b'.'
        elif ch == b'[':
            # Look for the closing bracket; a '!' or ']' directly after the
            # opening bracket belongs to the class body.
            close = pos
            if close < end and pat[close : close + 1] in b'!]':
                close += 1
            while close < end and pat[close : close + 1] != b']':
                close += 1
            if close >= end:
                # unterminated class: treat the '[' as a literal
                out += b'\\['
                continue
            members = pat[pos:close].replace(b'\\', b'\\\\')
            pos = close + 1
            first = members[0:1]
            if first == b'!':
                members = b'^' + members[1:]
            elif first == b'^':
                members = b'\\' + members
            out += b'[' + members + b']'
        elif ch == b'{':
            depth += 1
            out += b'(?:'
        elif ch == b'}' and depth:
            depth -= 1
            out += b')'
        elif ch == b',' and depth:
            out += b'|'
        elif ch == b'\\' and pos < end:
            # a backslash escapes whatever character follows it
            following = pat[pos : pos + 1]
            pos += 1
            out += escape(following, following)
        else:
            # ordinary character, a '}' or ',' outside any group, or a
            # trailing backslash
            out += escape(ch, ch)
    return out
1267
1267
1268
1268
def _regex(kind, pat, globsuffix):
    '''Build the regular expression text for one (normalized) pattern.

    globsuffix is appended to the expressions generated from globs.
    When the Rust module is available the work is delegated to it.
    Kinds that cannot be expressed as a regex raise ProgrammingError.'''

    if rustmod is not None:
        try:
            return rustmod.build_single_regex(kind, pat, globsuffix)
        except rustmod.PatternError:
            raise error.ProgrammingError(
                b'not a regex pattern: %s:%s' % (kind, pat)
            )

    reescape = util.stringutil.reescape

    if kind in (b'glob', b'relpath') and not pat:
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        return b'' if pat == b'.' else reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        # The pattern names a directory; what follows it must be a single
        # path component, i.e. a non-directory.
        dirprefix = b'' if pat == b'.' else reescape(pat) + b'/'
        return dirprefix + b'[^/]+$'
    if kind == b'relglob':
        leadingstar = b'[^/]*'
        globre = _globre(pat)
        if globre.startswith(leadingstar):
            # For the common *XYZ form, emit the regex of **XYZ rather than
            # **/*XYZ, which is easier to read.
            return b'.*' + globre[len(leadingstar) :] + globsuffix
        return b'(?:|.*/)' + globre + globsuffix
    if kind == b'relre':
        return pat if pat.startswith(b'^') else b'.*' + pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1312
1312
1313
1313
def _buildmatch(kindpats, globsuffix, root):
    '''Return a regexp string and a match function for kindpats.

    globsuffix is appended to the regexp of globs. Subinclude patterns are
    split off first and matched through lazily created sub-matchers.'''
    subincludes, kindpats = _expandsubinclude(kindpats, root)
    matchfuncs = []

    if subincludes:
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    # build the sub-matcher on first use and remember it
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch
                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only directory-content patterns: a set lookup on the parent
            # directory replaces the regular expression.
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                parent, slash, _name = f.rpartition(b'/')
                if not slash:
                    parent = b'.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1360
1360
1361
1361
# Upper bound, in bytes, for a single regular expression (and for each group
# of joined expressions) built by _buildregexmatch().
MAX_RE_SIZE = 20000
1363
1363
1364
1364
1365 def _joinregexes(regexps):
1365 def _joinregexes(regexps):
1366 """gather multiple regular expressions into a single one"""
1366 """gather multiple regular expressions into a single one"""
1367 return b'|'.join(regexps)
1367 return b'|'.join(regexps)
1368
1368
1369
1369
def _buildregexmatch(kindpats, globsuffix):
    """Turn a list of (kind, pat, source) into a regexp string and a match
    function.

    Expressions are joined into one regexp when they fit in MAX_RE_SIZE,
    and into several separately compiled groups otherwise. A pattern that
    does not compile is reported through error.Abort.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        groups = []
        groupstart = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if groupsize + piecesize > MAX_RE_SIZE:
                # close the current group and start a new one at this piece
                groups.append(_joinregexes(regexps[groupstart:idx]))
                groupstart = idx
                groupsize = 0
            # the extra byte accounts for the '|' joining the pieces
            groupsize += piecesize + 1

        if groupstart == 0:
            # everything fits into a single expression
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))

        else:
            groups.append(_joinregexes(regexps[groupstart:]))
            allmatchers = [_rematcher(g) for g in groups]

            def func(s):
                return any(m(s) for m in allmatchers)

        return fullregexp, func
    except re.error:
        # Compile the patterns one at a time to find the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
                raise error.Abort(
                    _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                )
        raise error.Abort(_(b"invalid pattern"))
1422
1422
1423
1423
1424 def _patternrootsanddirs(kindpats):
1424 def _patternrootsanddirs(kindpats):
1425 '''Returns roots and directories corresponding to each pattern.
1425 '''Returns roots and directories corresponding to each pattern.
1426
1426
1427 This calculates the roots and directories exactly matching the patterns and
1427 This calculates the roots and directories exactly matching the patterns and
1428 returns a tuple of (roots, dirs) for each. It does not return other
1428 returns a tuple of (roots, dirs) for each. It does not return other
1429 directories which may also need to be considered, like the parent
1429 directories which may also need to be considered, like the parent
1430 directories.
1430 directories.
1431 '''
1431 '''
1432 r = []
1432 r = []
1433 d = []
1433 d = []
1434 for kind, pat, source in kindpats:
1434 for kind, pat, source in kindpats:
1435 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1435 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1436 root = []
1436 root = []
1437 for p in pat.split(b'/'):
1437 for p in pat.split(b'/'):
1438 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1438 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1439 break
1439 break
1440 root.append(p)
1440 root.append(p)
1441 r.append(b'/'.join(root))
1441 r.append(b'/'.join(root))
1442 elif kind in (b'relpath', b'path'):
1442 elif kind in (b'relpath', b'path'):
1443 if pat == b'.':
1443 if pat == b'.':
1444 pat = b''
1444 pat = b''
1445 r.append(pat)
1445 r.append(pat)
1446 elif kind in (b'rootfilesin',):
1446 elif kind in (b'rootfilesin',):
1447 if pat == b'.':
1447 if pat == b'.':
1448 pat = b''
1448 pat = b''
1449 d.append(pat)
1449 d.append(pat)
1450 else: # relglob, re, relre
1450 else: # relglob, re, relre
1451 r.append(b'')
1451 r.append(b'')
1452 return r, d
1452 return r, d
1453
1453
1454
1454
def _roots(kindpats):
    '''Return the directories the given patterns match recursively.'''
    return _patternrootsanddirs(kindpats)[0]
1459
1459
1460
1460
def _rootsdirsandparents(kindpats):
    '''Return the roots, exact directories and parents for the patterns.

    `roots` are directories to match recursively, `dirs` are matched
    non-recursively, and `parents` are the directories that have to be
    walked to reach anything in roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents are added as non-recursive/exact directories, because they
    # have to be scanned on the way to the roots and to the exact
    # directories.
    parents = set(pathutil.dirs(dirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1503
1503
1504
1504
def _explicitfiles(kindpats):
    '''Return the names from the patterns that may be explicit files.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so those
    # patterns are dropped before the roots are computed.
    filable = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(filable)
1517
1517
1518
1518
1519 def _prefix(kindpats):
1519 def _prefix(kindpats):
1520 '''Whether all the patterns match a prefix (i.e. recursively)'''
1520 '''Whether all the patterns match a prefix (i.e. recursively)'''
1521 for kind, pat, source in kindpats:
1521 for kind, pat, source in kindpats:
1522 if kind not in (b'path', b'relpath'):
1522 if kind not in (b'path', b'relpath'):
1523 return False
1523 return False
1524 return True
1524 return True
1525
1525
1526
1526
1527 _commentre = None
1527 _commentre = None
1528
1528
1529
1529
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    warn, when not false, is called with a message for every 'syntax:'
    line naming an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    '''
    global _commentre

    if rustmod is not None:
        result, warnings = rustmod.read_pattern_file(
            filepath, bool(warn), sourceinfo,
        )

        if warn:
            for warning_params in warnings:
                # Can't be easily emitted from Rust, because it would require
                # a mechanism for both gettext and calling the `warn`
                # function.
                warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager closes the file even when parsing raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                # 'syntax: NAME' switches the default kind for later lines
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # An explicit per-line prefix overrides the current default.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now