##// END OF EJS Templates
typing: add type hints for the overloads of `matchmod.readpatternfile()`...
Matt Harbison -
r52819:70fe33bd default
parent child Browse files
Show More
@@ -1,1745 +1,1782 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import annotations
8 from __future__ import annotations
9
9
10 import bisect
10 import bisect
11 import copy
11 import copy
12 import itertools
12 import itertools
13 import os
13 import os
14 import re
14 import re
15 import typing
16
17 from typing import (
18 Any,
19 Callable,
20 List,
21 Tuple,
22 Union,
23 overload,
24 )
15
25
16 from .i18n import _
26 from .i18n import _
17 from .pycompat import open
27 from .pycompat import open
18 from . import (
28 from . import (
19 encoding,
29 encoding,
20 error,
30 error,
21 pathutil,
31 pathutil,
22 policy,
32 policy,
23 pycompat,
33 pycompat,
24 util,
34 util,
25 )
35 )
26 from .utils import stringutil
36 from .utils import stringutil
27
37
# Whatever policy.importrust() resolves for the 'dirstate' module.
# NOTE(review): presumably None when no Rust build is available - confirm.
rustmod = policy.importrust('dirstate')

# All pattern kinds, i.e. the part before the ':' in a 'kind:pattern' string.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'filepath',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)

# Kinds whose pattern _donormalize() canonicalizes relative to cwd.
cwdrelativepatternkinds = (b'relpath', b'glob')

# Short local alias used as a decorator by the matcher classes below.
propertycache = util.propertycache
49
59
50
60
def _rematcher(regex):
    """Compile regex with util.re and return a bound matching function.

    The compiled object's ``test_match`` attribute is preferred when it
    exists (slightly faster, provided by facebook's re2 bindings);
    otherwise its regular ``match`` method is returned.
    """
    compiled = util.re.compile(regex)
    try:
        matchfn = compiled.test_match
    except AttributeError:
        matchfn = compiled.match
    return matchfn
60
70
61
71
def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
    """Split kindpats into fileset matchers and the remaining kindpats.

    Every b'set' entry is expanded to a matcher via ctx.matchfileset().
    When listsubrepos is true, each subrepo listed in ctx.substate also
    contributes a matcher, wrapped in a prefixdirmatcher for its path.

    Returns a ``(matchers, other)`` pair, where ``other`` holds the
    non-'set' ``(kind, pat, source)`` tuples in their original order.

    Raises error.ProgrammingError if a 'set' pattern is found while ctx
    is None.
    """
    setmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != b'set':
            remaining.append((kind, pat, source))
            continue

        if ctx is None:
            raise error.ProgrammingError(
                b"fileset expression with no context"
            )
        setmatchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))

        if not listsubrepos:
            continue
        for subpath in ctx.substate:
            sub = ctx.sub(subpath)
            sm = sub.matchfileset(cwd, pat, badfn=badfn)
            setmatchers.append(prefixdirmatcher(subpath, sm, badfn=badfn))

    return setmatchers, remaining
84
94
85
95
def _expandsubinclude(kindpats, root):
    """Separate the b'subinclude' patterns from the other kindpats.

    Returns a ``(relmatchers, other)`` pair.  ``relmatchers`` is a list of
    ``(prefix, matcherargs)`` tuples, one per subinclude: ``prefix`` is
    the directory of the included file relative to root (with a trailing
    b'/' unless empty) and ``matcherargs`` is the tuple
    ``(newroot, b'', [], [b'include:<path>'])``.  ``other`` holds every
    remaining ``(kind, pat, source)`` tuple unchanged.
    """
    subincludes = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != b'subinclude':
            remaining.append((kind, pat, source))
            continue

        # The pattern is resolved against the directory of the file that
        # mentioned it.
        sourcedir = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourcedir, util.pconvert(pat))

        newroot = pathutil.dirname(path)
        matcherargs = (newroot, b'', [], [b'include:%s' % path])

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += b'/'
        subincludes.append((prefix, matcherargs))

    return subincludes, remaining
109
119
110
120
111 def _kindpatsalwaysmatch(kindpats):
121 def _kindpatsalwaysmatch(kindpats):
112 """Checks whether the kindspats match everything, as e.g.
122 """Checks whether the kindspats match everything, as e.g.
113 'relpath:.' does.
123 'relpath:.' does.
114 """
124 """
115 for kind, pat, source in kindpats:
125 for kind, pat, source in kindpats:
116 if pat != b'' or kind not in [b'relpath', b'glob']:
126 if pat != b'' or kind not in [b'relpath', b'glob']:
117 return False
127 return False
118 return True
128 return True
119
129
120
130
def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build one matcher out of kindpats, using matchercls for plain kinds.

    'set' patterns are first expanded to fileset matchers by
    _expandsets(); what is left, if anything, is handed to
    ``matchercls(root, kindpats, badfn=badfn)``.

    Returns a nevermatcher when nothing was produced, the single matcher
    when there is exactly one, and a unionmatcher of all of them otherwise.
    """
    filesetmatchers, kindpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )

    matchers = []
    if kindpats:
        matchers.append(matchercls(root, kindpats, badfn=badfn))
    if filesetmatchers:
        matchers.extend(filesetmatchers)

    if len(matchers) > 1:
        return unionmatcher(matchers)
    if matchers:
        return matchers[0]
    return nevermatcher(badfn=badfn)
148
158
149
159
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext (its repo's dirstate is used when icasefs
          is true)
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'filepath:<path>' - an exact path to a single file, relative to the
                        repository root
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [br're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='[^/]*\\.py$|foo/a(?:/|$)|.*\\.c$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [br'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [br're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))

    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            """_donormalize() plus case folding through the dirstate."""
            folded = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                # regex can't be normalized
                if kind not in (b're', b'relre'):
                    original = pat
                    pat = dsnormalize(pat)

                    # Preserve the original to handle a case only rename.
                    if original != pat and original in dirstate:
                        folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded

    else:
        normalize = _donormalize

    def includelike(pats):
        """Build the matcher shared by the include and exclude handling."""
        kindpats = normalize(pats, b'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )

    if include:
        m = intersectmatchers(m, includelike(include))
    if exclude:
        m = differencematcher(m, includelike(exclude))
    return m
314
324
315
325
def exact(files, badfn=None):
    """Return an exactmatcher built from files, with the optional badfn."""
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
318
328
319
329
def always(badfn=None):
    """Return an alwaysmatcher, i.e. a matcher that matches everything."""
    return alwaysmatcher(badfn=badfn)
322
332
323
333
def never(badfn=None):
    """Return a nevermatcher, i.e. a matcher that matches nothing."""
    return nevermatcher(badfn=badfn)
326
336
327
337
def badmatch(match, badfn):
    """Return a shallow copy of match whose bad attribute is badfn.

    The given matcher itself is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
335
345
336
346
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Turn 'kind:pat' strings into a list of (kind, pat, source) tuples.

    Patterns without an explicit kind get the ``default`` kind.  Kinds in
    cwdrelativepatternkinds are canonicalized against root and cwd, a few
    rooted kinds go through util.normpath(), and everything else is kept
    as is.  'listfile'/'listfile0' and 'include' entries are replaced by
    the normalized patterns read from the file they name.

    ``source`` is b'' for patterns given directly, the list file path for
    patterns coming from a list file, and the nested source (or else the
    include path) for patterns coming from an include file.

    Raises error.Abort if a list file cannot be read or if processing an
    include file aborts.  An include file that fails with IOError is
    skipped, after calling ``warn`` when one is given.
    """
    kindpats = []
    kinds_to_normalize = (
        b'relglob',
        b'path',
        b'filepath',
        b'rootfilesin',
        b'rootglob',
    )

    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in kinds_to_normalize:
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                data = util.readfile(pat)
                if kind == b'listfile0':
                    entries = data.split(b'\0')
                else:
                    entries = data.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            nested = _donormalize(entries, default, root, cwd, auditor, warn)
            # Entries read from a list file report that file as their source.
            kindpats.extend((k, p, pat) for k, p, _src in nested)
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                nested = _donormalize(
                    includepats, default, root, cwd, auditor, warn
                )
                kindpats.extend((k, p, src or pat) for k, p, src in nested)
            except error.Abort as inst:
                # Prefix the message with the include file being processed.
                raise error.Abort(
                    b'%s: %s'
                    % (
                        pat,
                        inst.message,
                    )
                )
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats
395
405
396
406
class basematcher:
    """Base class of the matchers; on its own it matches no file at all."""

    def __init__(self, badfn=None):
        self._was_tampered_with = False
        # Only override the bad() method when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def was_tampered_with_nonrec(self) -> bool:
        # _was_tampered_with records whether an extension changed the
        # behavior of this matcher (crazy stuff!), in which case the rust
        # fast path gets disabled.
        return self._was_tampered_with

    def was_tampered_with(self) -> bool:
        return self.was_tampered_with_nonrec()

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        """Returns True if f is in .files()."""
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns b'all' if the given directory and all subdirectories should
        be visited. Otherwise returns True or False indicating whether the
        given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not (self.always() or self.isexact() or self.prefix())
520
530
521
531
class alwaysmatcher(basematcher):
    """Matches everything."""

    def __init__(self, badfn=None):
        super().__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        # Every file name is accepted.
        return True

    def visitdir(self, dir):
        # Every directory, and everything below it, is worth visiting.
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
542
552
543
553
class nevermatcher(basematcher):
    '''Matcher that rejects every path.'''

    def __init__(self, badfn=None):
        super().__init__(badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # Claiming to be both an exact matcher and a prefix matcher looks odd for
    # a matcher that matches nothing, but it lets callers use the fast paths
    # written for either kind. files() returns [], so there are neither exact
    # matches nor prefixes to iterate over, which keeps those fast paths both
    # cheap and correct.
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # No directory is worth visiting.
        return False

    def visitchildrenset(self, dir):
        # An empty set: no child of any directory needs to be visited.
        return set()
569
579
570
580
class predicatematcher(basematcher):
    """Wraps a plain boolean function so it can be used as a matcher"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super().__init__(badfn)
        # The predicate itself becomes this instance's matchfn.
        self.matchfn = predfn
        # Optional description of the predicate, used only by __repr__.
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Prefer the caller-supplied description; fall back to the byte repr
        # of the predicate when buildrepr() yields nothing usable.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): the "predicatenmatcher" spelling is emitted at
        # runtime and is kept as-is; existing output consumers may rely on it.
        return b'<predicatenmatcher pred=%s>' % described
585
595
586
596
def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
    size = len(prefix_set)
    if not size:
        return False
    if path in prefix_set:
        return True

    # With more than 5 entries in prefix_set it is *probably* cheaper to walk
    # up the directory hierarchy of `path` and do one hash lookup per
    # ancestor, on the assumption that hierarchies are fairly shallow and
    # hash lookups are cheap.
    if size > 5:
        for parentdir in pathutil.finddirs(path):
            if parentdir in prefix_set:
                return True
        return False

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    slash = ord(b'/')

    # `path` itself was already ruled out above, so any prefix that `path`
    # starts with is strictly shorter than `path` and `path[len(prefix)]`
    # should never raise IndexError.
    for prefix in prefix_set:
        if path.startswith(prefix) and path[len(prefix)] == slash:
            return True
    return False
612
622
613
623
class patternmatcher(basematcher):
    r"""Matcher built from a list of (kind, pat, source) tuples, evaluated
    against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    [b'', b'foo/a', b'', b'b']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super().__init__(badfn)
        # Note: this sorts the caller's list in place.
        kindpats.sort()

        if rustmod is not None:
            # The Rust side needs the raw patterns, since they may come
            # straight from the user interface.
            self._kindpats = kindpats

        # The recursively-included roots are not needed by this matcher.
        _roots, explicitdirs, parentdirs = _rootsdirsandparents(kindpats)
        self._files = _explicitfiles(kindpats)
        self._dirs_explicit = set(explicitdirs)
        self._dirs = parentdirs
        self._prefix = _prefix(kindpats)
        self._pats, self._matchfn = _buildmatch(kindpats, b'$', root)

    def matchfn(self, fn):
        # An explicitly listed file always matches; anything else is decided
        # by the compiled pattern function.
        return fn in self._fileset or self._matchfn(fn)

    def visitdir(self, dir):
        if self._prefix and dir in self._fileset:
            return b'all'
        if dir in self._dirs:
            return True
        if path_or_parents_in_set(dir, self._fileset):
            return True
        return path_or_parents_in_set(dir, self._dirs_explicit)

    def visitchildrenset(self, dir):
        # Translate visitdir()'s answer: falsy -> set(), True -> b'this',
        # b'all' -> b'all'.
        verdict = self.visitdir(dir)
        if not verdict:
            return set()
        if verdict is True:
            return b'this'
        assert verdict == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
692
702
693
703
694 # This is basically a reimplementation of pathutil.dirs that stores the
704 # This is basically a reimplementation of pathutil.dirs that stores the
695 # children instead of just a count of them, plus a small optional optimization
705 # children instead of just a count of them, plus a small optional optimization
696 # to avoid some directories we don't need.
706 # to avoid some directories we don't need.
697 class _dirchildren:
707 class _dirchildren:
698 def __init__(self, paths, onlyinclude=None):
708 def __init__(self, paths, onlyinclude=None):
699 self._dirs = {}
709 self._dirs = {}
700 self._onlyinclude = onlyinclude or []
710 self._onlyinclude = onlyinclude or []
701 addpath = self.addpath
711 addpath = self.addpath
702 for f in paths:
712 for f in paths:
703 addpath(f)
713 addpath(f)
704
714
705 def addpath(self, path):
715 def addpath(self, path):
706 if path == b'':
716 if path == b'':
707 return
717 return
708 dirs = self._dirs
718 dirs = self._dirs
709 findsplitdirs = _dirchildren._findsplitdirs
719 findsplitdirs = _dirchildren._findsplitdirs
710 for d, b in findsplitdirs(path):
720 for d, b in findsplitdirs(path):
711 if d not in self._onlyinclude:
721 if d not in self._onlyinclude:
712 continue
722 continue
713 dirs.setdefault(d, set()).add(b)
723 dirs.setdefault(d, set()).add(b)
714
724
715 @staticmethod
725 @staticmethod
716 def _findsplitdirs(path):
726 def _findsplitdirs(path):
717 # yields (dirname, basename) tuples, walking back to the root. This is
727 # yields (dirname, basename) tuples, walking back to the root. This is
718 # very similar to pathutil.finddirs, except:
728 # very similar to pathutil.finddirs, except:
719 # - produces a (dirname, basename) tuple, not just 'dirname'
729 # - produces a (dirname, basename) tuple, not just 'dirname'
720 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
730 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
721 # slash.
731 # slash.
722 oldpos = len(path)
732 oldpos = len(path)
723 pos = path.rfind(b'/')
733 pos = path.rfind(b'/')
724 while pos != -1:
734 while pos != -1:
725 yield path[:pos], path[pos + 1 : oldpos]
735 yield path[:pos], path[pos + 1 : oldpos]
726 oldpos = pos
736 oldpos = pos
727 pos = path.rfind(b'/', 0, pos)
737 pos = path.rfind(b'/', 0, pos)
728 yield b'', path[:oldpos]
738 yield b'', path[:oldpos]
729
739
730 def get(self, path):
740 def get(self, path):
731 return self._dirs.get(path, set())
741 return self._dirs.get(path, set())
732
742
733
743
class includematcher(basematcher):
    """Matcher built from a list of include patterns.

    The patterns are compiled with a ``(?:/|$)`` suffix -- presumably so a
    pattern naming a directory also matches what lies beneath it (confirm
    against _buildmatch).
    """

    def __init__(self, root, kindpats, badfn=None):
        super().__init__(badfn)
        if rustmod is not None:
            # The Rust side needs the raw patterns, since they may come
            # straight from the user interface.
            self._kindpats = kindpats
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        recursive, nonrecursive, ancestors = _rootsdirsandparents(kindpats)
        # Directories that are included recursively.
        self._roots = set(recursive)
        # Directories that are included non-recursively.
        self._dirs = set(nonrecursive)
        # Directories included non-recursively only because they lead to
        # something in _dirs or _roots.
        self._parents = ancestors

    def visitdir(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        if dir in self._dirs or dir in self._parents:
            return True
        return path_or_parents_in_set(dir, self._roots)

    @propertycache
    def _allparentschildren(self):
        # Feeding dirs, roots and parents in and then restricting the result
        # to parents is deliberate. Consider:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # When asked for the children of 'foo', having added only
        # self._parents would leave us unable to answer ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        if self._prefix and dir in self._roots:
            return b'all'
        # The 'dir in self._parents' case that visitdir checks is *not* part
        # of this test; it is dealt with separately below.
        needs_this = (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        )
        if needs_this:
            return b'this'

        if dir not in self._parents:
            return set()
        children = self._allparentschildren.get(dir)
        return children if children else set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
793
803
794
804
class exactmatcher(basematcher):
    r"""Matcher for a literal list of files. Entries are taken as plain
    paths, never as patterns (so kind-prefixes have no special meaning).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super().__init__(badfn)

        # A list is stored as-is (not copied); any other iterable is
        # materialized into a list.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    @propertycache
    def _visitchildrenset_candidates(self):
        """A memoized set of candidates for visitchildrenset."""
        # Every file, plus every directory except the root (b'').
        return self._fileset | (self._dirs - {b''})

    @propertycache
    def _sorted_visitchildrenset_candidates(self):
        """A memoized sorted list of candidates for visitchildrenset."""
        return sorted(self._visitchildrenset_candidates)

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        if dir == b'':
            relative = self._visitchildrenset_candidates
        else:
            ordered = self._sorted_visitchildrenset_candidates
            below = dir + b'/'
            # Bisect to the first element that could start with `below`
            # (i.e. is >= below). At least one such element is expected;
            # the assert further down fires otherwise.
            lo = bisect.bisect_left(ordered, below)
            # An upper bound is needed that sorts after everything starting
            # with `below` without itself starting with it. Rather than
            # assuming `0` follows `/`, append whichever byte comes right
            # after the slash to `dir`.
            beyond = dir + encoding.strtolocal(chr(ord(b'/') + 1))
            # Bisect to the first element >= beyond.
            hi = bisect.bisect_left(ordered, beyond, lo=lo)
            skip = len(below)
            relative = {entry[skip:] for entry in ordered[lo:hi]}
        # self._dirs holds every directory recursively: for foo/bar/baz.txt it
        # contains '', 'foo' and 'foo/bar'. A candidate that still contains a
        # '/' therefore belongs to a subdir-of-a-subdir and can be dropped --
        # its immediate subdir is present on its own, without a slash.
        ret = {name for name in relative if b'/' not in name}
        # An empty result is not expected: it would mean _dirs holds a
        # directory for which _fileset has no file.
        assert ret
        return ret

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
882
892
883
893
class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # Only m1's bad/traversedir are used; m2's are ignored.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def was_tampered_with(self) -> bool:
        """Return True if this matcher or either operand reports tampering."""
        return (
            self.was_tampered_with_nonrec()
            or self._m1.was_tampered_with()
            or self._m2.was_tampered_with()
        )

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 only once: the answer is needed for both tests below, and
        # asking twice doubles the work at every level when difference
        # matchers are nested as m2.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # m2 excludes everything under dir.
            return False
        m1_visit = self._m1.visitdir(dir)
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return m1_visit
        # m2 matches something under dir, so never pass on 'all'.
        return bool(m1_visit)

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        # The difference is exact whenever m1 is exact.
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
958
968
959
969
def intersectmatchers(m1, m2):
    """Build a matcher that matches only what both `m1` and `m2` match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """
    if m1 is None or m2 is None:
        # At least one side is missing: hand back the other (or None).
        return m1 or m2
    if not m1.always():
        if m2.always():
            # m2 adds no restriction; a shallow copy of m1 is enough.
            return copy.copy(m1)
        return intersectionmatcher(m1, m2)
    # m1 adds no restriction, so a shallow copy of m2 does the matching, but
    # it must still carry m1's non-matching attributes.
    # TODO: Consider encapsulating these things in a class so there's only
    # one thing to copy from m1.
    combined = copy.copy(m2)
    combined.bad = m1.bad
    combined.traversedir = m1.traversedir
    return combined
978
988
979
989
class intersectionmatcher(basematcher):
    """Matches a file only when both wrapped matchers match it.

    bad and traversedir are taken from the first matcher.
    """

    def __init__(self, m1, m2):
        super().__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def was_tampered_with(self) -> bool:
        if self.was_tampered_with_nonrec():
            return True
        return self._m1.was_tampered_with() or self._m2.was_tampered_with()

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, the file sets cannot
            # simply be intersected, because their files() are not always
            # files. For example, when intersecting a matcher
            # "-I glob:foo.txt" with a matcher of "path:dir2", "dir2" must
            # stay in the set.
            return self._m1.files() + self._m2.files()
        # Enumerate the files of whichever side is exact and filter them
        # through the other side.
        if self._m1.isexact():
            exact, other = self._m1, self._m2
        else:
            exact, other = self._m2, self._m1
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        if not first:
            return False
        # Collapse to bool: m1 answering True and m2 answering 'all' must
        # not combine into 'all'.
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        first = self._m1.visitchildrenset(dir)
        if not first:
            return set()
        second = self._m2.visitchildrenset(dir)
        if not second:
            return set()

        # 'all' on one side defers entirely to the other side.
        if first == b'all':
            return second
        if second == b'all':
            return first

        if first == b'this' or second == b'this':
            return b'this'

        assert isinstance(first, set) and isinstance(second, set)
        return first.intersection(second)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
1046
1056
1047
1057
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path: bytes, matcher: basematcher) -> None:
        super().__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files located below ``path`` and
        # express them relative to it.
        below = path + b"/"
        skip = len(below)
        self._files = [
            name[skip:] for name in matcher._files if name.startswith(below)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def _inparent(self, relative):
        """Prepend the subdirectory path to ``relative``."""
        return self._path + b"/" + relative

    def _parentdir(self, dir):
        """Translate ``dir`` into the wrapped matcher's namespace.

        The empty directory stands for the subdirectory itself.
        """
        if dir == b'':
            return self._path
        return self._inparent(dir)

    def was_tampered_with(self) -> bool:
        if self.was_tampered_with_nonrec():
            return True
        return self._matcher.was_tampered_with()

    def bad(self, f, msg):
        self._matcher.bad(self._inparent(f), msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._inparent(f))

    def visitdir(self, dir):
        return self._matcher.visitdir(self._parentdir(dir))

    def visitchildrenset(self, dir):
        return self._matcher.visitchildrenset(self._parentdir(dir))

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        details = (self._path, self._matcher)
        return b'<subdirmatcher path=%r, matcher=%r>' % details
1133
1143
1134
1144
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super().__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    @propertycache
    def _pathdirs(self):
        return set(pathutil.finddirs(self._path))

    def _innerdir(self, dir):
        """Map ``dir`` into the wrapped matcher's namespace.

        Returns ``b''`` for the prefix directory itself, the remainder for
        anything below it, and None for directories outside the prefix.
        """
        if dir == self._path:
            return b''
        if dir.startswith(self._pathprefix):
            return dir[len(self._pathprefix) :]
        return None

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    def visitdir(self, dir):
        inner = self._innerdir(dir)
        if inner is not None:
            return self._matcher.visitdir(inner)
        # Outside the prefix only its ancestors need to be walked.
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        inner = self._innerdir(dir)
        if inner is not None:
            return self._matcher.visitchildrenset(inner)
        if dir in self._pathdirs:
            return b'this'
        return set()

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        details = (pycompat.bytestr(self._path), self._matcher)
        return b'<prefixdirmatcher path=%r, matcher=%r>' % details
1216
1226
1217
1227
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        # Look up the first matcher before initialising the base class so an
        # empty sequence fails before any state is set up.
        first = matchers[0]
        super().__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def was_tampered_with(self) -> bool:
        if self.was_tampered_with_nonrec():
            return True
        return any(m.was_tampered_with() for m in self._matchers)

    def matchfn(self, f):
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == b'all':
                return answer
            visit |= answer
        return visit

    def visitchildrenset(self, dir):
        children = set()
        this = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if answer == b'this':
                this = True
            if this:
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children.update(answer)
        return b'this' if this else children

    @encoding.strmethod
    def __repr__(self):
        members = self._matchers
        return b'<unionmatcher matchers=%r>' % members
1273
1283
1274
1284
def patkind(pattern, default=None):
    r"""If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    kind, _pat = _patsplit(pattern, default)
    return kind
1289
1299
1290
1300
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    When the text before the first ``:`` is not a known kind, the pair
    ``(default, pattern)`` is returned with the pattern left untouched."""
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1299
1309
1300
1310
def _globre(pat):
    r"""Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    pos, end = 0, len(pat)
    pieces = []  # regexp fragments, concatenated once at the end
    depth = 0  # number of currently open ``{`` groups
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread byte, or False once the pattern is exhausted.
        return pos < end and pat[pos : pos + 1]

    while pos < end:
        ch = pat[pos : pos + 1]
        pos += 1
        if ch not in b'*?[{},\\':
            pieces.append(escape(ch, ch))
        elif ch == b'*':
            if peek() != b'*':
                # A single star never crosses a directory separator.
                pieces.append(b'[^/]*')
            else:
                pos += 1
                if peek() == b'/':
                    pos += 1
                    pieces.append(b'(?:.*/)?')
                else:
                    pieces.append(b'.*')
        elif ch == b'?':
            pieces.append(b'.')
        elif ch == b'[':
            scan = pos
            # A leading '!' or ']' belongs to the class and cannot close it.
            if scan < end and pat[scan : scan + 1] in b'!]':
                scan += 1
            close = pat.find(b']', scan)
            if close < 0:
                # No closing bracket: treat '[' as a literal character.
                pieces.append(b'\\[')
            else:
                inner = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                if inner[0:1] == b'!':
                    inner = b'^' + inner[1:]
                elif inner[0:1] == b'^':
                    inner = b'\\' + inner
                pieces.append(b'[%s]' % inner)
        elif ch == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif ch == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif ch == b',' and depth:
            pieces.append(b'|')
        elif ch == b'\\':
            following = peek()
            if following:
                pos += 1
                pieces.append(escape(following, following))
            else:
                pieces.append(escape(ch, ch))
        else:
            # '}' or ',' outside of any group is an ordinary character.
            pieces.append(escape(ch, ch))
    return b''.join(pieces)
1383
1393
1384
1394
# Splits a leading inline flag group such as ``(?i)`` from the rest of a
# regular expression.
FLAG_RE = util.re.compile(br'^\(\?([aiLmsux]+)\)(.*)')


def _regex(kind, pat, globsuffix):
    """Convert a (normalized) pattern of any kind into a
    regular expression.

    globsuffix is appended to the regexp of globs.

    'filepath' patterns and unknown kinds raise error.ProgrammingError."""
    if kind in (b'glob', b'relpath') and not pat:
        return b''
    if kind == b're':
        return pat
    if kind == b'filepath':
        raise error.ProgrammingError(
            "'filepath:' patterns should not be converted to a regex"
        )
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        if pat == b'.':
            dirprefix = b''
        else:
            # Pattern is a directory name.
            dirprefix = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + b'[^/]+$'
    if kind == b'relglob':
        globre = _globre(pat)
        anyname = b'[^/]*'
        if globre.startswith(anyname):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + globre[len(anyname) :] + globsuffix
        return b'(?:|.*/)' + globre + globsuffix
    if kind == b'relre':
        flag = None
        found = FLAG_RE.match(pat)
        if found:
            flag, pat = found.groups()
        if not pat.startswith(b'^'):
            pat = b'.*' + pat
        if flag is not None:
            # Re-attach the flags as a scoped group around the pattern.
            pat = br'(?%s:%s)' % (flag, pat)
        return pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1432
1442
1433
1443
def _buildmatch(kindpats, globsuffix, root):
    """Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs.

    Subincludes are matched by lazily created sub-matchers; the remaining
    patterns are matched either by a directory lookup (when all of them are
    'rootfilesin') or by a regexp based function."""
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatcher = submatchers.get(prefix)
                if submatcher is None:
                    # Build the sub-matcher on first use and remember it.
                    submatcher = match(*matcherargs)
                    submatchers[prefix] = submatcher

                if submatcher(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(kind == b'rootfilesin' for kind, pat, src in kindpats):
            dirs = {pat for kind, pat, src in kindpats}

            def indirs(f):
                parent, slash, _name = f.rpartition(b'/')
                if not slash:
                    # Files at the top level live in '.'.
                    parent = b'.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(indirs)
        else:
            regex, rematch = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(rematch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(fn(f) for fn in matchfuncs)
1480
1490
1481
1491
1482 MAX_RE_SIZE = 20000
1492 MAX_RE_SIZE = 20000
1483
1493
1484
1494
1485 def _joinregexes(regexps):
1495 def _joinregexes(regexps):
1486 """gather multiple regular expressions into a single one"""
1496 """gather multiple regular expressions into a single one"""
1487 return b'|'.join(regexps)
1497 return b'|'.join(regexps)
1488
1498
1489
1499
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    'filepath' patterns are matched by exact lookup; all other patterns are
    converted to regexps, which are compiled in several groups when their
    combined size would exceed MAX_RE_SIZE.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        exact = {pat for kind, pat, _src in kindpats if kind == b'filepath'}
        regexps = [
            _regex(kind, pat, globsuffix)
            for kind, pat, _src in kindpats
            if kind != b'filepath'
        ]

        fullregexp = _joinregexes(regexps)

        allgroups = []
        startidx = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if (groupsize + piecesize) > MAX_RE_SIZE:
                # Close the current group and start a new one at this piece.
                allgroups.append(_joinregexes(regexps[startidx:idx]))
                startidx = idx
                groupsize = 0
            # The extra byte accounts for the joining '|'.
            groupsize += piecesize + 1

        if startidx == 0:
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            allgroups.append(_joinregexes(regexps[startidx:]))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)

        if not exact:
            return fullregexp, func
        # An empty regex will always match, so only call the regex if
        # there were any actual patterns to match.
        if not regexps:
            return fullregexp, lambda s: s in exact
        return fullregexp, lambda s: s in exact or func(s)
    except re.error:
        # Compile the patterns one by one to report the offending one.
        for k, p, s in kindpats:
            if k == b'filepath':
                continue
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                else:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_(b"invalid pattern"))
1560
1570
1561
1571
1562 def _patternrootsanddirs(kindpats):
1572 def _patternrootsanddirs(kindpats):
1563 """Returns roots and directories corresponding to each pattern.
1573 """Returns roots and directories corresponding to each pattern.
1564
1574
1565 This calculates the roots and directories exactly matching the patterns and
1575 This calculates the roots and directories exactly matching the patterns and
1566 returns a tuple of (roots, dirs) for each. It does not return other
1576 returns a tuple of (roots, dirs) for each. It does not return other
1567 directories which may also need to be considered, like the parent
1577 directories which may also need to be considered, like the parent
1568 directories.
1578 directories.
1569 """
1579 """
1570 r = []
1580 r = []
1571 d = []
1581 d = []
1572 for kind, pat, source in kindpats:
1582 for kind, pat, source in kindpats:
1573 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1583 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1574 root = []
1584 root = []
1575 for p in pat.split(b'/'):
1585 for p in pat.split(b'/'):
1576 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1586 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1577 break
1587 break
1578 root.append(p)
1588 root.append(p)
1579 r.append(b'/'.join(root))
1589 r.append(b'/'.join(root))
1580 elif kind in (b'relpath', b'path', b'filepath'):
1590 elif kind in (b'relpath', b'path', b'filepath'):
1581 if pat == b'.':
1591 if pat == b'.':
1582 pat = b''
1592 pat = b''
1583 r.append(pat)
1593 r.append(pat)
1584 elif kind in (b'rootfilesin',):
1594 elif kind in (b'rootfilesin',):
1585 if pat == b'.':
1595 if pat == b'.':
1586 pat = b''
1596 pat = b''
1587 d.append(pat)
1597 d.append(pat)
1588 else: # relglob, re, relre
1598 else: # relglob, re, relre
1589 r.append(b'')
1599 r.append(b'')
1590 return r, d
1600 return r, d
1591
1601
1592
1602
def _roots(kindpats):
    """Returns root directories to match recursively from the given patterns.

    Only the roots computed by _patternrootsanddirs() are returned; the
    exact directories are discarded."""
    return _patternrootsanddirs(kindpats)[0]
1597
1607
1598
1608
def _rootsdirsandparents(kindpats):
    """Split patterns into recursive roots, exact dirs and their parents.

    The result is a ``(roots, dirs, parents)`` tuple where

    - `roots` are the directories that must be matched recursively,
    - `dirs` are the directories that must be matched non-recursively,
    - `parents` is a set of the directories that have to be walked in order
      to reach anything listed in `roots` or `dirs`.

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents are collected as non-recursive/exact directories: they
    # have to be scanned to reach either the roots or the other exact
    # directories. The exact dirs are processed first, then the roots.
    parents = set()
    for group in (dirs, roots):
        parents.update(pathutil.dirs(group))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1641
1651
1642
1652
def _explicitfiles(kindpats):
    """Return the candidate explicit filenames named by the patterns.

    Patterns whose kind can only designate a directory are discarded, and
    the roots of the remaining patterns are returned.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # Pattern kinds that can only name directories, never files.
    dironlykinds = (b'rootfilesin',)
    candidates = [
        entry for entry in kindpats if entry[0] not in dironlykinds
    ]
    return _roots(candidates)
1655
1665
1656
1666
1657 def _prefix(kindpats):
1667 def _prefix(kindpats):
1658 '''Whether all the patterns match a prefix (i.e. recursively)'''
1668 '''Whether all the patterns match a prefix (i.e. recursively)'''
1659 for kind, pat, source in kindpats:
1669 for kind, pat, source in kindpats:
1660 if kind not in (b'path', b'relpath'):
1670 if kind not in (b'path', b'relpath'):
1661 return False
1671 return False
1662 return True
1672 return True
1663
1673
1664
1674
# Module-level cache for the compiled comment-stripping regex used by
# readpatternfile(). It stays None until a pattern-file line containing
# b"#" is first processed, at which point the regex is compiled and stored.
_commentre = None
1666
1676
# Signatures seen only by type checkers: they describe how the return type
# of readpatternfile() depends on the `sourceinfo` argument. The untyped
# runtime implementation is defined after this block.
if typing.TYPE_CHECKING:
    from typing_extensions import (
        Literal,
    )

    # sourceinfo=True: each entry is a (pattern, lineno, line) tuple.
    @overload
    def readpatternfile(
        filepath: bytes, warn: Callable[[bytes], Any], sourceinfo: Literal[True]
    ) -> List[Tuple[bytes, int, bytes]]:
        ...

    # sourceinfo=False passed explicitly: each entry is the bare pattern.
    @overload
    def readpatternfile(
        filepath: bytes,
        warn: Callable[[bytes], Any],
        sourceinfo: Literal[False],
    ) -> List[bytes]:
        ...

    # Fallback for a non-literal bool. Because the two overloads above
    # declare `sourceinfo` without a default, a call that omits the argument
    # also resolves here and is typed with the union element type.
    @overload
    def readpatternfile(
        filepath: bytes,
        warn: Callable[[bytes], Any],
        sourceinfo: bool = False,
    ) -> List[Union[Tuple[bytes, int, bytes], bytes]]:
        ...
1703
1667
1704
def readpatternfile(filepath, warn, sourceinfo=False):
    """parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    `filepath` is the (bytes) path of the file to read; it is opened in
    binary mode, so every returned pattern is a bytes object.

    `warn` is called with a bytes message when a ``syntax:`` line names an
    unknown syntax; that line is then ignored. A false `warn` (e.g. None)
    silences the message.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where `lineno` is the 1-based line number and
    `line` is the pattern text as written in the file once the comment,
    trailing white space and any explicit syntax prefix have been removed.
    This is useful to debug ignore patterns.

    The file is closed before returning, including when reading the file
    or calling `warn` raises.
    """
    global _commentre

    # Maps both the names accepted after "syntax:" and the per-line prefixes
    # to the pattern-kind prefix put in front of the emitted pattern.
    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    # Default kind for lines without a prefix until a "syntax:" line says
    # otherwise.
    syntax = b'relre:'
    patterns = []

    # Use a context manager so the file handle is released even if an
    # exception interrupts the parsing loop.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(fp, start=1):
            if b"#" in line:
                if not _commentre:
                    # Compiled lazily, once, and cached at module level.
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*'
                    )
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                # A "syntax:" line only changes the default kind for the
                # lines that follow; it never produces a pattern itself.
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # An explicit prefix on the line overrides the current default
            # for this line only; the prefix is stripped from the text.
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now