##// END OF EJS Templates
match: match explicit file using a set...
marmoute -
r51286:81c7d04f stable
parent child Browse files
Show More
@@ -1,1665 +1,1670 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import bisect
9 import bisect
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import open
16 from .pycompat import open
17 from . import (
17 from . import (
18 encoding,
18 encoding,
19 error,
19 error,
20 pathutil,
20 pathutil,
21 policy,
21 policy,
22 pycompat,
22 pycompat,
23 util,
23 util,
24 )
24 )
25 from .utils import stringutil
25 from .utils import stringutil
26
26
# Rust 'dirstate' module as resolved by ``policy.importrust``.
# NOTE(review): presumably None when the Rust extensions are unavailable --
# confirm against ``policy.importrust``.
rustmod = policy.importrust('dirstate')
28
28
# Every pattern-kind prefix listed for this module (the ``kind`` part of a
# ``kind:pattern`` string).
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose pattern is canonicalized against the current working directory
# (see ``_donormalize``).
cwdrelativepatternkinds = (b'relpath', b'glob')
45
45
# Module-level alias so the classes below can write ``@propertycache``.
propertycache = util.propertycache
47
47
48
48
def _rematcher(regex):
    """compile the regexp with the best available regexp engine and return a
    matcher function

    The returned callable is the compiled pattern's ``test_match`` method when
    the engine provides one, otherwise its ``match`` method.
    """
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        # engine without ``test_match``: fall back to the regular ``match``
        return m.match
58
58
59
59
60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 matchers = []
62 matchers = []
63 other = []
63 other = []
64
64
65 for kind, pat, source in kindpats:
65 for kind, pat, source in kindpats:
66 if kind == b'set':
66 if kind == b'set':
67 if ctx is None:
67 if ctx is None:
68 raise error.ProgrammingError(
68 raise error.ProgrammingError(
69 b"fileset expression with no context"
69 b"fileset expression with no context"
70 )
70 )
71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
72
72
73 if listsubrepos:
73 if listsubrepos:
74 for subpath in ctx.substate:
74 for subpath in ctx.substate:
75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 matchers.append(pm)
77 matchers.append(pm)
78
78
79 continue
79 continue
80 other.append((kind, pat, source))
80 other.append((kind, pat, source))
81 return matchers, other
81 return matchers, other
82
82
83
83
84 def _expandsubinclude(kindpats, root):
84 def _expandsubinclude(kindpats, root):
85 """Returns the list of subinclude matcher args and the kindpats without the
85 """Returns the list of subinclude matcher args and the kindpats without the
86 subincludes in it."""
86 subincludes in it."""
87 relmatchers = []
87 relmatchers = []
88 other = []
88 other = []
89
89
90 for kind, pat, source in kindpats:
90 for kind, pat, source in kindpats:
91 if kind == b'subinclude':
91 if kind == b'subinclude':
92 sourceroot = pathutil.dirname(util.normpath(source))
92 sourceroot = pathutil.dirname(util.normpath(source))
93 pat = util.pconvert(pat)
93 pat = util.pconvert(pat)
94 path = pathutil.join(sourceroot, pat)
94 path = pathutil.join(sourceroot, pat)
95
95
96 newroot = pathutil.dirname(path)
96 newroot = pathutil.dirname(path)
97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98
98
99 prefix = pathutil.canonpath(root, root, newroot)
99 prefix = pathutil.canonpath(root, root, newroot)
100 if prefix:
100 if prefix:
101 prefix += b'/'
101 prefix += b'/'
102 relmatchers.append((prefix, matcherargs))
102 relmatchers.append((prefix, matcherargs))
103 else:
103 else:
104 other.append((kind, pat, source))
104 other.append((kind, pat, source))
105
105
106 return relmatchers, other
106 return relmatchers, other
107
107
108
108
109 def _kindpatsalwaysmatch(kindpats):
109 def _kindpatsalwaysmatch(kindpats):
110 """Checks whether the kindspats match everything, as e.g.
110 """Checks whether the kindspats match everything, as e.g.
111 'relpath:.' does.
111 'relpath:.' does.
112 """
112 """
113 for kind, pat, source in kindpats:
113 for kind, pat, source in kindpats:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 return False
115 return False
116 return True
116 return True
117
117
118
118
def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build a single matcher out of a list of (kind, pat, source) tuples.

    'set' patterns are first split off and expanded by ``_expandsets``; the
    remaining patterns, if any, are handed to
    ``matchercls(root, kindpats, badfn=badfn)``.

    Returns a ``nevermatcher`` when nothing was produced, the sole matcher
    when exactly one was produced, and a ``unionmatcher`` over all of them
    otherwise (the ``matchercls`` instance first, then the fileset matchers).
    """
    matchers = []
    fms, kindpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
146
146
147
147
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [br're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='[^/]*\\.py$|foo/a(?:/|$)|.*\\.c$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [br'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [br're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))
    normalize = _donormalize
    if icasefs:
        # The case-folding variant reads the dirstate through ``ctx``, so
        # ``ctx`` must be provided whenever ``icasefs`` is set.
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    # include/exclude patterns always default to 'glob' and their matchers
    # are built without a bad() callback.
    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m
310
310
311
311
def exact(files, badfn=None):
    """Return an ``exactmatcher`` built from ``files`` and ``badfn``."""
    return exactmatcher(files, badfn=badfn)
314
314
315
315
def always(badfn=None):
    """Return an ``alwaysmatcher`` using ``badfn`` as its bad() callback."""
    return alwaysmatcher(badfn)
318
318
319
319
def never(badfn=None):
    """Return a ``nevermatcher`` using ``badfn`` as its bad() callback."""
    return nevermatcher(badfn)
322
322
323
323
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (``copy.copy``); the original matcher's ``bad`` is
    left untouched.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
331
331
332
332
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of ``(kind, pat, source)`` tuples.  ``source`` is ``b''``
    for patterns given directly, the list file path for patterns read from a
    'listfile'/'listfile0', and for 'include' patterns the nested source if
    any, else the include path.

    Raises ``error.Abort`` when a list file cannot be read, or (prefixed with
    the include path) when processing an 'include' file aborts.  An
    unreadable 'include' file is skipped, with a message sent to ``warn``
    when one is provided.
    """
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # Entries read from a list file report the list file as source.
            for k, p, _source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Re-raise with the include file name prepended for context.
                raise error.Abort(
                    b'%s: %s'
                    % (
                        pat,
                        inst.message,
                    )
                )
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats
383
383
384
384
class basematcher:
    """Base class for matchers.

    The defaults defined here match no file (``matchfn`` returns False),
    list no explicit files, and ask for every directory to be visited.
    Subclasses override the relevant methods.
    """

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback is supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not self.always() and not self.isexact() and not self.prefix()
499
499
500
500
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory, and everything below it, is to be visited
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
521
521
522
522
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # nothing can match, so no directory needs to be visited
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
548
548
549
549
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super().__init__(badfn)
        # The predicate itself is installed as the match function.
        self.matchfn = predfn
        # Optional description of the predicate, used only by __repr__.
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Prefer the supplied description; fall back to the byte repr of the
        # predicate when buildrepr() yields something falsy.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): "predicatenmatcher" looks like a typo, but the string
        # is runtime output and is deliberately left untouched here.
        return b'<predicatenmatcher pred=%s>' % described
564
564
565
565
def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
    count = len(prefix_set)
    if count == 0:
        return False
    if path in prefix_set:
        return True

    # With more than 5 entries it is *probably* cheaper to walk up the
    # directory hierarchy and do one hash lookup per ancestor, assuming
    # hierarchies are fairly shallow and hash lookups are cheap.
    if count > 5:
        for ancestor in pathutil.finddirs(path):
            if ancestor in prefix_set:
                return True
        return False

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    slash = ord(b'/')

    # `path` is known not to equal any entry (checked above), so whenever it
    # starts with `prefix` it is strictly longer and `path[len(prefix)]`
    # cannot raise IndexError.
    for prefix in prefix_set:
        if path.startswith(prefix) and path[len(prefix)] == slash:
            return True
    return False
591
591
592
592
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    [b'', b'foo/a', b'', b'b']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super().__init__(badfn)
        # Note: this sorts the caller's list in place.
        kindpats.sort()

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # The compiled matcher is kept private; matchfn() below consults the
        # explicit file set before falling back to it.
        self._pats, self._matchfn = _buildmatch(kindpats, b'$', root)

    def matchfn(self, fn):
        """Match ``fn`` if it is an explicit file or the patterns accept it."""
        return fn in self._fileset or self._matchfn(fn)

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def prefix(self):
        return self._prefix

    def visitdir(self, dir):
        """Return b'all', or a truthy/falsy value saying whether to visit."""
        if self._prefix and dir in self._fileset:
            return b'all'
        if dir in self._dirs:
            return True
        return path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        """Translate the visitdir() verdict into a visitchildrenset value."""
        verdict = self.visitdir(dir)
        if not verdict:
            return set()
        if verdict is True:
            return b'this'
        # The only remaining possibility is the recursive 'all' answer.
        assert verdict == b'all'
        return b'all'

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
658
663
659
664
660 # This is basically a reimplementation of pathutil.dirs that stores the
665 # This is basically a reimplementation of pathutil.dirs that stores the
661 # children instead of just a count of them, plus a small optional optimization
666 # children instead of just a count of them, plus a small optional optimization
662 # to avoid some directories we don't need.
667 # to avoid some directories we don't need.
class _dirchildren:
    """Map each directory to the set of names of its immediate children.

    ``paths`` is an iterable of slash-separated byte paths. For every path,
    each (dirname, basename) pair found while walking back to the root is
    recorded, so ``get(dirname)`` returns the basenames seen directly under
    ``dirname``.

    ``onlyinclude`` optionally restricts which directories are recorded:
    when given, only directories contained in it get an entry. When it is
    None (the default), no restriction is applied and every directory is
    recorded.
    """

    def __init__(self, paths, onlyinclude=None):
        self._dirs = {}
        # Keep None distinct from an empty collection: None means "no
        # filter", while an empty collection filters out every directory.
        self._onlyinclude = onlyinclude
        addpath = self.addpath
        for f in paths:
            addpath(f)

    def addpath(self, path):
        """Record ``path`` under each of its ancestor directories."""
        if path == b'':
            # The root itself has no (dirname, basename) split.
            return
        dirs = self._dirs
        onlyinclude = self._onlyinclude
        for d, b in _dirchildren._findsplitdirs(path):
            if onlyinclude is not None and d not in onlyinclude:
                continue
            dirs.setdefault(d, set()).add(b)

    @staticmethod
    def _findsplitdirs(path):
        # yields (dirname, basename) tuples, walking back to the root. This is
        # very similar to pathutil.finddirs, except:
        #  - produces a (dirname, basename) tuple, not just 'dirname'
        # Unlike manifest._splittopdir, this does not suffix `dirname` with a
        # slash.
        oldpos = len(path)
        pos = path.rfind(b'/')
        while pos != -1:
            yield path[:pos], path[pos + 1 : oldpos]
            oldpos = pos
            pos = path.rfind(b'/', 0, pos)
        # Finally the top-level component, whose parent is the root b''.
        yield b'', path[:oldpos]

    def get(self, path):
        """Return the recorded children of ``path`` (empty set if none)."""
        return self._dirs.get(path, set())
698
703
699
704
class includematcher(basematcher):
    """Matcher built from include patterns (kind, pat, source) under a root."""

    def __init__(self, root, kindpats, badfn=None):
        super().__init__(badfn)
        if rustmod is not None:
            # We need to pass the patterns to Rust because they can contain
            # patterns from the user interface
            self._kindpats = kindpats
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # Directories that are included recursively.
        self._roots = set(roots)
        # Directories that are included non-recursively.
        self._dirs = set(dirs)
        # Directories included non-recursively only because they lead to an
        # entry of _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        """Return b'all', or a truthy/falsy value saying whether to visit."""
        if self._prefix and dir in self._roots:
            return b'all'
        if dir in self._dirs:
            return True
        if dir in self._parents:
            return True
        return path_or_parents_in_set(dir, self._roots)

    @propertycache
    def _allparentschildren(self):
        # Dirs, roots and parents are all fed in, but only entries keyed by a
        # parent are kept. Feeding everything in matters for a case like:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # With only self._parents as input, asking for the children of 'foo'
        # could not answer ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Return b'all', b'this', or the set of child names to visit."""
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: the 'dir in self._parents' case from visitdir is deliberately
        # not part of this test; it is handled separately below.
        needs_this = (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        )
        if needs_this:
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
759
764
760
765
class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super().__init__(badfn)

        # A list is stored as-is (no copy); anything else is materialized.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    def isexact(self):
        return True

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    @propertycache
    def _visitchildrenset_candidates(self):
        """A memoized set of candidates for visitchildrenset."""
        # Parenthesized for clarity: '-' binds tighter than '|'.
        return self._fileset | (self._dirs - {b''})

    @propertycache
    def _sorted_visitchildrenset_candidates(self):
        """A memoized sorted list of candidates for visitchildrenset."""
        return sorted(self._visitchildrenset_candidates)

    def visitchildrenset(self, dir):
        """Return the set of immediate child names of ``dir`` to visit."""
        if not self._fileset or dir not in self._dirs:
            return set()

        if dir == b'':
            names = self._visitchildrenset_candidates
        else:
            ordered = self._sorted_visitchildrenset_candidates
            prefix = dir + b'/'
            # First candidate that could start with `prefix` (i.e. >= it).
            # At least one is expected; the assert at the end catches the
            # case where none is found.
            start = bisect.bisect_left(ordered, prefix)
            # Upper bound: `dir` followed by the byte right after '/', which
            # sorts after everything starting with `prefix` (not assuming
            # that this byte is b'0').
            upper = dir + encoding.strtolocal(chr(ord(b'/') + 1))
            stop = bisect.bisect_left(ordered, upper, lo=start)
            cut = len(prefix)
            names = {entry[cut:] for entry in ordered[start:stop]}
        # self._dirs holds every directory recursively (for foo/bar/baz.txt:
        # '', 'foo' and 'foo/bar'), so a name still containing '/' refers to a
        # deeper level whose immediate subdir is present without a slash.
        ret = {name for name in names if b'/' not in name}
        # An empty result would mean _dirs has an entry with no file in
        # _fileset, which is not expected.
        assert ret
        return ret

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
848
853
849
854
class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # Only the first matcher's callbacks are used.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        """Return a truthy value when m1 matches ``f`` and m2 does not."""
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Return False, True, or b'all' describing how to visit ``dir``."""
        # Ask m2 once and reuse the answer for both checks below.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # m2 matches everything under dir, so nothing is left for us.
            return False
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 matches part of dir: never report 'all', only whether to visit.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return b'all', b'this', or a set of child names to visit."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        """The difference is exact whenever m1 is exact."""
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
917
922
918
923
def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """
    if m1 is None or m2 is None:
        # At most one real matcher was supplied; hand back whichever it is.
        return m1 or m2

    if m1.always():
        # m1 imposes no restriction: use a copy of m2, but keep m1's
        # callbacks.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined = copy.copy(m2)
        combined.bad = m1.bad
        combined.traversedir = m1.traversedir
        return combined

    if m2.always():
        # m2 imposes no restriction: a copy of m1 is enough.
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
937
942
938
943
class intersectionmatcher(basematcher):
    """Matcher that accepts a file only when both wrapped matchers do."""

    def __init__(self, m1, m2):
        super().__init__()
        self._m1 = m1
        self._m2 = m2
        # Only the first matcher's callbacks are used.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, the file sets cannot
            # easily be intersected, because files() are not always files.
            # For example, when intersecting a matcher "-I glob:foo.txt" with
            # a matcher of "path:dir2", "dir2" must not be removed.
            return self._m1.files() + self._m2.files()
        # Enumerate the exact matcher's files and filter with the other one.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        """Combine both matchers' visitchildrenset() answers for ``dir``."""
        first = self._m1.visitchildrenset(dir)
        if not first:
            return set()
        second = self._m2.visitchildrenset(dir)
        if not second:
            return set()

        # 'all' on one side defers entirely to the other side.
        if first == b'all':
            return second
        if second == b'all':
            return first

        if b'this' in (first, second):
            return b'this'

        assert isinstance(first, set) and isinstance(second, set)
        return first & second

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
998
1003
999
1004
1000 class subdirmatcher(basematcher):
1005 class subdirmatcher(basematcher):
1001 """Adapt a matcher to work on a subdirectory only.
1006 """Adapt a matcher to work on a subdirectory only.
1002
1007
1003 The paths are remapped to remove/insert the path as needed:
1008 The paths are remapped to remove/insert the path as needed:
1004
1009
1005 >>> from . import pycompat
1010 >>> from . import pycompat
1006 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
1011 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
1007 >>> m2 = subdirmatcher(b'sub', m1)
1012 >>> m2 = subdirmatcher(b'sub', m1)
1008 >>> m2(b'a.txt')
1013 >>> m2(b'a.txt')
1009 False
1014 False
1010 >>> m2(b'b.txt')
1015 >>> m2(b'b.txt')
1011 True
1016 True
1012 >>> m2.matchfn(b'a.txt')
1017 >>> m2.matchfn(b'a.txt')
1013 False
1018 False
1014 >>> m2.matchfn(b'b.txt')
1019 >>> m2.matchfn(b'b.txt')
1015 True
1020 True
1016 >>> m2.files()
1021 >>> m2.files()
1017 ['b.txt']
1022 ['b.txt']
1018 >>> m2.exact(b'b.txt')
1023 >>> m2.exact(b'b.txt')
1019 True
1024 True
1020 >>> def bad(f, msg):
1025 >>> def bad(f, msg):
1021 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
1026 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
1022 >>> m1.bad = bad
1027 >>> m1.bad = bad
1023 >>> m2.bad(b'x.txt', b'No such file')
1028 >>> m2.bad(b'x.txt', b'No such file')
1024 sub/x.txt: No such file
1029 sub/x.txt: No such file
1025 """
1030 """
1026
1031
1027 def __init__(self, path, matcher):
1032 def __init__(self, path, matcher):
1028 super(subdirmatcher, self).__init__()
1033 super(subdirmatcher, self).__init__()
1029 self._path = path
1034 self._path = path
1030 self._matcher = matcher
1035 self._matcher = matcher
1031 self._always = matcher.always()
1036 self._always = matcher.always()
1032
1037
1033 self._files = [
1038 self._files = [
1034 f[len(path) + 1 :]
1039 f[len(path) + 1 :]
1035 for f in matcher._files
1040 for f in matcher._files
1036 if f.startswith(path + b"/")
1041 if f.startswith(path + b"/")
1037 ]
1042 ]
1038
1043
1039 # If the parent repo had a path to this subrepo and the matcher is
1044 # If the parent repo had a path to this subrepo and the matcher is
1040 # a prefix matcher, this submatcher always matches.
1045 # a prefix matcher, this submatcher always matches.
1041 if matcher.prefix():
1046 if matcher.prefix():
1042 self._always = any(f == path for f in matcher._files)
1047 self._always = any(f == path for f in matcher._files)
1043
1048
def bad(self, f, msg):
    """Forward a bad-file report to the wrapped matcher.

    ``f`` is relative to this matcher's subdirectory; the wrapped matcher
    receives it with ``self._path`` and a slash prepended.
    """
    fullname = b"/".join((self._path, f))
    self._matcher.bad(fullname, msg)
1046
1051
def matchfn(self, f):
    """Return the wrapped matcher's verdict for ``f`` inside the subdir."""
    # Some information is lost in the superclass's constructor, so we
    # can not accurately create the matching function for the subdirectory
    # from the inputs. Instead, we override matchfn() and visitdir() to
    # call the original matcher with the subdirectory path prepended.
    fullname = b"/".join((self._path, f))
    return self._matcher.matchfn(fullname)
1053
1058
def visitdir(self, dir):
    """Ask the wrapped matcher about ``dir``, given relative to the subdir.

    The empty directory name stands for the subdirectory itself.
    """
    target = self._path if dir == b'' else self._path + b"/" + dir
    return self._matcher.visitdir(target)
1060
1065
def visitchildrenset(self, dir):
    """Return the wrapped matcher's ``visitchildrenset`` for ``dir``.

    ``dir`` is relative to the subdirectory; the empty name stands for the
    subdirectory itself.
    """
    target = self._path if dir == b'' else self._path + b"/" + dir
    return self._matcher.visitchildrenset(target)
1067
1072
def always(self):
    """Return the ``_always`` flag computed by the constructor."""
    return self._always
1070
1075
def prefix(self):
    """Tell whether this matcher is a prefix matcher.

    That is the case when the wrapped matcher is one and this matcher is
    not flagged as always matching.
    """
    wrapped = self._matcher.prefix()
    if not wrapped:
        return wrapped
    return not self._always
1073
1078
@encoding.strmethod
def __repr__(self):
    """Describe this matcher by its subdirectory and wrapped matcher."""
    details = (self._path, self._matcher)
    return b'<subdirmatcher path=%r, matcher=%r>' % details
1080
1085
1081
1086
class prefixdirmatcher(basematcher):
    """Expose a matcher as if it were rooted in a parent directory.

    Only the matching behaviour of the wrapped matcher is used; its
    non-matching attributes (bad, traversedir) are ignored.

    The prefix path is normally the relative path leading from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        """Wrap ``matcher`` below the non-empty directory ``path``."""
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        """Explicit files of the wrapped matcher, with the prefix added."""
        lead = self._pathprefix
        return [lead + name for name in self._matcher._files]

    def matchfn(self, f):
        """Match ``f`` only when it lies below the prefix directory."""
        lead = self._pathprefix
        if f.startswith(lead):
            return self._matcher.matchfn(f[len(lead) :])
        return False

    @propertycache
    def _pathdirs(self):
        """Set of the directories produced by ``pathutil.finddirs``."""
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        """Delegate at or below the prefix, else test ``_pathdirs``."""
        if dir == self._path:
            inner = b''
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix) :]
        else:
            return dir in self._pathdirs
        return self._matcher.visitdir(inner)

    def visitchildrenset(self, dir):
        """Delegate at or below the prefix.

        Elsewhere the answer is ``b'this'`` for directories in
        ``_pathdirs`` and an empty set for anything else.
        """
        if dir == self._path:
            inner = b''
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix) :]
        else:
            return b'this' if dir in self._pathdirs else set()
        return self._matcher.visitchildrenset(inner)

    def isexact(self):
        """Same answer as the wrapped matcher."""
        return self._matcher.isexact()

    def prefix(self):
        """Same answer as the wrapped matcher."""
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        """Describe this matcher by its prefix and wrapped matcher."""
        details = (pycompat.bytestr(self._path), self._matcher)
        return b'<prefixdirmatcher path=%r, matcher=%r>' % details
1163
1168
1164
1169
class unionmatcher(basematcher):
    """Match whatever at least one of several matchers matches.

    ``traversedir`` is copied from the first matcher of the sequence.
    """

    def __init__(self, matchers):
        """Combine ``matchers``, a sequence with at least one matcher."""
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as one of the matchers accepts ``f``."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """Combine the ``visitdir`` answers of all the matchers.

        ``b'all'`` from any matcher is returned right away; the other
        answers are or-ed together.
        """
        combined = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == b'all':
                return answer
            combined |= answer
        return combined

    def visitchildrenset(self, dir):
        """Combine the ``visitchildrenset`` answers of all the matchers.

        ``b'all'`` wins over everything, then ``b'this'``; otherwise the
        union of the returned sets is given back.
        """
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if sawthis or answer == b'this':
                # don't break, we might have an 'all' in here.
                sawthis = True
                continue
            assert isinstance(answer, set)
            children.update(answer)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        """Describe this matcher by the matchers it combines."""
        return b'<unionmatcher matchers=%r>' % self._matchers
1215
1220
1216
1221
def patkind(pattern, default=None):
    r"""Return the kind of a 'kind:pat' pattern, or ``default``.

    ``default`` is returned when the pattern carries no known kind prefix.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    kind, _rest = _patsplit(pattern, default)
    return kind
1231
1236
1232
1237
def _patsplit(pattern, default):
    """Separate a pattern into its kind and the pattern proper.

    Returns ``(kind, pat)`` when the text before the first colon is one of
    ``allpatternkinds``; otherwise ``(default, pattern)`` with the pattern
    left untouched.
    """
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1241
1246
1242
1247
def _globre(pat):
    r"""Translate an extended glob into the text of a regular expression.

    ``?`` becomes ``.``, ``*`` becomes ``[^/]*``, ``**`` becomes ``.*``
    (``**/`` becomes an optional ``(?:.*/)?``), ``[...]`` is kept as a
    character class with a leading ``!`` turned into ``^``, ``{a,b}``
    becomes a non-capturing alternation, and a backslash makes the
    following character literal.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    escape = util.stringutil.regexbytesescapemap.get
    pieces = []
    depth = 0  # number of '{' groups currently open
    pos, end = 0, len(pat)

    def peek():
        # next unread byte, or False once the pattern is exhausted
        return pos < end and pat[pos : pos + 1]

    while pos < end:
        c = pat[pos : pos + 1]
        pos += 1
        if c == b'*':
            if peek() != b'*':
                pieces.append(b'[^/]*')
                continue
            pos += 1
            if peek() == b'/':
                pos += 1
                pieces.append(b'(?:.*/)?')
            else:
                pieces.append(b'.*')
        elif c == b'?':
            pieces.append(b'.')
        elif c == b'[':
            # A '!' or ']' right after the bracket cannot close the class.
            scan = pos
            if scan < end and pat[scan : scan + 1] in b'!]':
                scan += 1
            close = pat.find(b']', scan)
            if close < 0:
                # unterminated class: the bracket is an ordinary character
                pieces.append(b'\\[')
            else:
                stuff = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                if stuff.startswith(b'!'):
                    stuff = b'^' + stuff[1:]
                elif stuff.startswith(b'^'):
                    stuff = b'\\' + stuff
                pieces.append(b'[%s]' % stuff)
        elif c == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif c == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif c == b',' and depth:
            pieces.append(b'|')
        elif c == b'\\':
            following = peek()
            if following:
                pos += 1
                pieces.append(escape(following, following))
            else:
                pieces.append(escape(c, c))
        else:
            pieces.append(escape(c, c))
    return b''.join(pieces)
1325
1330
1326
1331
# Matches a leading inline flag group such as ``(?i)``, capturing the flag
# letters and the rest of the pattern; _regex() uses it for b'relre'.
FLAG_RE = util.re.compile(br'^\(\?([aiLmsux]+)\)(.*)')
1328
1333
1329
1334
def _regex(kind, pat, globsuffix):
    """Turn a (normalized) pattern of the given kind into regexp text.

    ``globsuffix`` is appended to the regexp produced for glob kinds
    (relglob, glob, rootglob).  Kinds that cannot be expressed as a regexp
    raise ``error.ProgrammingError``.
    """
    if kind in (b'glob', b'relpath') and not pat:
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        # Pattern is a directory name; b'.' contributes no directory part.
        if pat == b'.':
            dirpart = b''
        else:
            dirpart = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return dirpart + b'[^/]+$'
    if kind == b'relglob':
        body = _globre(pat)
        star = b'[^/]*'
        if body.startswith(star):
            # When pat has the form *XYZ (common), make the returned regex
            # more legible by returning the regex for **XYZ instead of
            # **/*XYZ.
            return b'.*' + body[len(star) :] + globsuffix
        return b'(?:|.*/)' + body + globsuffix
    if kind == b'relre':
        flagmatch = FLAG_RE.match(pat)
        if flagmatch:
            flags, body = flagmatch.groups()
        else:
            flags, body = None, pat
        if not body.startswith(b'^'):
            body = b'.*' + body
        if flags is None:
            return body
        return br'(?%s:%s)' % (flags, body)
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1370
1375
1371
1376
def _buildmatch(kindpats, globsuffix, root):
    """Build the regexp text and the match function for ``kindpats``.

    ``globsuffix`` is appended to the regexp of globs.  Subincludes are
    served by a dedicated function that creates their matchers on first
    use.  Patterns that are all of kind b'rootfilesin' are matched with a
    set lookup, in which case the returned "regexp" is only a description.
    """
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                if prefix not in submatchers:
                    submatchers[prefix] = match(*matcherargs)
                if submatchers[prefix](f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                # directory part of f, b'.' when f has no slash
                head, slash, _tail = f.rpartition(b'/')
                return (head if slash else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1418
1423
1419
1424
# Size limit, in bytes, used by _buildregexmatch(): a single pattern whose
# regexp is longer aborts, and the joined regexps are split into several
# groups once a group would grow past this size.
MAX_RE_SIZE = 20000
1421
1426
1422
1427
1423 def _joinregexes(regexps):
1428 def _joinregexes(regexps):
1424 """gather multiple regular expressions into a single one"""
1429 """gather multiple regular expressions into a single one"""
1425 return b'|'.join(regexps)
1430 return b'|'.join(regexps)
1426
1431
1427
1432
def _buildregexmatch(kindpats, globsuffix):
    """Compile ``kindpats`` into a regexp string and a match function.

    The individual regexps are joined into one alternation.  When the
    joined text would exceed MAX_RE_SIZE it is compiled as several groups
    that are tried in turn.  If compilation fails, every pattern is
    compiled on its own so that the offending one can be reported.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        groups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        groupstart = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if groupsize + piecesize > MAX_RE_SIZE:
                # close the current group; this piece opens the next one
                groups.append(_joinregexes(regexps[groupstart:idx]))
                groupstart = idx
                groupsize = 0
            # one extra byte for the separator
            groupsize += piecesize + 1

        if groupstart == 0:
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))

        else:
            groups.append(_joinregexes(regexps[groupstart:]))
            allmatchers = [_rematcher(g) for g in groups]

            def func(s):
                return any(m(s) for m in allmatchers)

        return fullregexp, func
    except re.error:
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _(b"invalid pattern (%s): %s") % (k, p)
                raise error.Abort(msg)
        raise error.Abort(_(b"invalid pattern"))
1480
1485
1481
1486
1482 def _patternrootsanddirs(kindpats):
1487 def _patternrootsanddirs(kindpats):
1483 """Returns roots and directories corresponding to each pattern.
1488 """Returns roots and directories corresponding to each pattern.
1484
1489
1485 This calculates the roots and directories exactly matching the patterns and
1490 This calculates the roots and directories exactly matching the patterns and
1486 returns a tuple of (roots, dirs) for each. It does not return other
1491 returns a tuple of (roots, dirs) for each. It does not return other
1487 directories which may also need to be considered, like the parent
1492 directories which may also need to be considered, like the parent
1488 directories.
1493 directories.
1489 """
1494 """
1490 r = []
1495 r = []
1491 d = []
1496 d = []
1492 for kind, pat, source in kindpats:
1497 for kind, pat, source in kindpats:
1493 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1498 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1494 root = []
1499 root = []
1495 for p in pat.split(b'/'):
1500 for p in pat.split(b'/'):
1496 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1501 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1497 break
1502 break
1498 root.append(p)
1503 root.append(p)
1499 r.append(b'/'.join(root))
1504 r.append(b'/'.join(root))
1500 elif kind in (b'relpath', b'path'):
1505 elif kind in (b'relpath', b'path'):
1501 if pat == b'.':
1506 if pat == b'.':
1502 pat = b''
1507 pat = b''
1503 r.append(pat)
1508 r.append(pat)
1504 elif kind in (b'rootfilesin',):
1509 elif kind in (b'rootfilesin',):
1505 if pat == b'.':
1510 if pat == b'.':
1506 pat = b''
1511 pat = b''
1507 d.append(pat)
1512 d.append(pat)
1508 else: # relglob, re, relre
1513 else: # relglob, re, relre
1509 r.append(b'')
1514 r.append(b'')
1510 return r, d
1515 return r, d
1511
1516
1512
1517
def _roots(kindpats):
    '''Return the root directories that the patterns match recursively.'''
    return _patternrootsanddirs(kindpats)[0]
1517
1522
1518
1523
def _rootsdirsandparents(kindpats):
    """Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` are to be matched
    non-recursively, and `parents` are the directories that have to be
    walked in order to reach the items of either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    roots, dirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set()
    for paths in (dirs, roots):
        parents.update(pathutil.dirs(paths))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1561
1566
1562
1567
def _explicitfiles(kindpats):
    """Return the filenames that the patterns may name explicitly.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(filable)
1575
1580
1576
1581
1577 def _prefix(kindpats):
1582 def _prefix(kindpats):
1578 '''Whether all the patterns match a prefix (i.e. recursively)'''
1583 '''Whether all the patterns match a prefix (i.e. recursively)'''
1579 for kind, pat, source in kindpats:
1584 for kind, pat, source in kindpats:
1580 if kind not in (b'path', b'relpath'):
1585 if kind not in (b'path', b'relpath'):
1581 return False
1586 return False
1582 return True
1587 return True
1583
1588
1584
1589
# Comment-stripping regexp; readpatternfile() compiles it the first time it
# meets a line containing b"#" and stores it here.
_commentre = None
1586
1591
1587
1592
1588 def readpatternfile(filepath, warn, sourceinfo=False):
1593 def readpatternfile(filepath, warn, sourceinfo=False):
1589 """parse a pattern file, returning a list of
1594 """parse a pattern file, returning a list of
1590 patterns. These patterns should be given to compile()
1595 patterns. These patterns should be given to compile()
1591 to be validated and converted into a match function.
1596 to be validated and converted into a match function.
1592
1597
1593 trailing white space is dropped.
1598 trailing white space is dropped.
1594 the escape character is backslash.
1599 the escape character is backslash.
1595 comments start with #.
1600 comments start with #.
1596 empty lines are skipped.
1601 empty lines are skipped.
1597
1602
1598 lines can be of the following formats:
1603 lines can be of the following formats:
1599
1604
1600 syntax: regexp # defaults following lines to non-rooted regexps
1605 syntax: regexp # defaults following lines to non-rooted regexps
1601 syntax: glob # defaults following lines to non-rooted globs
1606 syntax: glob # defaults following lines to non-rooted globs
1602 re:pattern # non-rooted regular expression
1607 re:pattern # non-rooted regular expression
1603 glob:pattern # non-rooted glob
1608 glob:pattern # non-rooted glob
1604 rootglob:pat # rooted glob (same root as ^ in regexps)
1609 rootglob:pat # rooted glob (same root as ^ in regexps)
1605 pattern # pattern of the current default type
1610 pattern # pattern of the current default type
1606
1611
1607 if sourceinfo is set, returns a list of tuples:
1612 if sourceinfo is set, returns a list of tuples:
1608 (pattern, lineno, originalline).
1613 (pattern, lineno, originalline).
1609 This is useful to debug ignore patterns.
1614 This is useful to debug ignore patterns.
1610 """
1615 """
1611
1616
1612 syntaxes = {
1617 syntaxes = {
1613 b're': b'relre:',
1618 b're': b'relre:',
1614 b'regexp': b'relre:',
1619 b'regexp': b'relre:',
1615 b'glob': b'relglob:',
1620 b'glob': b'relglob:',
1616 b'rootglob': b'rootglob:',
1621 b'rootglob': b'rootglob:',
1617 b'include': b'include',
1622 b'include': b'include',
1618 b'subinclude': b'subinclude',
1623 b'subinclude': b'subinclude',
1619 }
1624 }
1620 syntax = b'relre:'
1625 syntax = b'relre:'
1621 patterns = []
1626 patterns = []
1622
1627
1623 fp = open(filepath, b'rb')
1628 fp = open(filepath, b'rb')
1624 for lineno, line in enumerate(fp, start=1):
1629 for lineno, line in enumerate(fp, start=1):
1625 if b"#" in line:
1630 if b"#" in line:
1626 global _commentre
1631 global _commentre
1627 if not _commentre:
1632 if not _commentre:
1628 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1633 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1629 # remove comments prefixed by an even number of escapes
1634 # remove comments prefixed by an even number of escapes
1630 m = _commentre.search(line)
1635 m = _commentre.search(line)
1631 if m:
1636 if m:
1632 line = line[: m.end(1)]
1637 line = line[: m.end(1)]
1633 # fixup properly escaped comments that survived the above
1638 # fixup properly escaped comments that survived the above
1634 line = line.replace(b"\\#", b"#")
1639 line = line.replace(b"\\#", b"#")
1635 line = line.rstrip()
1640 line = line.rstrip()
1636 if not line:
1641 if not line:
1637 continue
1642 continue
1638
1643
1639 if line.startswith(b'syntax:'):
1644 if line.startswith(b'syntax:'):
1640 s = line[7:].strip()
1645 s = line[7:].strip()
1641 try:
1646 try:
1642 syntax = syntaxes[s]
1647 syntax = syntaxes[s]
1643 except KeyError:
1648 except KeyError:
1644 if warn:
1649 if warn:
1645 warn(
1650 warn(
1646 _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
1651 _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
1647 )
1652 )
1648 continue
1653 continue
1649
1654
1650 linesyntax = syntax
1655 linesyntax = syntax
1651 for s, rels in syntaxes.items():
1656 for s, rels in syntaxes.items():
1652 if line.startswith(rels):
1657 if line.startswith(rels):
1653 linesyntax = rels
1658 linesyntax = rels
1654 line = line[len(rels) :]
1659 line = line[len(rels) :]
1655 break
1660 break
1656 elif line.startswith(s + b':'):
1661 elif line.startswith(s + b':'):
1657 linesyntax = rels
1662 linesyntax = rels
1658 line = line[len(s) + 1 :]
1663 line = line[len(s) + 1 :]
1659 break
1664 break
1660 if sourceinfo:
1665 if sourceinfo:
1661 patterns.append((linesyntax + line, lineno, line))
1666 patterns.append((linesyntax + line, lineno, line))
1662 else:
1667 else:
1663 patterns.append(linesyntax + line)
1668 patterns.append(linesyntax + line)
1664 fp.close()
1669 fp.close()
1665 return patterns
1670 return patterns
General Comments 0
You need to be logged in to leave comments. Login now