##// END OF EJS Templates
match: make the FLAG_RE pattern a raw string...
Matt Harbison -
r50540:c4874ebe stable
parent child Browse files
Show More
@@ -1,1664 +1,1664 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import bisect
9 import bisect
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import open
16 from .pycompat import open
17 from . import (
17 from . import (
18 encoding,
18 encoding,
19 error,
19 error,
20 pathutil,
20 pathutil,
21 policy,
21 policy,
22 pycompat,
22 pycompat,
23 util,
23 util,
24 )
24 )
25 from .utils import stringutil
25 from .utils import stringutil
26
26
# Module returned by policy.importrust() for the 'dirstate' component.
rustmod = policy.importrust('dirstate')

# Known pattern kinds, i.e. the part before ':' in a 'kind:pattern' string.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose patterns _donormalize() canonicalizes against cwd.
cwdrelativepatternkinds = (b'relpath', b'glob')

# Local alias so classes below can write @propertycache.
propertycache = util.propertycache
47
47
48
48
def _rematcher(regex):
    """Compile ``regex`` through util.re and return a matching callable.

    The compiled object's ``test_match`` attribute is returned when it has
    one (it is provided by facebook's re2 bindings and is slightly faster);
    otherwise its ordinary ``match`` method is returned.
    """
    compiled = util.re.compile(regex)
    try:
        matcher = compiled.test_match
    except AttributeError:
        # no re2-style fast path on this compiled pattern
        matcher = compiled.match
    return matcher
58
58
59
59
def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
    """Split ``kindpats`` into fileset matchers and the remaining patterns.

    Every b'set' entry is turned into a matcher via ``ctx.matchfileset``.
    When ``listsubrepos`` is true, the same fileset is also evaluated in each
    subrepo named in ``ctx.substate`` and the result is wrapped in a
    prefixdirmatcher for that subpath.

    Returns a ``(matchers, other)`` pair; ``other`` holds the non-'set'
    ``(kind, pat, source)`` tuples in their original order.

    Raises error.ProgrammingError if a 'set' pattern is given without a ctx.
    """
    filesetmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != b'set':
            remaining.append((kind, pat, source))
            continue

        if ctx is None:
            raise error.ProgrammingError(
                b"fileset expression with no context"
            )
        filesetmatchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))

        if not listsubrepos:
            continue
        for subpath in ctx.substate:
            submatcher = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
            filesetmatchers.append(
                prefixdirmatcher(subpath, submatcher, badfn=badfn)
            )

    return filesetmatchers, remaining
82
82
83
83
def _expandsubinclude(kindpats, root):
    """Separate the b'subinclude' entries of ``kindpats`` from the rest.

    For each subinclude, the included file's path is resolved relative to
    the directory of the pattern's ``source``, and a ``(prefix, matcherargs)``
    pair is recorded: ``matcherargs`` is the argument tuple for a matcher
    rooted at the included file's directory, and ``prefix`` is that directory
    relative to ``root`` (with a trailing b'/' unless it is empty).

    Returns ``(relmatchers, other)`` where ``other`` is the list of
    non-subinclude ``(kind, pat, source)`` tuples.
    """
    relmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != b'subinclude':
            remaining.append((kind, pat, source))
            continue

        sourceroot = pathutil.dirname(util.normpath(source))
        includepath = pathutil.join(sourceroot, util.pconvert(pat))

        newroot = pathutil.dirname(includepath)
        matcherargs = (newroot, b'', [], [b'include:%s' % includepath])

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += b'/'
        relmatchers.append((prefix, matcherargs))

    return relmatchers, remaining
107
107
108
108
109 def _kindpatsalwaysmatch(kindpats):
109 def _kindpatsalwaysmatch(kindpats):
110 """Checks whether the kindspats match everything, as e.g.
110 """Checks whether the kindspats match everything, as e.g.
111 'relpath:.' does.
111 'relpath:.' does.
112 """
112 """
113 for kind, pat, source in kindpats:
113 for kind, pat, source in kindpats:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 return False
115 return False
116 return True
116 return True
117
117
118
118
def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build a single matcher for ``kindpats``.

    Fileset ('set') patterns are expanded into their own matchers by
    _expandsets(); whatever is left is handed to ``matchercls``. The result
    is a nevermatcher when nothing was produced, the lone matcher when there
    is exactly one, and a unionmatcher over all of them otherwise.
    """
    filesetmatchers, plainkindpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )

    matchers = []
    if plainkindpats:
        matchers.append(matchercls(root, plainkindpats, badfn=badfn))
    # the pattern matcher (if any) stays first, fileset matchers follow
    matchers.extend(filesetmatchers)

    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
146
146
147
147
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))

    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            # Like _donormalize(), but additionally folds every non-regex
            # pattern through the dirstate's normalize().
            normalized = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                if kind in (b're', b'relre'):
                    # regex can't be normalized
                    normalized.append((kind, pat, source))
                    continue

                folded = dsnormalize(pat)

                # Preserve the original to handle a case only rename.
                if pat != folded and pat in dirstate:
                    normalized.append((kind, pat, source))

                normalized.append((kind, folded, source))
            return normalized

    else:
        normalize = _donormalize

    def filtermatcher(pats):
        # Shared by 'include' and 'exclude': both default to 'glob', both use
        # includematcher, and neither receives the caller's badfn.
        filterkindpats = normalize(pats, b'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            filterkindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )

    if include:
        m = intersectmatchers(m, filtermatcher(include))
    if exclude:
        m = differencematcher(m, filtermatcher(exclude))
    return m
310
310
311
311
def exact(files, badfn=None):
    """Return an exactmatcher built from ``files``, forwarding ``badfn``."""
    return exactmatcher(files, badfn=badfn)
314
314
315
315
def always(badfn=None):
    """Return an alwaysmatcher, forwarding ``badfn``."""
    return alwaysmatcher(badfn=badfn)
318
318
319
319
def never(badfn=None):
    """Return a nevermatcher, forwarding ``badfn``."""
    return nevermatcher(badfn=badfn)
322
322
323
323
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is ``badfn``.

    The attribute is set on the copy only, not on the matcher passed in.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
331
331
332
332
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Turn 'kind:pat' strings into ``(kind, pat, source)`` tuples.

    Patterns are split with _patsplit() (using ``default`` for the kind when
    none is given), then normalized according to their kind:

    - cwd-relative kinds are canonicalized against ``root``/``cwd``;
    - relglob, path, rootfilesin and rootglob are passed through
      util.normpath();
    - listfile/listfile0 are replaced by the normalized patterns read from
      the named file, with that file name as their source;
    - include is replaced by the normalized patterns of the pattern file,
      keeping a nested source when there is one;
    - anything else (re, relre) is kept untouched.

    Raises error.Abort when a list file cannot be read or when reading an
    included pattern file aborts. An unreadable include file is skipped,
    with a message sent to ``warn`` if one was given.
    """
    result = []
    # Split everything up front, before any file is read.
    splitpats = [_patsplit(p, default) for p in patterns]

    for kind, pat in splitpats:
        if kind in (b'listfile', b'listfile0'):
            try:
                data = util.readfile(pat)
                if kind == b'listfile0':
                    entries = data.split(b'\0')
                else:
                    entries = data.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # the list file itself becomes the source of each expanded entry
            result.extend(
                (k, p, pat)
                for k, p, _source in _donormalize(
                    entries, default, root, cwd, auditor, warn
                )
            )
            continue

        if kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                result.extend(
                    (k, p, source or pat)
                    for k, p, source in _donormalize(
                        includepats, default, root, cwd, auditor, warn
                    )
                )
            except error.Abort as inst:
                raise error.Abort(
                    b'%s: %s'
                    % (
                        pat,
                        inst.message,
                    )  # pytype: disable=unsupported-operands
                )
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue

        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        result.append((kind, pat, b''))

    return result
383
383
384
384
class basematcher:
    """Common interface and default behaviour shared by the matchers."""

    def __init__(self, badfn=None):
        # Only shadow the bad() method when a callback was actually given.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        """Calling a matcher is the same as calling its matchfn()."""
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        """Default match function: matches nothing."""
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not (self.always() or self.isexact() or self.prefix())
499
499
500
500
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super().__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        # every file name matches
        return True

    def visitdir(self, dir):
        # every directory, and everything below it, is to be visited
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
521
521
522
522
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super().__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # nothing can match, so no directory needs visiting
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
548
548
549
549
class predicatematcher(basematcher):
    """Wrap a plain boolean function so it can be used as a matcher.

    `predfn` is installed as `matchfn`; `predrepr` (optional) is only used
    to build the repr.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super().__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Prefer the caller-supplied description; fall back to the repr of
        # the predicate function when that description is empty.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): the 'predicatenmatcher' spelling is kept verbatim;
        # presumably existing expected output depends on it -- confirm
        # before changing.
        return b'<predicatenmatcher pred=%s>' % described
564
564
565
565
def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
    count = len(prefix_set)
    if count == 0:
        return False
    if path in prefix_set:
        return True

    # With more than 5 entries it is *probably* cheaper to walk up the
    # directory hierarchy of `path` and do one hash lookup per ancestor:
    # hierarchies tend to be shallow and set membership is cheap.
    if count > 5:
        for parentdir in pathutil.finddirs(path):
            if parentdir in prefix_set:
                return True
        return False

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    slash = ord(b'/')

    # `path` itself is known not to be in `prefix_set` (checked above), so
    # whenever `path.startswith(pf)` holds, `path` is strictly longer than
    # `pf` and `path[len(pf)]` cannot raise IndexError.
    for pf in prefix_set:
        if path.startswith(pf) and path[len(pf)] == slash:
            return True
    return False
591
591
592
592
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super().__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # b'$' is passed to _buildmatch as the glob suffix.
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        # Computed once from self._fileset and then cached.
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        """Return b'all', True or False for directory `dir`."""
        if self._prefix and dir in self._fileset:
            return b'all'
        if dir in self._dirs:
            return True
        return path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        """Translate the visitdir() answer into visitchildrenset() terms."""
        visit = self.visitdir(dir)
        if visit is True:
            return b'this'
        if not visit:
            return set()
        # The only remaining value visitdir() produces is b'all'.
        assert visit == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        pats = pycompat.bytestr(self._pats)
        return b'<patternmatcher patterns=%r>' % pats
657
657
658
658
# This is basically a reimplementation of pathutil.dirs that stores the
# children instead of just a count of them, plus a small optional optimization
# to avoid some directories we don't need.
class _dirchildren:
    """Map every ancestor directory of a set of paths to its direct children.

    `paths` is an iterable of b'/'-separated byte paths. For each path,
    every (dirname, basename) split on the way up to the root (b'') is
    recorded, so `get(dirname)` returns the set of names directly below
    `dirname`.

    `onlyinclude` optionally restricts which directories are recorded:
    - None (the default): no restriction, every directory is recorded;
    - any collection (even an empty one): only directories contained in it
      are recorded.
    """

    def __init__(self, paths, onlyinclude=None):
        self._dirs = {}
        # Keep None distinct from an empty collection. Collapsing None to
        # [] made the filter reject every directory, so the mapping always
        # came out empty whenever the argument was omitted.
        self._onlyinclude = onlyinclude
        addpath = self.addpath
        for f in paths:
            addpath(f)

    def addpath(self, path):
        """Record `path` under each of its ancestor directories."""
        if path == b'':
            # The root has no parent to be recorded under.
            return
        dirs = self._dirs
        only = self._onlyinclude
        for d, b in self._findsplitdirs(path):
            if only is not None and d not in only:
                continue
            dirs.setdefault(d, set()).add(b)

    @staticmethod
    def _findsplitdirs(path):
        # yields (dirname, basename) tuples, walking back to the root. This is
        # very similar to pathutil.finddirs, except:
        #  - produces a (dirname, basename) tuple, not just 'dirname'
        # Unlike manifest._splittopdir, this does not suffix `dirname` with a
        # slash.
        oldpos = len(path)
        pos = path.rfind(b'/')
        while pos != -1:
            yield path[:pos], path[pos + 1 : oldpos]
            oldpos = pos
            pos = path.rfind(b'/', 0, pos)
        # Final step: the top-level component, whose parent is the root.
        yield b'', path[:oldpos]

    def get(self, path):
        """Return the recorded children of `path` (an empty set if none)."""
        return self._dirs.get(path, set())
697
697
698
698
class includematcher(basematcher):
    """Matcher built from include-style (kind, pat, source) patterns."""

    def __init__(self, root, kindpats, badfn=None):
        super().__init__(badfn)
        if rustmod is not None:
            # We need to pass the patterns to Rust because they can contain
            # patterns from the user interface
            self._kindpats = kindpats
        # b'(?:/|$)' is passed to _buildmatch as the glob suffix.
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots: directories that are included recursively.
        self._roots = set(roots)
        # dirs: directories that are included non-recursively.
        self._dirs = set(dirs)
        # parents: directories that are included non-recursively only
        # because they lead to something in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        """Return b'all', True or False for directory `dir`."""
        if self._prefix and dir in self._roots:
            return b'all'
        if dir in self._dirs or dir in self._parents:
            return True
        return path_or_parents_in_set(dir, self._roots)

    @propertycache
    def _allparentschildren(self):
        # Feeding dirs, roots *and* parents in, then restricting to parents,
        # looks odd but is needed for a case such as:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # Had only self._parents been added, asking for the children of
        # 'foo' could not answer ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or a set of child names for `dir`."""
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: the 'dir in self._parents' case from visitdir is *not* part
        # of this test; it is handled separately just below.
        if (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        pats = pycompat.bytestr(self._pats)
        return b'<includematcher includes=%r>' % pats
758
758
759
759
class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super().__init__(badfn)

        # A list is stored as-is (no copy); any other iterable is
        # materialised into a new list.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    @propertycache
    def _visitchildrenset_candidates(self):
        """A memoized set of candidates for visitchildrenset."""
        # '-' binds tighter than '|': the root entry b'' is dropped from
        # the directories only, then the result is merged with the files.
        return self._fileset | (self._dirs - {b''})

    @propertycache
    def _sorted_visitchildrenset_candidates(self):
        """A memoized sorted list of candidates for visitchildrenset."""
        return sorted(self._visitchildrenset_candidates)

    def visitchildrenset(self, dir):
        """Return the set of direct children of `dir` worth visiting."""
        if not self._fileset or dir not in self._dirs:
            return set()

        if dir == b'':
            pool = self._visitchildrenset_candidates
        else:
            ordered = self._sorted_visitchildrenset_candidates
            prefix = dir + b'/'
            # bisect to the first element that could start with `prefix`
            # (i.e. the first one >= prefix). At least one is expected to
            # exist; the assert at the end catches the case where none does.
            lo = bisect.bisect_left(ordered, prefix)
            # Build the smallest string that sorts after everything starting
            # with `prefix`: `dir` followed by the character right after
            # '/'. (That character is probably '0', but it is computed
            # rather than assumed.)
            upper = dir + encoding.strtolocal(chr(ord(b'/') + 1))
            # bisect to the first element >= upper.
            hi = bisect.bisect_left(ordered, upper, lo=lo)
            skip = len(prefix)
            pool = {entry[skip:] for entry in ordered[lo:hi]}
        # self._dirs holds every directory recursively: when matching
        # foo/bar/baz.txt it contains '', 'foo' and 'foo/bar'. A candidate
        # that still contains '/' therefore belongs to a deeper level and
        # can be dropped; its immediate parent is present without a slash.
        children = {name for name in pool if b'/' not in name}
        # An empty result would mean _dirs holds a directory for which
        # _fileset has no file, which is not expected.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
847
847
848
848
class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # bad and traversedir are taken from m1 only.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Return b'all', True or False for directory `dir`."""
        # Ask m2 once and reuse the answer for both tests below.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # m2 matches everything under dir, so nothing can remain.
            return False
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 matches some of dir: never pass m1's 'all' through, only a bool.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or a set of child names for `dir`."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1: set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
916
916
917
917
def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    If either argument is None, the result is `m1 or m2`. If one matcher
    reports always(), a shallow copy of the other one is returned rather
    than a new intersectionmatcher.
    """
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 alone decides what matches, but the result must still carry
        # m1's bad and traversedir.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined = copy.copy(m2)
        combined.bad = m1.bad
        combined.traversedir = m1.traversedir
        return combined

    if m2.always():
        # m1 decides everything; hand back a copy of it.
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
936
936
937
937
class intersectionmatcher(basematcher):
    """Matcher that matches a file only when both `m1` and `m2` match it.

    bad and traversedir are taken from `m1`.
    """

    def __init__(self, m1, m2):
        super().__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()

        # At least one side is exact: enumerate that side's files and keep
        # those the other side also matches.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        """Return b'all', True or False for directory `dir`."""
        visit1 = self._m1.visitdir(dir)
        if visit1 == b'all':
            # m1 takes everything below dir, so m2's answer is the answer.
            return self._m2.visitdir(dir)
        if not visit1:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or a set of child names for `dir`."""
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side leaves the other side's answer unchanged.
        if m1_set == b'all':
            return m2_set
        if m2_set == b'all':
            return m1_set

        if b'this' in (m1_set, m2_set):
            return b'this'

        # Both answers are explicit sets of child names by now.
        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
997
997
998
998
999 class subdirmatcher(basematcher):
999 class subdirmatcher(basematcher):
1000 """Adapt a matcher to work on a subdirectory only.
1000 """Adapt a matcher to work on a subdirectory only.
1001
1001
1002 The paths are remapped to remove/insert the path as needed:
1002 The paths are remapped to remove/insert the path as needed:
1003
1003
1004 >>> from . import pycompat
1004 >>> from . import pycompat
1005 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
1005 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
1006 >>> m2 = subdirmatcher(b'sub', m1)
1006 >>> m2 = subdirmatcher(b'sub', m1)
1007 >>> m2(b'a.txt')
1007 >>> m2(b'a.txt')
1008 False
1008 False
1009 >>> m2(b'b.txt')
1009 >>> m2(b'b.txt')
1010 True
1010 True
1011 >>> m2.matchfn(b'a.txt')
1011 >>> m2.matchfn(b'a.txt')
1012 False
1012 False
1013 >>> m2.matchfn(b'b.txt')
1013 >>> m2.matchfn(b'b.txt')
1014 True
1014 True
1015 >>> m2.files()
1015 >>> m2.files()
1016 ['b.txt']
1016 ['b.txt']
1017 >>> m2.exact(b'b.txt')
1017 >>> m2.exact(b'b.txt')
1018 True
1018 True
1019 >>> def bad(f, msg):
1019 >>> def bad(f, msg):
1020 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
1020 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
1021 >>> m1.bad = bad
1021 >>> m1.bad = bad
1022 >>> m2.bad(b'x.txt', b'No such file')
1022 >>> m2.bad(b'x.txt', b'No such file')
1023 sub/x.txt: No such file
1023 sub/x.txt: No such file
1024 """
1024 """
1025
1025
1026 def __init__(self, path, matcher):
1026 def __init__(self, path, matcher):
1027 super(subdirmatcher, self).__init__()
1027 super(subdirmatcher, self).__init__()
1028 self._path = path
1028 self._path = path
1029 self._matcher = matcher
1029 self._matcher = matcher
1030 self._always = matcher.always()
1030 self._always = matcher.always()
1031
1031
1032 self._files = [
1032 self._files = [
1033 f[len(path) + 1 :]
1033 f[len(path) + 1 :]
1034 for f in matcher._files
1034 for f in matcher._files
1035 if f.startswith(path + b"/")
1035 if f.startswith(path + b"/")
1036 ]
1036 ]
1037
1037
1038 # If the parent repo had a path to this subrepo and the matcher is
1038 # If the parent repo had a path to this subrepo and the matcher is
1039 # a prefix matcher, this submatcher always matches.
1039 # a prefix matcher, this submatcher always matches.
1040 if matcher.prefix():
1040 if matcher.prefix():
1041 self._always = any(f == path for f in matcher._files)
1041 self._always = any(f == path for f in matcher._files)
1042
1042
def bad(self, f, msg):
    """Forward a bad-file report to the wrapped matcher.

    ``f`` is relative to the subdirectory, so the subdirectory path is
    prepended before the wrapped matcher's ``bad`` is called.
    """
    fullpath = b"/".join((self._path, f))
    self._matcher.bad(fullpath, msg)
1045
1045
def matchfn(self, f):
    """Return the wrapped matcher's verdict for ``f`` inside the subdir."""
    # Some information is lost in the superclass's constructor, so the
    # matching function for the subdirectory cannot be rebuilt accurately
    # from the inputs. Instead matchfn() and visitdir() are overridden to
    # ask the original matcher about the path with the subdirectory
    # prepended.
    parentpath = b"/".join([self._path, f])
    return self._matcher.matchfn(parentpath)
1052
1052
def visitdir(self, dir):
    """Ask the wrapped matcher about ``dir`` relative to the subdirectory.

    The empty directory maps to the subdirectory itself; anything else is
    joined onto it with a slash.
    """
    parentdir = self._path if dir == b'' else self._path + b"/" + dir
    return self._matcher.visitdir(parentdir)
1059
1059
def visitchildrenset(self, dir):
    """Delegate to the wrapped matcher with the subdirectory prepended.

    The empty directory maps to the subdirectory itself; anything else is
    joined onto it with a slash.
    """
    if dir == b'':
        parentdir = self._path
    else:
        parentdir = b"/".join((self._path, dir))
    return self._matcher.visitchildrenset(parentdir)
1066
1066
def always(self):
    """Return the always-matches flag computed in the constructor."""
    alwaysmatches = self._always
    return alwaysmatches
1069
1069
def prefix(self):
    """Whether this is a prefix matcher that does not match everything.

    True only when the wrapped matcher reports itself as a prefix matcher
    and this submatcher is not an always-matcher.
    """
    isprefix = self._matcher.prefix()
    if not isprefix:
        return isprefix
    return not self._always
1072
1072
@encoding.strmethod
def __repr__(self):
    """Describe this matcher by its subdirectory and wrapped matcher."""
    details = (self._path, self._matcher)
    return b'<subdirmatcher path=%r, matcher=%r>' % details
1079
1079
1080
1080
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    Paths given to this matcher must start with the prefix path; the
    remainder is handed to the wrapped matcher. The wrapped matcher's
    non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        """Wrap ``matcher`` below the non-empty prefix ``path``.

        Raises error.ProgrammingError when ``path`` is empty.
        """
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._matcher = matcher
        self._path = path
        # Cached "path/" form, used to recognise and strip the prefix.
        self._pathprefix = path + b'/'

    @propertycache
    def _files(self):
        """The wrapped matcher's files with the prefix prepended."""
        leading = self._pathprefix
        return [leading + name for name in self._matcher._files]

    def matchfn(self, f):
        """Match ``f`` only if it is below the prefix and the wrapped
        matcher accepts the remainder."""
        leading = self._pathprefix
        if f.startswith(leading):
            return self._matcher.matchfn(f[len(leading) :])
        return False

    @propertycache
    def _pathdirs(self):
        """Set of the directories pathutil.finddirs() yields for the
        prefix path."""
        found = pathutil.finddirs(self._path)
        return set(found)

    def visitdir(self, dir):
        """Delegate for the prefix directory and anything below it;
        otherwise report whether ``dir`` is in ``_pathdirs``."""
        leading = self._pathprefix
        if dir == self._path:
            inner = b''
        elif dir.startswith(leading):
            inner = dir[len(leading) :]
        else:
            return dir in self._pathdirs
        return self._matcher.visitdir(inner)

    def visitchildrenset(self, dir):
        """Delegate for the prefix directory and anything below it;
        otherwise return b'this' for directories in ``_pathdirs`` and an
        empty set for everything else."""
        leading = self._pathprefix
        if dir == self._path:
            inner = b''
        elif dir.startswith(leading):
            inner = dir[len(leading) :]
        elif dir in self._pathdirs:
            return b'this'
        else:
            return set()
        return self._matcher.visitchildrenset(inner)

    def isexact(self):
        """Same answer as the wrapped matcher."""
        wrapped = self._matcher
        return wrapped.isexact()

    def prefix(self):
        """Same answer as the wrapped matcher."""
        wrapped = self._matcher
        return wrapped.prefix()

    @encoding.strmethod
    def __repr__(self):
        details = (pycompat.bytestr(self._path), self._matcher)
        return b'<prefixdirmatcher path=%r, matcher=%r>' % details
1162
1162
1163
1163
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    A file matches when at least one of the wrapped matchers matches it.
    ``traversedir`` is taken from the first matcher.

    NOTE(review): ``bad`` is not copied from the first matcher here; confirm
    whether that is intended.
    """

    def __init__(self, matchers):
        """Wrap ``matchers``, a non-empty sequence of matchers."""
        m1 = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True if any wrapped matcher matches ``f``."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """Combine the wrapped matchers' visitdir() answers.

        b'all' from any matcher is returned immediately; otherwise the
        answers are OR-ed together.
        """
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == b'all':
                return v
            r |= v
        return r

    def visitchildrenset(self, dir):
        """Combine the wrapped matchers' visitchildrenset() answers.

        b'all' from any matcher wins; otherwise b'this' from any matcher
        wins; otherwise the union of the returned sets is returned.
        """
        r = set()
        this = False
        for m in self._matchers:
            v = m.visitchildrenset(dir)
            if not v:
                continue
            if v == b'all':
                return v
            if this or v == b'this':
                this = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(v, set)
            r = r.union(v)
        if this:
            return b'this'
        return r

    @encoding.strmethod
    def __repr__(self):
        # Wrap in a 1-tuple so a tuple of matchers is formatted as a single
        # value instead of being unpacked as the format arguments.
        return b'<unionmatcher matchers=%r>' % (self._matchers,)
1214
1214
1215
1215
def patkind(pattern, default=None):
    r"""Return the kind of a 'kind:pat' pattern when the kind is known.

    ``default`` is returned when the pattern has no known kind prefix.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    kind, _pat = _patsplit(pattern, default)
    return kind
1230
1230
1231
1231
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns (kind, pat) when the text before the first colon is one of
    ``allpatternkinds``; otherwise (default, pattern) with the pattern
    untouched."""
    kind, colon, rest = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, rest
    return default, pattern
1240
1240
1241
1241
def _globre(pat):
    r"""Translate an extended glob into the text of a regular expression.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    escape = util.stringutil.regexbytesescapemap.get
    end = len(pat)
    pos = 0
    depth = 0  # number of currently open '{' groups
    pieces = []

    def at(index):
        # one-byte slice; empty once ``index`` is past the end
        return pat[index : index + 1]

    while pos < end:
        ch = at(pos)
        pos += 1
        if ch == b'*':
            if at(pos) != b'*':
                # single star: anything except a path separator
                pieces.append(b'[^/]*')
            else:
                pos += 1
                if at(pos) == b'/':
                    # "**/" also matches zero directories
                    pos += 1
                    pieces.append(b'(?:.*/)?')
                else:
                    pieces.append(b'.*')
        elif ch == b'?':
            pieces.append(b'.')
        elif ch == b'[':
            # A leading '!' or ']' belongs to the class and cannot close it.
            start = pos
            if start < end and at(start) in b'!]':
                start += 1
            close = pat.find(b']', start)
            if close < 0:
                # unterminated class: treat the bracket literally
                pieces.append(b'\\[')
            else:
                inner = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                lead = inner[:1]
                if lead == b'!':
                    inner = b'^' + inner[1:]
                elif lead == b'^':
                    inner = b'\\' + inner
                pieces.append(b'[' + inner + b']')
        elif ch == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif ch == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif ch == b',' and depth:
            pieces.append(b'|')
        elif ch == b'\\':
            following = at(pos)
            if following:
                # a backslash escapes whatever follows it
                pos += 1
                pieces.append(escape(following, following))
            else:
                pieces.append(escape(ch, ch))
        else:
            # ordinary bytes, plus '}' and ',' outside of any group
            pieces.append(escape(ch, ch))
    return b''.join(pieces)
1324
1324
1325
1325
# Splits a leading run of inline regexp flags, e.g. ``(?i)``, from the rest
# of a pattern: group 1 is the flag letters, group 2 is the remainder.
# Written as a raw bytes literal so that ``\(``, ``\?`` and ``\)`` reach the
# regexp engine verbatim instead of being invalid string escapes.
FLAG_RE = util.re.compile(br'^\(\?([aiLmsux]+)\)(.*)')
1327
1327
1328
1328
def _regex(kind, pat, globsuffix):
    """Turn a (normalized) pattern of any kind into regular expression
    text.

    ``globsuffix`` is appended to the regexp produced for glob kinds.
    Raises error.ProgrammingError for a kind that has no regexp form."""
    if kind in (b'glob', b'relpath') and not pat:
        return b''

    if kind == b're':
        return pat
    elif kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    elif kind == b'rootfilesin':
        # Pattern is a directory name; b'.' stands for no directory prefix.
        dirprefix = b'' if pat == b'.' else util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + b'[^/]+$'
    elif kind == b'relglob':
        star = b'[^/]*'
        translated = _globre(pat)
        if translated.startswith(star):
            # When pat has the form *XYZ (common), make the returned regex
            # more legible by returning the regex for **XYZ instead of
            # **/*XYZ.
            return b'.*' + translated[len(star) :] + globsuffix
        return b'(?:|.*/)' + translated + globsuffix
    elif kind == b'relre':
        # Peel off leading inline flags so the unanchored prefix can be
        # inserted, then re-apply them around the whole pattern.
        found = FLAG_RE.match(pat)
        flags, body = found.groups() if found else (None, pat)
        if not body.startswith(b'^'):
            body = b'.*' + body
        if flags is None:
            return body
        return br'(?%s:%s)' % (flags, body)
    elif kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix

    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1369
1369
1370
1370
def _buildmatch(kindpats, globsuffix, root):
    """Build a (regexp string, matcher function) pair for ``kindpats``.

    ``globsuffix`` is appended to the regexp of globs. Subincludes are
    expanded first and matched by their own lazily created matchers."""
    subincludes, kindpats = _expandsubinclude(kindpats, root)
    matchers = []

    if subincludes:
        # prefix -> matcher, created the first time a file hits the prefix
        cache = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                try:
                    sub = cache[prefix]
                except KeyError:
                    sub = cache[prefix] = match(*matcherargs)
                if sub(f[len(prefix) :]):
                    return True
            return False

        matchers.append(matchsubinclude)

    regex = b''
    if kindpats:
        if any(k != b'rootfilesin' for k, p, s in kindpats):
            regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
            matchers.append(regexmatch)
        else:
            # Only rootfilesin patterns: compare the file's directory
            # against a set instead of running a regexp.
            dirs = {p for k, p, s in kindpats}

            def matchrootfilesin(f):
                head, slash, _name = f.rpartition(b'/')
                return (head if slash else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchers.append(matchrootfilesin)

    if len(matchers) == 1:
        return regex, matchers[0]
    return regex, lambda f: any(mf(f) for mf in matchers)
1417
1417
1418
1418
# Upper bound, in bytes, on the regexps built by _buildregexmatch(): a single
# pattern longer than this aborts, and the joined patterns are split into
# several regexps so that each group stays under it.
MAX_RE_SIZE = 20000
1420
1420
1421
1421
1422 def _joinregexes(regexps):
1422 def _joinregexes(regexps):
1423 """gather multiple regular expressions into a single one"""
1423 """gather multiple regular expressions into a single one"""
1424 return b'|'.join(regexps)
1424 return b'|'.join(regexps)
1425
1425
1426
1426
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of (kind, pat, source) entries
    and return the regexp string together with that function.

    Patterns are joined into one regexp when they fit within MAX_RE_SIZE;
    otherwise they are compiled as several groups and a file matches when
    any group matches. Invalid patterns are reported through error.Abort.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        finished = []  # joined regexps of the groups that filled up
        pending = []  # pieces of the group currently being filled
        pendingsize = 0
        for piece in regexps:
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if pendingsize + piecesize > MAX_RE_SIZE:
                # close the current group and start a new one with piece
                finished.append(_joinregexes(pending))
                pending = []
                pendingsize = 0
            pending.append(piece)
            # the extra byte accounts for the b'|' separator
            pendingsize += piecesize + 1

        if finished:
            finished.append(_joinregexes(pending))
            allmatchers = [_rematcher(g) for g in finished]
            func = lambda s: any(m(s) for m in allmatchers)
        else:
            # everything fitted in one group: compile the full regexp
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        return fullregexp, func
    except re.error:
        # Recompile the patterns one at a time to name the offending one.
        for kind, pat, source in kindpats:
            try:
                _rematcher(_regex(kind, pat, globsuffix))
            except re.error:
                if not source:
                    raise error.Abort(
                        _(b"invalid pattern (%s): %s") % (kind, pat)
                    )
                raise error.Abort(
                    _(b"%s: invalid pattern (%s): %s") % (source, kind, pat)
                )
        raise error.Abort(_(b"invalid pattern"))
1479
1479
1480
1480
1481 def _patternrootsanddirs(kindpats):
1481 def _patternrootsanddirs(kindpats):
1482 """Returns roots and directories corresponding to each pattern.
1482 """Returns roots and directories corresponding to each pattern.
1483
1483
1484 This calculates the roots and directories exactly matching the patterns and
1484 This calculates the roots and directories exactly matching the patterns and
1485 returns a tuple of (roots, dirs) for each. It does not return other
1485 returns a tuple of (roots, dirs) for each. It does not return other
1486 directories which may also need to be considered, like the parent
1486 directories which may also need to be considered, like the parent
1487 directories.
1487 directories.
1488 """
1488 """
1489 r = []
1489 r = []
1490 d = []
1490 d = []
1491 for kind, pat, source in kindpats:
1491 for kind, pat, source in kindpats:
1492 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1492 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1493 root = []
1493 root = []
1494 for p in pat.split(b'/'):
1494 for p in pat.split(b'/'):
1495 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1495 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1496 break
1496 break
1497 root.append(p)
1497 root.append(p)
1498 r.append(b'/'.join(root))
1498 r.append(b'/'.join(root))
1499 elif kind in (b'relpath', b'path'):
1499 elif kind in (b'relpath', b'path'):
1500 if pat == b'.':
1500 if pat == b'.':
1501 pat = b''
1501 pat = b''
1502 r.append(pat)
1502 r.append(pat)
1503 elif kind in (b'rootfilesin',):
1503 elif kind in (b'rootfilesin',):
1504 if pat == b'.':
1504 if pat == b'.':
1505 pat = b''
1505 pat = b''
1506 d.append(pat)
1506 d.append(pat)
1507 else: # relglob, re, relre
1507 else: # relglob, re, relre
1508 r.append(b'')
1508 r.append(b'')
1509 return r, d
1509 return r, d
1510
1510
1511
1511
def _roots(kindpats):
    '''Return the root directories to match recursively for the patterns.'''
    # only the roots half of the (roots, dirs) pair is wanted here
    return _patternrootsanddirs(kindpats)[0]
1516
1516
1517
1517
def _rootsdirsandparents(kindpats):
    """Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    roots, dirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set()
    for group in (dirs, roots):
        parents.update(pathutil.dirs(group))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1560
1560
1561
1561
def _explicitfiles(kindpats):
    """Return the potential explicit filenames named by the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    dironlykinds = (b'rootfilesin',)
    filable = []
    for entry in kindpats:
        if entry[0] in dironlykinds:
            continue
        filable.append(entry)
    return _roots(filable)
1574
1574
1575
1575
1576 def _prefix(kindpats):
1576 def _prefix(kindpats):
1577 '''Whether all the patterns match a prefix (i.e. recursively)'''
1577 '''Whether all the patterns match a prefix (i.e. recursively)'''
1578 for kind, pat, source in kindpats:
1578 for kind, pat, source in kindpats:
1579 if kind not in (b'path', b'relpath'):
1579 if kind not in (b'path', b'relpath'):
1580 return False
1580 return False
1581 return True
1581 return True
1582
1582
1583
1583
# Regexp used by readpatternfile() to strip comments. It stays None until
# the first line containing b"#" is read, at which point it is compiled and
# stored here.
_commentre = None
1585
1585
1586
1586
1587 def readpatternfile(filepath, warn, sourceinfo=False):
1587 def readpatternfile(filepath, warn, sourceinfo=False):
1588 """parse a pattern file, returning a list of
1588 """parse a pattern file, returning a list of
1589 patterns. These patterns should be given to compile()
1589 patterns. These patterns should be given to compile()
1590 to be validated and converted into a match function.
1590 to be validated and converted into a match function.
1591
1591
1592 trailing white space is dropped.
1592 trailing white space is dropped.
1593 the escape character is backslash.
1593 the escape character is backslash.
1594 comments start with #.
1594 comments start with #.
1595 empty lines are skipped.
1595 empty lines are skipped.
1596
1596
1597 lines can be of the following formats:
1597 lines can be of the following formats:
1598
1598
1599 syntax: regexp # defaults following lines to non-rooted regexps
1599 syntax: regexp # defaults following lines to non-rooted regexps
1600 syntax: glob # defaults following lines to non-rooted globs
1600 syntax: glob # defaults following lines to non-rooted globs
1601 re:pattern # non-rooted regular expression
1601 re:pattern # non-rooted regular expression
1602 glob:pattern # non-rooted glob
1602 glob:pattern # non-rooted glob
1603 rootglob:pat # rooted glob (same root as ^ in regexps)
1603 rootglob:pat # rooted glob (same root as ^ in regexps)
1604 pattern # pattern of the current default type
1604 pattern # pattern of the current default type
1605
1605
1606 if sourceinfo is set, returns a list of tuples:
1606 if sourceinfo is set, returns a list of tuples:
1607 (pattern, lineno, originalline).
1607 (pattern, lineno, originalline).
1608 This is useful to debug ignore patterns.
1608 This is useful to debug ignore patterns.
1609 """
1609 """
1610
1610
1611 syntaxes = {
1611 syntaxes = {
1612 b're': b'relre:',
1612 b're': b'relre:',
1613 b'regexp': b'relre:',
1613 b'regexp': b'relre:',
1614 b'glob': b'relglob:',
1614 b'glob': b'relglob:',
1615 b'rootglob': b'rootglob:',
1615 b'rootglob': b'rootglob:',
1616 b'include': b'include',
1616 b'include': b'include',
1617 b'subinclude': b'subinclude',
1617 b'subinclude': b'subinclude',
1618 }
1618 }
1619 syntax = b'relre:'
1619 syntax = b'relre:'
1620 patterns = []
1620 patterns = []
1621
1621
1622 fp = open(filepath, b'rb')
1622 fp = open(filepath, b'rb')
1623 for lineno, line in enumerate(fp, start=1):
1623 for lineno, line in enumerate(fp, start=1):
1624 if b"#" in line:
1624 if b"#" in line:
1625 global _commentre
1625 global _commentre
1626 if not _commentre:
1626 if not _commentre:
1627 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1627 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1628 # remove comments prefixed by an even number of escapes
1628 # remove comments prefixed by an even number of escapes
1629 m = _commentre.search(line)
1629 m = _commentre.search(line)
1630 if m:
1630 if m:
1631 line = line[: m.end(1)]
1631 line = line[: m.end(1)]
1632 # fixup properly escaped comments that survived the above
1632 # fixup properly escaped comments that survived the above
1633 line = line.replace(b"\\#", b"#")
1633 line = line.replace(b"\\#", b"#")
1634 line = line.rstrip()
1634 line = line.rstrip()
1635 if not line:
1635 if not line:
1636 continue
1636 continue
1637
1637
1638 if line.startswith(b'syntax:'):
1638 if line.startswith(b'syntax:'):
1639 s = line[7:].strip()
1639 s = line[7:].strip()
1640 try:
1640 try:
1641 syntax = syntaxes[s]
1641 syntax = syntaxes[s]
1642 except KeyError:
1642 except KeyError:
1643 if warn:
1643 if warn:
1644 warn(
1644 warn(
1645 _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
1645 _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
1646 )
1646 )
1647 continue
1647 continue
1648
1648
1649 linesyntax = syntax
1649 linesyntax = syntax
1650 for s, rels in syntaxes.items():
1650 for s, rels in syntaxes.items():
1651 if line.startswith(rels):
1651 if line.startswith(rels):
1652 linesyntax = rels
1652 linesyntax = rels
1653 line = line[len(rels) :]
1653 line = line[len(rels) :]
1654 break
1654 break
1655 elif line.startswith(s + b':'):
1655 elif line.startswith(s + b':'):
1656 linesyntax = rels
1656 linesyntax = rels
1657 line = line[len(s) + 1 :]
1657 line = line[len(s) + 1 :]
1658 break
1658 break
1659 if sourceinfo:
1659 if sourceinfo:
1660 patterns.append((linesyntax + line, lineno, line))
1660 patterns.append((linesyntax + line, lineno, line))
1661 else:
1661 else:
1662 patterns.append(linesyntax + line)
1662 patterns.append(linesyntax + line)
1663 fp.close()
1663 fp.close()
1664 return patterns
1664 return patterns
General Comments 0
You need to be logged in to leave comments. Login now