##// END OF EJS Templates
match: skip walking up the directory hierarchy if the number of pats are small...
Kyle Lippincott -
r46614:c4c7a6b6 default
parent child Browse files
Show More
@@ -1,1617 +1,1637 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import open
16 from .pycompat import open
17 from . import (
17 from . import (
18 encoding,
18 encoding,
19 error,
19 error,
20 pathutil,
20 pathutil,
21 policy,
21 policy,
22 pycompat,
22 pycompat,
23 util,
23 util,
24 )
24 )
25 from .utils import stringutil
25 from .utils import stringutil
26
26
27 rustmod = policy.importrust('dirstate')
27 rustmod = policy.importrust('dirstate')
28
28
29 allpatternkinds = (
29 allpatternkinds = (
30 b're',
30 b're',
31 b'glob',
31 b'glob',
32 b'path',
32 b'path',
33 b'relglob',
33 b'relglob',
34 b'relpath',
34 b'relpath',
35 b'relre',
35 b'relre',
36 b'rootglob',
36 b'rootglob',
37 b'listfile',
37 b'listfile',
38 b'listfile0',
38 b'listfile0',
39 b'set',
39 b'set',
40 b'include',
40 b'include',
41 b'subinclude',
41 b'subinclude',
42 b'rootfilesin',
42 b'rootfilesin',
43 )
43 )
44 cwdrelativepatternkinds = (b'relpath', b'glob')
44 cwdrelativepatternkinds = (b'relpath', b'glob')
45
45
46 propertycache = util.propertycache
46 propertycache = util.propertycache
47
47
48
48
49 def _rematcher(regex):
49 def _rematcher(regex):
50 """compile the regexp with the best available regexp engine and return a
50 """compile the regexp with the best available regexp engine and return a
51 matcher function"""
51 matcher function"""
52 m = util.re.compile(regex)
52 m = util.re.compile(regex)
53 try:
53 try:
54 # slightly faster, provided by facebook's re2 bindings
54 # slightly faster, provided by facebook's re2 bindings
55 return m.test_match
55 return m.test_match
56 except AttributeError:
56 except AttributeError:
57 return m.match
57 return m.match
58
58
59
59
60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 matchers = []
62 matchers = []
63 other = []
63 other = []
64
64
65 for kind, pat, source in kindpats:
65 for kind, pat, source in kindpats:
66 if kind == b'set':
66 if kind == b'set':
67 if ctx is None:
67 if ctx is None:
68 raise error.ProgrammingError(
68 raise error.ProgrammingError(
69 b"fileset expression with no context"
69 b"fileset expression with no context"
70 )
70 )
71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
72
72
73 if listsubrepos:
73 if listsubrepos:
74 for subpath in ctx.substate:
74 for subpath in ctx.substate:
75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 matchers.append(pm)
77 matchers.append(pm)
78
78
79 continue
79 continue
80 other.append((kind, pat, source))
80 other.append((kind, pat, source))
81 return matchers, other
81 return matchers, other
82
82
83
83
84 def _expandsubinclude(kindpats, root):
84 def _expandsubinclude(kindpats, root):
85 """Returns the list of subinclude matcher args and the kindpats without the
85 """Returns the list of subinclude matcher args and the kindpats without the
86 subincludes in it."""
86 subincludes in it."""
87 relmatchers = []
87 relmatchers = []
88 other = []
88 other = []
89
89
90 for kind, pat, source in kindpats:
90 for kind, pat, source in kindpats:
91 if kind == b'subinclude':
91 if kind == b'subinclude':
92 sourceroot = pathutil.dirname(util.normpath(source))
92 sourceroot = pathutil.dirname(util.normpath(source))
93 pat = util.pconvert(pat)
93 pat = util.pconvert(pat)
94 path = pathutil.join(sourceroot, pat)
94 path = pathutil.join(sourceroot, pat)
95
95
96 newroot = pathutil.dirname(path)
96 newroot = pathutil.dirname(path)
97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98
98
99 prefix = pathutil.canonpath(root, root, newroot)
99 prefix = pathutil.canonpath(root, root, newroot)
100 if prefix:
100 if prefix:
101 prefix += b'/'
101 prefix += b'/'
102 relmatchers.append((prefix, matcherargs))
102 relmatchers.append((prefix, matcherargs))
103 else:
103 else:
104 other.append((kind, pat, source))
104 other.append((kind, pat, source))
105
105
106 return relmatchers, other
106 return relmatchers, other
107
107
108
108
109 def _kindpatsalwaysmatch(kindpats):
109 def _kindpatsalwaysmatch(kindpats):
110 """Checks whether the kindspats match everything, as e.g.
110 """Checks whether the kindspats match everything, as e.g.
111 'relpath:.' does.
111 'relpath:.' does.
112 """
112 """
113 for kind, pat, source in kindpats:
113 for kind, pat, source in kindpats:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 return False
115 return False
116 return True
116 return True
117
117
118
118
119 def _buildkindpatsmatcher(
119 def _buildkindpatsmatcher(
120 matchercls,
120 matchercls,
121 root,
121 root,
122 cwd,
122 cwd,
123 kindpats,
123 kindpats,
124 ctx=None,
124 ctx=None,
125 listsubrepos=False,
125 listsubrepos=False,
126 badfn=None,
126 badfn=None,
127 ):
127 ):
128 matchers = []
128 matchers = []
129 fms, kindpats = _expandsets(
129 fms, kindpats = _expandsets(
130 cwd,
130 cwd,
131 kindpats,
131 kindpats,
132 ctx=ctx,
132 ctx=ctx,
133 listsubrepos=listsubrepos,
133 listsubrepos=listsubrepos,
134 badfn=badfn,
134 badfn=badfn,
135 )
135 )
136 if kindpats:
136 if kindpats:
137 m = matchercls(root, kindpats, badfn=badfn)
137 m = matchercls(root, kindpats, badfn=badfn)
138 matchers.append(m)
138 matchers.append(m)
139 if fms:
139 if fms:
140 matchers.extend(fms)
140 matchers.extend(fms)
141 if not matchers:
141 if not matchers:
142 return nevermatcher(badfn=badfn)
142 return nevermatcher(badfn=badfn)
143 if len(matchers) == 1:
143 if len(matchers) == 1:
144 return matchers[0]
144 return matchers[0]
145 return unionmatcher(matchers)
145 return unionmatcher(matchers)
146
146
147
147
148 def match(
148 def match(
149 root,
149 root,
150 cwd,
150 cwd,
151 patterns=None,
151 patterns=None,
152 include=None,
152 include=None,
153 exclude=None,
153 exclude=None,
154 default=b'glob',
154 default=b'glob',
155 auditor=None,
155 auditor=None,
156 ctx=None,
156 ctx=None,
157 listsubrepos=False,
157 listsubrepos=False,
158 warn=None,
158 warn=None,
159 badfn=None,
159 badfn=None,
160 icasefs=False,
160 icasefs=False,
161 ):
161 ):
162 r"""build an object to match a set of file patterns
162 r"""build an object to match a set of file patterns
163
163
164 arguments:
164 arguments:
165 root - the canonical root of the tree you're matching against
165 root - the canonical root of the tree you're matching against
166 cwd - the current working directory, if relevant
166 cwd - the current working directory, if relevant
167 patterns - patterns to find
167 patterns - patterns to find
168 include - patterns to include (unless they are excluded)
168 include - patterns to include (unless they are excluded)
169 exclude - patterns to exclude (even if they are included)
169 exclude - patterns to exclude (even if they are included)
170 default - if a pattern in patterns has no explicit type, assume this one
170 default - if a pattern in patterns has no explicit type, assume this one
171 auditor - optional path auditor
171 auditor - optional path auditor
172 ctx - optional changecontext
172 ctx - optional changecontext
173 listsubrepos - if True, recurse into subrepositories
173 listsubrepos - if True, recurse into subrepositories
174 warn - optional function used for printing warnings
174 warn - optional function used for printing warnings
175 badfn - optional bad() callback for this matcher instead of the default
175 badfn - optional bad() callback for this matcher instead of the default
176 icasefs - make a matcher for wdir on case insensitive filesystems, which
176 icasefs - make a matcher for wdir on case insensitive filesystems, which
177 normalizes the given patterns to the case in the filesystem
177 normalizes the given patterns to the case in the filesystem
178
178
179 a pattern is one of:
179 a pattern is one of:
180 'glob:<glob>' - a glob relative to cwd
180 'glob:<glob>' - a glob relative to cwd
181 're:<regexp>' - a regular expression
181 're:<regexp>' - a regular expression
182 'path:<path>' - a path relative to repository root, which is matched
182 'path:<path>' - a path relative to repository root, which is matched
183 recursively
183 recursively
184 'rootfilesin:<path>' - a path relative to repository root, which is
184 'rootfilesin:<path>' - a path relative to repository root, which is
185 matched non-recursively (will not match subdirectories)
185 matched non-recursively (will not match subdirectories)
186 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
186 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
187 'relpath:<path>' - a path relative to cwd
187 'relpath:<path>' - a path relative to cwd
188 'relre:<regexp>' - a regexp that needn't match the start of a name
188 'relre:<regexp>' - a regexp that needn't match the start of a name
189 'set:<fileset>' - a fileset expression
189 'set:<fileset>' - a fileset expression
190 'include:<path>' - a file of patterns to read and include
190 'include:<path>' - a file of patterns to read and include
191 'subinclude:<path>' - a file of patterns to match against files under
191 'subinclude:<path>' - a file of patterns to match against files under
192 the same directory
192 the same directory
193 '<something>' - a pattern of the specified default type
193 '<something>' - a pattern of the specified default type
194
194
195 >>> def _match(root, *args, **kwargs):
195 >>> def _match(root, *args, **kwargs):
196 ... return match(util.localpath(root), *args, **kwargs)
196 ... return match(util.localpath(root), *args, **kwargs)
197
197
198 Usually a patternmatcher is returned:
198 Usually a patternmatcher is returned:
199 >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
199 >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
200 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
200 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
201
201
202 Combining 'patterns' with 'include' (resp. 'exclude') gives an
202 Combining 'patterns' with 'include' (resp. 'exclude') gives an
203 intersectionmatcher (resp. a differencematcher):
203 intersectionmatcher (resp. a differencematcher):
204 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
204 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
205 <class 'mercurial.match.intersectionmatcher'>
205 <class 'mercurial.match.intersectionmatcher'>
206 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
206 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
207 <class 'mercurial.match.differencematcher'>
207 <class 'mercurial.match.differencematcher'>
208
208
209 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
209 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
210 >>> _match(b'/foo', b'.', [])
210 >>> _match(b'/foo', b'.', [])
211 <alwaysmatcher>
211 <alwaysmatcher>
212
212
213 The 'default' argument determines which kind of pattern is assumed if a
213 The 'default' argument determines which kind of pattern is assumed if a
214 pattern has no prefix:
214 pattern has no prefix:
215 >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
215 >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
216 <patternmatcher patterns='.*\\.c$'>
216 <patternmatcher patterns='.*\\.c$'>
217 >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
217 >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
218 <patternmatcher patterns='main\\.py(?:/|$)'>
218 <patternmatcher patterns='main\\.py(?:/|$)'>
219 >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
219 >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
220 <patternmatcher patterns='main.py'>
220 <patternmatcher patterns='main.py'>
221
221
222 The primary use of matchers is to check whether a value (usually a file
222 The primary use of matchers is to check whether a value (usually a file
223 name) matches againset one of the patterns given at initialization. There
223 name) matches againset one of the patterns given at initialization. There
224 are two ways of doing this check.
224 are two ways of doing this check.
225
225
226 >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])
226 >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])
227
227
228 1. Calling the matcher with a file name returns True if any pattern
228 1. Calling the matcher with a file name returns True if any pattern
229 matches that file name:
229 matches that file name:
230 >>> m(b'a')
230 >>> m(b'a')
231 True
231 True
232 >>> m(b'main.c')
232 >>> m(b'main.c')
233 True
233 True
234 >>> m(b'test.py')
234 >>> m(b'test.py')
235 False
235 False
236
236
237 2. Using the exact() method only returns True if the file name matches one
237 2. Using the exact() method only returns True if the file name matches one
238 of the exact patterns (i.e. not re: or glob: patterns):
238 of the exact patterns (i.e. not re: or glob: patterns):
239 >>> m.exact(b'a')
239 >>> m.exact(b'a')
240 True
240 True
241 >>> m.exact(b'main.c')
241 >>> m.exact(b'main.c')
242 False
242 False
243 """
243 """
244 assert os.path.isabs(root)
244 assert os.path.isabs(root)
245 cwd = os.path.join(root, util.localpath(cwd))
245 cwd = os.path.join(root, util.localpath(cwd))
246 normalize = _donormalize
246 normalize = _donormalize
247 if icasefs:
247 if icasefs:
248 dirstate = ctx.repo().dirstate
248 dirstate = ctx.repo().dirstate
249 dsnormalize = dirstate.normalize
249 dsnormalize = dirstate.normalize
250
250
251 def normalize(patterns, default, root, cwd, auditor, warn):
251 def normalize(patterns, default, root, cwd, auditor, warn):
252 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
252 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
253 kindpats = []
253 kindpats = []
254 for kind, pats, source in kp:
254 for kind, pats, source in kp:
255 if kind not in (b're', b'relre'): # regex can't be normalized
255 if kind not in (b're', b'relre'): # regex can't be normalized
256 p = pats
256 p = pats
257 pats = dsnormalize(pats)
257 pats = dsnormalize(pats)
258
258
259 # Preserve the original to handle a case only rename.
259 # Preserve the original to handle a case only rename.
260 if p != pats and p in dirstate:
260 if p != pats and p in dirstate:
261 kindpats.append((kind, p, source))
261 kindpats.append((kind, p, source))
262
262
263 kindpats.append((kind, pats, source))
263 kindpats.append((kind, pats, source))
264 return kindpats
264 return kindpats
265
265
266 if patterns:
266 if patterns:
267 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
267 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
268 if _kindpatsalwaysmatch(kindpats):
268 if _kindpatsalwaysmatch(kindpats):
269 m = alwaysmatcher(badfn)
269 m = alwaysmatcher(badfn)
270 else:
270 else:
271 m = _buildkindpatsmatcher(
271 m = _buildkindpatsmatcher(
272 patternmatcher,
272 patternmatcher,
273 root,
273 root,
274 cwd,
274 cwd,
275 kindpats,
275 kindpats,
276 ctx=ctx,
276 ctx=ctx,
277 listsubrepos=listsubrepos,
277 listsubrepos=listsubrepos,
278 badfn=badfn,
278 badfn=badfn,
279 )
279 )
280 else:
280 else:
281 # It's a little strange that no patterns means to match everything.
281 # It's a little strange that no patterns means to match everything.
282 # Consider changing this to match nothing (probably using nevermatcher).
282 # Consider changing this to match nothing (probably using nevermatcher).
283 m = alwaysmatcher(badfn)
283 m = alwaysmatcher(badfn)
284
284
285 if include:
285 if include:
286 kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
286 kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
287 im = _buildkindpatsmatcher(
287 im = _buildkindpatsmatcher(
288 includematcher,
288 includematcher,
289 root,
289 root,
290 cwd,
290 cwd,
291 kindpats,
291 kindpats,
292 ctx=ctx,
292 ctx=ctx,
293 listsubrepos=listsubrepos,
293 listsubrepos=listsubrepos,
294 badfn=None,
294 badfn=None,
295 )
295 )
296 m = intersectmatchers(m, im)
296 m = intersectmatchers(m, im)
297 if exclude:
297 if exclude:
298 kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
298 kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
299 em = _buildkindpatsmatcher(
299 em = _buildkindpatsmatcher(
300 includematcher,
300 includematcher,
301 root,
301 root,
302 cwd,
302 cwd,
303 kindpats,
303 kindpats,
304 ctx=ctx,
304 ctx=ctx,
305 listsubrepos=listsubrepos,
305 listsubrepos=listsubrepos,
306 badfn=None,
306 badfn=None,
307 )
307 )
308 m = differencematcher(m, em)
308 m = differencematcher(m, em)
309 return m
309 return m
310
310
311
311
312 def exact(files, badfn=None):
312 def exact(files, badfn=None):
313 return exactmatcher(files, badfn=badfn)
313 return exactmatcher(files, badfn=badfn)
314
314
315
315
316 def always(badfn=None):
316 def always(badfn=None):
317 return alwaysmatcher(badfn)
317 return alwaysmatcher(badfn)
318
318
319
319
320 def never(badfn=None):
320 def never(badfn=None):
321 return nevermatcher(badfn)
321 return nevermatcher(badfn)
322
322
323
323
324 def badmatch(match, badfn):
324 def badmatch(match, badfn):
325 """Make a copy of the given matcher, replacing its bad method with the given
325 """Make a copy of the given matcher, replacing its bad method with the given
326 one.
326 one.
327 """
327 """
328 m = copy.copy(match)
328 m = copy.copy(match)
329 m.bad = badfn
329 m.bad = badfn
330 return m
330 return m
331
331
332
332
333 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
333 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
334 """Convert 'kind:pat' from the patterns list to tuples with kind and
334 """Convert 'kind:pat' from the patterns list to tuples with kind and
335 normalized and rooted patterns and with listfiles expanded."""
335 normalized and rooted patterns and with listfiles expanded."""
336 kindpats = []
336 kindpats = []
337 for kind, pat in [_patsplit(p, default) for p in patterns]:
337 for kind, pat in [_patsplit(p, default) for p in patterns]:
338 if kind in cwdrelativepatternkinds:
338 if kind in cwdrelativepatternkinds:
339 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
339 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
340 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
340 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
341 pat = util.normpath(pat)
341 pat = util.normpath(pat)
342 elif kind in (b'listfile', b'listfile0'):
342 elif kind in (b'listfile', b'listfile0'):
343 try:
343 try:
344 files = util.readfile(pat)
344 files = util.readfile(pat)
345 if kind == b'listfile0':
345 if kind == b'listfile0':
346 files = files.split(b'\0')
346 files = files.split(b'\0')
347 else:
347 else:
348 files = files.splitlines()
348 files = files.splitlines()
349 files = [f for f in files if f]
349 files = [f for f in files if f]
350 except EnvironmentError:
350 except EnvironmentError:
351 raise error.Abort(_(b"unable to read file list (%s)") % pat)
351 raise error.Abort(_(b"unable to read file list (%s)") % pat)
352 for k, p, source in _donormalize(
352 for k, p, source in _donormalize(
353 files, default, root, cwd, auditor, warn
353 files, default, root, cwd, auditor, warn
354 ):
354 ):
355 kindpats.append((k, p, pat))
355 kindpats.append((k, p, pat))
356 continue
356 continue
357 elif kind == b'include':
357 elif kind == b'include':
358 try:
358 try:
359 fullpath = os.path.join(root, util.localpath(pat))
359 fullpath = os.path.join(root, util.localpath(pat))
360 includepats = readpatternfile(fullpath, warn)
360 includepats = readpatternfile(fullpath, warn)
361 for k, p, source in _donormalize(
361 for k, p, source in _donormalize(
362 includepats, default, root, cwd, auditor, warn
362 includepats, default, root, cwd, auditor, warn
363 ):
363 ):
364 kindpats.append((k, p, source or pat))
364 kindpats.append((k, p, source or pat))
365 except error.Abort as inst:
365 except error.Abort as inst:
366 raise error.Abort(
366 raise error.Abort(
367 b'%s: %s'
367 b'%s: %s'
368 % (
368 % (
369 pat,
369 pat,
370 inst.message,
370 inst.message,
371 ) # pytype: disable=unsupported-operands
371 ) # pytype: disable=unsupported-operands
372 )
372 )
373 except IOError as inst:
373 except IOError as inst:
374 if warn:
374 if warn:
375 warn(
375 warn(
376 _(b"skipping unreadable pattern file '%s': %s\n")
376 _(b"skipping unreadable pattern file '%s': %s\n")
377 % (pat, stringutil.forcebytestr(inst.strerror))
377 % (pat, stringutil.forcebytestr(inst.strerror))
378 )
378 )
379 continue
379 continue
380 # else: re or relre - which cannot be normalized
380 # else: re or relre - which cannot be normalized
381 kindpats.append((kind, pat, b''))
381 kindpats.append((kind, pat, b''))
382 return kindpats
382 return kindpats
383
383
384
384
385 class basematcher(object):
385 class basematcher(object):
386 def __init__(self, badfn=None):
386 def __init__(self, badfn=None):
387 if badfn is not None:
387 if badfn is not None:
388 self.bad = badfn
388 self.bad = badfn
389
389
390 def __call__(self, fn):
390 def __call__(self, fn):
391 return self.matchfn(fn)
391 return self.matchfn(fn)
392
392
393 # Callbacks related to how the matcher is used by dirstate.walk.
393 # Callbacks related to how the matcher is used by dirstate.walk.
394 # Subscribers to these events must monkeypatch the matcher object.
394 # Subscribers to these events must monkeypatch the matcher object.
395 def bad(self, f, msg):
395 def bad(self, f, msg):
396 """Callback from dirstate.walk for each explicit file that can't be
396 """Callback from dirstate.walk for each explicit file that can't be
397 found/accessed, with an error message."""
397 found/accessed, with an error message."""
398
398
399 # If an traversedir is set, it will be called when a directory discovered
399 # If an traversedir is set, it will be called when a directory discovered
400 # by recursive traversal is visited.
400 # by recursive traversal is visited.
401 traversedir = None
401 traversedir = None
402
402
403 @propertycache
403 @propertycache
404 def _files(self):
404 def _files(self):
405 return []
405 return []
406
406
407 def files(self):
407 def files(self):
408 """Explicitly listed files or patterns or roots:
408 """Explicitly listed files or patterns or roots:
409 if no patterns or .always(): empty list,
409 if no patterns or .always(): empty list,
410 if exact: list exact files,
410 if exact: list exact files,
411 if not .anypats(): list all files and dirs,
411 if not .anypats(): list all files and dirs,
412 else: optimal roots"""
412 else: optimal roots"""
413 return self._files
413 return self._files
414
414
415 @propertycache
415 @propertycache
416 def _fileset(self):
416 def _fileset(self):
417 return set(self._files)
417 return set(self._files)
418
418
419 def exact(self, f):
419 def exact(self, f):
420 '''Returns True if f is in .files().'''
420 '''Returns True if f is in .files().'''
421 return f in self._fileset
421 return f in self._fileset
422
422
423 def matchfn(self, f):
423 def matchfn(self, f):
424 return False
424 return False
425
425
426 def visitdir(self, dir):
426 def visitdir(self, dir):
427 """Decides whether a directory should be visited based on whether it
427 """Decides whether a directory should be visited based on whether it
428 has potential matches in it or one of its subdirectories. This is
428 has potential matches in it or one of its subdirectories. This is
429 based on the match's primary, included, and excluded patterns.
429 based on the match's primary, included, and excluded patterns.
430
430
431 Returns the string 'all' if the given directory and all subdirectories
431 Returns the string 'all' if the given directory and all subdirectories
432 should be visited. Otherwise returns True or False indicating whether
432 should be visited. Otherwise returns True or False indicating whether
433 the given directory should be visited.
433 the given directory should be visited.
434 """
434 """
435 return True
435 return True
436
436
437 def visitchildrenset(self, dir):
437 def visitchildrenset(self, dir):
438 """Decides whether a directory should be visited based on whether it
438 """Decides whether a directory should be visited based on whether it
439 has potential matches in it or one of its subdirectories, and
439 has potential matches in it or one of its subdirectories, and
440 potentially lists which subdirectories of that directory should be
440 potentially lists which subdirectories of that directory should be
441 visited. This is based on the match's primary, included, and excluded
441 visited. This is based on the match's primary, included, and excluded
442 patterns.
442 patterns.
443
443
444 This function is very similar to 'visitdir', and the following mapping
444 This function is very similar to 'visitdir', and the following mapping
445 can be applied:
445 can be applied:
446
446
447 visitdir | visitchildrenlist
447 visitdir | visitchildrenlist
448 ----------+-------------------
448 ----------+-------------------
449 False | set()
449 False | set()
450 'all' | 'all'
450 'all' | 'all'
451 True | 'this' OR non-empty set of subdirs -or files- to visit
451 True | 'this' OR non-empty set of subdirs -or files- to visit
452
452
453 Example:
453 Example:
454 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
454 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
455 the following values (assuming the implementation of visitchildrenset
455 the following values (assuming the implementation of visitchildrenset
456 is capable of recognizing this; some implementations are not).
456 is capable of recognizing this; some implementations are not).
457
457
458 '' -> {'foo', 'qux'}
458 '' -> {'foo', 'qux'}
459 'baz' -> set()
459 'baz' -> set()
460 'foo' -> {'bar'}
460 'foo' -> {'bar'}
461 # Ideally this would be 'all', but since the prefix nature of matchers
461 # Ideally this would be 'all', but since the prefix nature of matchers
462 # is applied to the entire matcher, we have to downgrade this to
462 # is applied to the entire matcher, we have to downgrade this to
463 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
463 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
464 # in.
464 # in.
465 'foo/bar' -> 'this'
465 'foo/bar' -> 'this'
466 'qux' -> 'this'
466 'qux' -> 'this'
467
467
468 Important:
468 Important:
469 Most matchers do not know if they're representing files or
469 Most matchers do not know if they're representing files or
470 directories. They see ['path:dir/f'] and don't know whether 'f' is a
470 directories. They see ['path:dir/f'] and don't know whether 'f' is a
471 file or a directory, so visitchildrenset('dir') for most matchers will
471 file or a directory, so visitchildrenset('dir') for most matchers will
472 return {'f'}, but if the matcher knows it's a file (like exactmatcher
472 return {'f'}, but if the matcher knows it's a file (like exactmatcher
473 does), it may return 'this'. Do not rely on the return being a set
473 does), it may return 'this'. Do not rely on the return being a set
474 indicating that there are no files in this dir to investigate (or
474 indicating that there are no files in this dir to investigate (or
475 equivalently that if there are files to investigate in 'dir' that it
475 equivalently that if there are files to investigate in 'dir' that it
476 will always return 'this').
476 will always return 'this').
477 """
477 """
478 return b'this'
478 return b'this'
479
479
480 def always(self):
480 def always(self):
481 """Matcher will match everything and .files() will be empty --
481 """Matcher will match everything and .files() will be empty --
482 optimization might be possible."""
482 optimization might be possible."""
483 return False
483 return False
484
484
485 def isexact(self):
485 def isexact(self):
486 """Matcher will match exactly the list of files in .files() --
486 """Matcher will match exactly the list of files in .files() --
487 optimization might be possible."""
487 optimization might be possible."""
488 return False
488 return False
489
489
490 def prefix(self):
490 def prefix(self):
491 """Matcher will match the paths in .files() recursively --
491 """Matcher will match the paths in .files() recursively --
492 optimization might be possible."""
492 optimization might be possible."""
493 return False
493 return False
494
494
495 def anypats(self):
495 def anypats(self):
496 """None of .always(), .isexact(), and .prefix() is true --
496 """None of .always(), .isexact(), and .prefix() is true --
497 optimizations will be difficult."""
497 optimizations will be difficult."""
498 return not self.always() and not self.isexact() and not self.prefix()
498 return not self.always() and not self.isexact() and not self.prefix()
499
499
500
500
501 class alwaysmatcher(basematcher):
501 class alwaysmatcher(basematcher):
502 '''Matches everything.'''
502 '''Matches everything.'''
503
503
504 def __init__(self, badfn=None):
504 def __init__(self, badfn=None):
505 super(alwaysmatcher, self).__init__(badfn)
505 super(alwaysmatcher, self).__init__(badfn)
506
506
507 def always(self):
507 def always(self):
508 return True
508 return True
509
509
510 def matchfn(self, f):
510 def matchfn(self, f):
511 return True
511 return True
512
512
513 def visitdir(self, dir):
513 def visitdir(self, dir):
514 return b'all'
514 return b'all'
515
515
516 def visitchildrenset(self, dir):
516 def visitchildrenset(self, dir):
517 return b'all'
517 return b'all'
518
518
519 def __repr__(self):
519 def __repr__(self):
520 return r'<alwaysmatcher>'
520 return r'<alwaysmatcher>'
521
521
522
522
523 class nevermatcher(basematcher):
523 class nevermatcher(basematcher):
524 '''Matches nothing.'''
524 '''Matches nothing.'''
525
525
526 def __init__(self, badfn=None):
526 def __init__(self, badfn=None):
527 super(nevermatcher, self).__init__(badfn)
527 super(nevermatcher, self).__init__(badfn)
528
528
529 # It's a little weird to say that the nevermatcher is an exact matcher
529 # It's a little weird to say that the nevermatcher is an exact matcher
530 # or a prefix matcher, but it seems to make sense to let callers take
530 # or a prefix matcher, but it seems to make sense to let callers take
531 # fast paths based on either. There will be no exact matches, nor any
531 # fast paths based on either. There will be no exact matches, nor any
532 # prefixes (files() returns []), so fast paths iterating over them should
532 # prefixes (files() returns []), so fast paths iterating over them should
533 # be efficient (and correct).
533 # be efficient (and correct).
534 def isexact(self):
534 def isexact(self):
535 return True
535 return True
536
536
537 def prefix(self):
537 def prefix(self):
538 return True
538 return True
539
539
540 def visitdir(self, dir):
540 def visitdir(self, dir):
541 return False
541 return False
542
542
543 def visitchildrenset(self, dir):
543 def visitchildrenset(self, dir):
544 return set()
544 return set()
545
545
546 def __repr__(self):
546 def __repr__(self):
547 return r'<nevermatcher>'
547 return r'<nevermatcher>'
548
548
549
549
550 class predicatematcher(basematcher):
550 class predicatematcher(basematcher):
551 """A matcher adapter for a simple boolean function"""
551 """A matcher adapter for a simple boolean function"""
552
552
553 def __init__(self, predfn, predrepr=None, badfn=None):
553 def __init__(self, predfn, predrepr=None, badfn=None):
554 super(predicatematcher, self).__init__(badfn)
554 super(predicatematcher, self).__init__(badfn)
555 self.matchfn = predfn
555 self.matchfn = predfn
556 self._predrepr = predrepr
556 self._predrepr = predrepr
557
557
558 @encoding.strmethod
558 @encoding.strmethod
559 def __repr__(self):
559 def __repr__(self):
560 s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(
560 s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(
561 self.matchfn
561 self.matchfn
562 )
562 )
563 return b'<predicatenmatcher pred=%s>' % s
563 return b'<predicatenmatcher pred=%s>' % s
564
564
565
565
566 def path_or_parents_in_set(path, prefix_set):
567 """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
568 l = len(prefix_set)
569 if l == 0:
570 return False
571 if path in prefix_set:
572 return True
573 # If there are more than 5 paths in prefix_set, it's *probably* quicker to
574 # "walk up" the directory hierarchy instead, with the assumption that most
575 # directory hierarchies are relatively shallow and hash lookup is cheap.
576 if l > 5:
577 return any(
578 parentdir in prefix_set for parentdir in pathutil.finddirs(path)
579 )
580
581 # FIXME: Ideally we'd never get to this point if this is the case - we'd
582 # recognize ourselves as an 'always' matcher and skip this.
583 if b'' in prefix_set:
584 return True
585
586 if pycompat.ispy3:
587 sl = ord(b'/')
588 else:
589 sl = '/'
590
591 # We already checked that path isn't in prefix_set exactly, so
592 # `path[len(pf)]` should never raise IndexError.
593 return any(path.startswith(pf) and path[len(pf)] == sl for pf in prefix_set)
594
595
566 class patternmatcher(basematcher):
596 class patternmatcher(basematcher):
567 r"""Matches a set of (kind, pat, source) against a 'root' directory.
597 r"""Matches a set of (kind, pat, source) against a 'root' directory.
568
598
569 >>> kindpats = [
599 >>> kindpats = [
570 ... (b're', br'.*\.c$', b''),
600 ... (b're', br'.*\.c$', b''),
571 ... (b'path', b'foo/a', b''),
601 ... (b'path', b'foo/a', b''),
572 ... (b'relpath', b'b', b''),
602 ... (b'relpath', b'b', b''),
573 ... (b'glob', b'*.h', b''),
603 ... (b'glob', b'*.h', b''),
574 ... ]
604 ... ]
575 >>> m = patternmatcher(b'foo', kindpats)
605 >>> m = patternmatcher(b'foo', kindpats)
576 >>> m(b'main.c') # matches re:.*\.c$
606 >>> m(b'main.c') # matches re:.*\.c$
577 True
607 True
578 >>> m(b'b.txt')
608 >>> m(b'b.txt')
579 False
609 False
580 >>> m(b'foo/a') # matches path:foo/a
610 >>> m(b'foo/a') # matches path:foo/a
581 True
611 True
582 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
612 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
583 False
613 False
584 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
614 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
585 True
615 True
586 >>> m(b'lib.h') # matches glob:*.h
616 >>> m(b'lib.h') # matches glob:*.h
587 True
617 True
588
618
589 >>> m.files()
619 >>> m.files()
590 ['', 'foo/a', 'b', '']
620 ['', 'foo/a', 'b', '']
591 >>> m.exact(b'foo/a')
621 >>> m.exact(b'foo/a')
592 True
622 True
593 >>> m.exact(b'b')
623 >>> m.exact(b'b')
594 True
624 True
595 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
625 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
596 False
626 False
597 """
627 """
598
628
599 def __init__(self, root, kindpats, badfn=None):
629 def __init__(self, root, kindpats, badfn=None):
600 super(patternmatcher, self).__init__(badfn)
630 super(patternmatcher, self).__init__(badfn)
601
631
602 self._files = _explicitfiles(kindpats)
632 self._files = _explicitfiles(kindpats)
603 self._prefix = _prefix(kindpats)
633 self._prefix = _prefix(kindpats)
604 self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)
634 self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)
605
635
606 @propertycache
636 @propertycache
607 def _dirs(self):
637 def _dirs(self):
608 return set(pathutil.dirs(self._fileset))
638 return set(pathutil.dirs(self._fileset))
609
639
610 def visitdir(self, dir):
640 def visitdir(self, dir):
611 if self._prefix and dir in self._fileset:
641 if self._prefix and dir in self._fileset:
612 return b'all'
642 return b'all'
613 return (
643 return (
614 dir in self._fileset
644 dir in self._dirs
615 or dir in self._dirs
645 or path_or_parents_in_set(dir, self._fileset)
616 or any(
617 parentdir in self._fileset
618 for parentdir in pathutil.finddirs(dir)
619 )
620 )
646 )
621
647
622 def visitchildrenset(self, dir):
648 def visitchildrenset(self, dir):
623 ret = self.visitdir(dir)
649 ret = self.visitdir(dir)
624 if ret is True:
650 if ret is True:
625 return b'this'
651 return b'this'
626 elif not ret:
652 elif not ret:
627 return set()
653 return set()
628 assert ret == b'all'
654 assert ret == b'all'
629 return b'all'
655 return b'all'
630
656
631 def prefix(self):
657 def prefix(self):
632 return self._prefix
658 return self._prefix
633
659
634 @encoding.strmethod
660 @encoding.strmethod
635 def __repr__(self):
661 def __repr__(self):
636 return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
662 return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
637
663
638
664
639 # This is basically a reimplementation of pathutil.dirs that stores the
665 # This is basically a reimplementation of pathutil.dirs that stores the
640 # children instead of just a count of them, plus a small optional optimization
666 # children instead of just a count of them, plus a small optional optimization
641 # to avoid some directories we don't need.
667 # to avoid some directories we don't need.
642 class _dirchildren(object):
668 class _dirchildren(object):
643 def __init__(self, paths, onlyinclude=None):
669 def __init__(self, paths, onlyinclude=None):
644 self._dirs = {}
670 self._dirs = {}
645 self._onlyinclude = onlyinclude or []
671 self._onlyinclude = onlyinclude or []
646 addpath = self.addpath
672 addpath = self.addpath
647 for f in paths:
673 for f in paths:
648 addpath(f)
674 addpath(f)
649
675
650 def addpath(self, path):
676 def addpath(self, path):
651 if path == b'':
677 if path == b'':
652 return
678 return
653 dirs = self._dirs
679 dirs = self._dirs
654 findsplitdirs = _dirchildren._findsplitdirs
680 findsplitdirs = _dirchildren._findsplitdirs
655 for d, b in findsplitdirs(path):
681 for d, b in findsplitdirs(path):
656 if d not in self._onlyinclude:
682 if d not in self._onlyinclude:
657 continue
683 continue
658 dirs.setdefault(d, set()).add(b)
684 dirs.setdefault(d, set()).add(b)
659
685
660 @staticmethod
686 @staticmethod
661 def _findsplitdirs(path):
687 def _findsplitdirs(path):
662 # yields (dirname, basename) tuples, walking back to the root. This is
688 # yields (dirname, basename) tuples, walking back to the root. This is
663 # very similar to pathutil.finddirs, except:
689 # very similar to pathutil.finddirs, except:
664 # - produces a (dirname, basename) tuple, not just 'dirname'
690 # - produces a (dirname, basename) tuple, not just 'dirname'
665 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
691 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
666 # slash.
692 # slash.
667 oldpos = len(path)
693 oldpos = len(path)
668 pos = path.rfind(b'/')
694 pos = path.rfind(b'/')
669 while pos != -1:
695 while pos != -1:
670 yield path[:pos], path[pos + 1 : oldpos]
696 yield path[:pos], path[pos + 1 : oldpos]
671 oldpos = pos
697 oldpos = pos
672 pos = path.rfind(b'/', 0, pos)
698 pos = path.rfind(b'/', 0, pos)
673 yield b'', path[:oldpos]
699 yield b'', path[:oldpos]
674
700
675 def get(self, path):
701 def get(self, path):
676 return self._dirs.get(path, set())
702 return self._dirs.get(path, set())
677
703
678
704
679 class includematcher(basematcher):
705 class includematcher(basematcher):
680 def __init__(self, root, kindpats, badfn=None):
706 def __init__(self, root, kindpats, badfn=None):
681 super(includematcher, self).__init__(badfn)
707 super(includematcher, self).__init__(badfn)
682 if rustmod is not None:
708 if rustmod is not None:
683 # We need to pass the patterns to Rust because they can contain
709 # We need to pass the patterns to Rust because they can contain
684 # patterns from the user interface
710 # patterns from the user interface
685 self._kindpats = kindpats
711 self._kindpats = kindpats
686 self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
712 self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
687 self._prefix = _prefix(kindpats)
713 self._prefix = _prefix(kindpats)
688 roots, dirs, parents = _rootsdirsandparents(kindpats)
714 roots, dirs, parents = _rootsdirsandparents(kindpats)
689 # roots are directories which are recursively included.
715 # roots are directories which are recursively included.
690 self._roots = set(roots)
716 self._roots = set(roots)
691 # dirs are directories which are non-recursively included.
717 # dirs are directories which are non-recursively included.
692 self._dirs = set(dirs)
718 self._dirs = set(dirs)
693 # parents are directories which are non-recursively included because
719 # parents are directories which are non-recursively included because
694 # they are needed to get to items in _dirs or _roots.
720 # they are needed to get to items in _dirs or _roots.
695 self._parents = parents
721 self._parents = parents
696
722
697 def visitdir(self, dir):
723 def visitdir(self, dir):
698 if self._prefix and dir in self._roots:
724 if self._prefix and dir in self._roots:
699 return b'all'
725 return b'all'
700 return (
726 return (
701 dir in self._roots
727 dir in self._dirs
702 or dir in self._dirs
703 or dir in self._parents
728 or dir in self._parents
704 or any(
729 or path_or_parents_in_set(dir, self._roots)
705 parentdir in self._roots for parentdir in pathutil.finddirs(dir)
706 )
707 )
730 )
708
731
709 @propertycache
732 @propertycache
710 def _allparentschildren(self):
733 def _allparentschildren(self):
711 # It may seem odd that we add dirs, roots, and parents, and then
734 # It may seem odd that we add dirs, roots, and parents, and then
712 # restrict to only parents. This is to catch the case of:
735 # restrict to only parents. This is to catch the case of:
713 # dirs = ['foo/bar']
736 # dirs = ['foo/bar']
714 # parents = ['foo']
737 # parents = ['foo']
715 # if we asked for the children of 'foo', but had only added
738 # if we asked for the children of 'foo', but had only added
716 # self._parents, we wouldn't be able to respond ['bar'].
739 # self._parents, we wouldn't be able to respond ['bar'].
717 return _dirchildren(
740 return _dirchildren(
718 itertools.chain(self._dirs, self._roots, self._parents),
741 itertools.chain(self._dirs, self._roots, self._parents),
719 onlyinclude=self._parents,
742 onlyinclude=self._parents,
720 )
743 )
721
744
722 def visitchildrenset(self, dir):
745 def visitchildrenset(self, dir):
723 if self._prefix and dir in self._roots:
746 if self._prefix and dir in self._roots:
724 return b'all'
747 return b'all'
725 # Note: this does *not* include the 'dir in self._parents' case from
748 # Note: this does *not* include the 'dir in self._parents' case from
726 # visitdir, that's handled below.
749 # visitdir, that's handled below.
727 if (
750 if (
728 b'' in self._roots
751 b'' in self._roots
729 or dir in self._roots
730 or dir in self._dirs
752 or dir in self._dirs
731 or any(
753 or path_or_parents_in_set(dir, self._roots)
732 parentdir in self._roots for parentdir in pathutil.finddirs(dir)
733 )
734 ):
754 ):
735 return b'this'
755 return b'this'
736
756
737 if dir in self._parents:
757 if dir in self._parents:
738 return self._allparentschildren.get(dir) or set()
758 return self._allparentschildren.get(dir) or set()
739 return set()
759 return set()
740
760
741 @encoding.strmethod
761 @encoding.strmethod
742 def __repr__(self):
762 def __repr__(self):
743 return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
763 return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
744
764
745
765
746 class exactmatcher(basematcher):
766 class exactmatcher(basematcher):
747 r"""Matches the input files exactly. They are interpreted as paths, not
767 r"""Matches the input files exactly. They are interpreted as paths, not
748 patterns (so no kind-prefixes).
768 patterns (so no kind-prefixes).
749
769
750 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
770 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
751 >>> m(b'a.txt')
771 >>> m(b'a.txt')
752 True
772 True
753 >>> m(b'b.txt')
773 >>> m(b'b.txt')
754 False
774 False
755
775
756 Input files that would be matched are exactly those returned by .files()
776 Input files that would be matched are exactly those returned by .files()
757 >>> m.files()
777 >>> m.files()
758 ['a.txt', 're:.*\\.c$']
778 ['a.txt', 're:.*\\.c$']
759
779
760 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
780 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
761 >>> m(b'main.c')
781 >>> m(b'main.c')
762 False
782 False
763 >>> m(br're:.*\.c$')
783 >>> m(br're:.*\.c$')
764 True
784 True
765 """
785 """
766
786
767 def __init__(self, files, badfn=None):
787 def __init__(self, files, badfn=None):
768 super(exactmatcher, self).__init__(badfn)
788 super(exactmatcher, self).__init__(badfn)
769
789
770 if isinstance(files, list):
790 if isinstance(files, list):
771 self._files = files
791 self._files = files
772 else:
792 else:
773 self._files = list(files)
793 self._files = list(files)
774
794
775 matchfn = basematcher.exact
795 matchfn = basematcher.exact
776
796
777 @propertycache
797 @propertycache
778 def _dirs(self):
798 def _dirs(self):
779 return set(pathutil.dirs(self._fileset))
799 return set(pathutil.dirs(self._fileset))
780
800
781 def visitdir(self, dir):
801 def visitdir(self, dir):
782 return dir in self._dirs
802 return dir in self._dirs
783
803
784 def visitchildrenset(self, dir):
804 def visitchildrenset(self, dir):
785 if not self._fileset or dir not in self._dirs:
805 if not self._fileset or dir not in self._dirs:
786 return set()
806 return set()
787
807
788 candidates = self._fileset | self._dirs - {b''}
808 candidates = self._fileset | self._dirs - {b''}
789 if dir != b'':
809 if dir != b'':
790 d = dir + b'/'
810 d = dir + b'/'
791 candidates = {c[len(d) :] for c in candidates if c.startswith(d)}
811 candidates = {c[len(d) :] for c in candidates if c.startswith(d)}
792 # self._dirs includes all of the directories, recursively, so if
812 # self._dirs includes all of the directories, recursively, so if
793 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
813 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
794 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
814 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
795 # '/' in it, indicating it's for a subdir-of-a-subdir; the
815 # '/' in it, indicating it's for a subdir-of-a-subdir; the
796 # immediate subdir will be in there without a slash.
816 # immediate subdir will be in there without a slash.
797 ret = {c for c in candidates if b'/' not in c}
817 ret = {c for c in candidates if b'/' not in c}
798 # We really do not expect ret to be empty, since that would imply that
818 # We really do not expect ret to be empty, since that would imply that
799 # there's something in _dirs that didn't have a file in _fileset.
819 # there's something in _dirs that didn't have a file in _fileset.
800 assert ret
820 assert ret
801 return ret
821 return ret
802
822
803 def isexact(self):
823 def isexact(self):
804 return True
824 return True
805
825
806 @encoding.strmethod
826 @encoding.strmethod
807 def __repr__(self):
827 def __repr__(self):
808 return b'<exactmatcher files=%r>' % self._files
828 return b'<exactmatcher files=%r>' % self._files
809
829
810
830
811 class differencematcher(basematcher):
831 class differencematcher(basematcher):
812 """Composes two matchers by matching if the first matches and the second
832 """Composes two matchers by matching if the first matches and the second
813 does not.
833 does not.
814
834
815 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
835 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
816 """
836 """
817
837
818 def __init__(self, m1, m2):
838 def __init__(self, m1, m2):
819 super(differencematcher, self).__init__()
839 super(differencematcher, self).__init__()
820 self._m1 = m1
840 self._m1 = m1
821 self._m2 = m2
841 self._m2 = m2
822 self.bad = m1.bad
842 self.bad = m1.bad
823 self.traversedir = m1.traversedir
843 self.traversedir = m1.traversedir
824
844
825 def matchfn(self, f):
845 def matchfn(self, f):
826 return self._m1(f) and not self._m2(f)
846 return self._m1(f) and not self._m2(f)
827
847
828 @propertycache
848 @propertycache
829 def _files(self):
849 def _files(self):
830 if self.isexact():
850 if self.isexact():
831 return [f for f in self._m1.files() if self(f)]
851 return [f for f in self._m1.files() if self(f)]
832 # If m1 is not an exact matcher, we can't easily figure out the set of
852 # If m1 is not an exact matcher, we can't easily figure out the set of
833 # files, because its files() are not always files. For example, if
853 # files, because its files() are not always files. For example, if
834 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
854 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
835 # want to remove "dir" from the set even though it would match m2,
855 # want to remove "dir" from the set even though it would match m2,
836 # because the "dir" in m1 may not be a file.
856 # because the "dir" in m1 may not be a file.
837 return self._m1.files()
857 return self._m1.files()
838
858
839 def visitdir(self, dir):
859 def visitdir(self, dir):
840 if self._m2.visitdir(dir) == b'all':
860 if self._m2.visitdir(dir) == b'all':
841 return False
861 return False
842 elif not self._m2.visitdir(dir):
862 elif not self._m2.visitdir(dir):
843 # m2 does not match dir, we can return 'all' here if possible
863 # m2 does not match dir, we can return 'all' here if possible
844 return self._m1.visitdir(dir)
864 return self._m1.visitdir(dir)
845 return bool(self._m1.visitdir(dir))
865 return bool(self._m1.visitdir(dir))
846
866
847 def visitchildrenset(self, dir):
867 def visitchildrenset(self, dir):
848 m2_set = self._m2.visitchildrenset(dir)
868 m2_set = self._m2.visitchildrenset(dir)
849 if m2_set == b'all':
869 if m2_set == b'all':
850 return set()
870 return set()
851 m1_set = self._m1.visitchildrenset(dir)
871 m1_set = self._m1.visitchildrenset(dir)
852 # Possible values for m1: 'all', 'this', set(...), set()
872 # Possible values for m1: 'all', 'this', set(...), set()
853 # Possible values for m2: 'this', set(...), set()
873 # Possible values for m2: 'this', set(...), set()
854 # If m2 has nothing under here that we care about, return m1, even if
874 # If m2 has nothing under here that we care about, return m1, even if
855 # it's 'all'. This is a change in behavior from visitdir, which would
875 # it's 'all'. This is a change in behavior from visitdir, which would
856 # return True, not 'all', for some reason.
876 # return True, not 'all', for some reason.
857 if not m2_set:
877 if not m2_set:
858 return m1_set
878 return m1_set
859 if m1_set in [b'all', b'this']:
879 if m1_set in [b'all', b'this']:
860 # Never return 'all' here if m2_set is any kind of non-empty (either
880 # Never return 'all' here if m2_set is any kind of non-empty (either
861 # 'this' or set(foo)), since m2 might return set() for a
881 # 'this' or set(foo)), since m2 might return set() for a
862 # subdirectory.
882 # subdirectory.
863 return b'this'
883 return b'this'
864 # Possible values for m1: set(...), set()
884 # Possible values for m1: set(...), set()
865 # Possible values for m2: 'this', set(...)
885 # Possible values for m2: 'this', set(...)
866 # We ignore m2's set results. They're possibly incorrect:
886 # We ignore m2's set results. They're possibly incorrect:
867 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
887 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
868 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
888 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
869 # return set(), which is *not* correct, we still need to visit 'dir'!
889 # return set(), which is *not* correct, we still need to visit 'dir'!
870 return m1_set
890 return m1_set
871
891
872 def isexact(self):
892 def isexact(self):
873 return self._m1.isexact()
893 return self._m1.isexact()
874
894
875 @encoding.strmethod
895 @encoding.strmethod
876 def __repr__(self):
896 def __repr__(self):
877 return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
897 return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
878
898
879
899
880 def intersectmatchers(m1, m2):
900 def intersectmatchers(m1, m2):
881 """Composes two matchers by matching if both of them match.
901 """Composes two matchers by matching if both of them match.
882
902
883 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
903 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
884 """
904 """
885 if m1 is None or m2 is None:
905 if m1 is None or m2 is None:
886 return m1 or m2
906 return m1 or m2
887 if m1.always():
907 if m1.always():
888 m = copy.copy(m2)
908 m = copy.copy(m2)
889 # TODO: Consider encapsulating these things in a class so there's only
909 # TODO: Consider encapsulating these things in a class so there's only
890 # one thing to copy from m1.
910 # one thing to copy from m1.
891 m.bad = m1.bad
911 m.bad = m1.bad
892 m.traversedir = m1.traversedir
912 m.traversedir = m1.traversedir
893 return m
913 return m
894 if m2.always():
914 if m2.always():
895 m = copy.copy(m1)
915 m = copy.copy(m1)
896 return m
916 return m
897 return intersectionmatcher(m1, m2)
917 return intersectionmatcher(m1, m2)
898
918
899
919
900 class intersectionmatcher(basematcher):
920 class intersectionmatcher(basematcher):
901 def __init__(self, m1, m2):
921 def __init__(self, m1, m2):
902 super(intersectionmatcher, self).__init__()
922 super(intersectionmatcher, self).__init__()
903 self._m1 = m1
923 self._m1 = m1
904 self._m2 = m2
924 self._m2 = m2
905 self.bad = m1.bad
925 self.bad = m1.bad
906 self.traversedir = m1.traversedir
926 self.traversedir = m1.traversedir
907
927
908 @propertycache
928 @propertycache
909 def _files(self):
929 def _files(self):
910 if self.isexact():
930 if self.isexact():
911 m1, m2 = self._m1, self._m2
931 m1, m2 = self._m1, self._m2
912 if not m1.isexact():
932 if not m1.isexact():
913 m1, m2 = m2, m1
933 m1, m2 = m2, m1
914 return [f for f in m1.files() if m2(f)]
934 return [f for f in m1.files() if m2(f)]
915 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
935 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
916 # the set of files, because their files() are not always files. For
936 # the set of files, because their files() are not always files. For
917 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
937 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
918 # "path:dir2", we don't want to remove "dir2" from the set.
938 # "path:dir2", we don't want to remove "dir2" from the set.
919 return self._m1.files() + self._m2.files()
939 return self._m1.files() + self._m2.files()
920
940
921 def matchfn(self, f):
941 def matchfn(self, f):
922 return self._m1(f) and self._m2(f)
942 return self._m1(f) and self._m2(f)
923
943
924 def visitdir(self, dir):
944 def visitdir(self, dir):
925 visit1 = self._m1.visitdir(dir)
945 visit1 = self._m1.visitdir(dir)
926 if visit1 == b'all':
946 if visit1 == b'all':
927 return self._m2.visitdir(dir)
947 return self._m2.visitdir(dir)
928 # bool() because visit1=True + visit2='all' should not be 'all'
948 # bool() because visit1=True + visit2='all' should not be 'all'
929 return bool(visit1 and self._m2.visitdir(dir))
949 return bool(visit1 and self._m2.visitdir(dir))
930
950
931 def visitchildrenset(self, dir):
951 def visitchildrenset(self, dir):
932 m1_set = self._m1.visitchildrenset(dir)
952 m1_set = self._m1.visitchildrenset(dir)
933 if not m1_set:
953 if not m1_set:
934 return set()
954 return set()
935 m2_set = self._m2.visitchildrenset(dir)
955 m2_set = self._m2.visitchildrenset(dir)
936 if not m2_set:
956 if not m2_set:
937 return set()
957 return set()
938
958
939 if m1_set == b'all':
959 if m1_set == b'all':
940 return m2_set
960 return m2_set
941 elif m2_set == b'all':
961 elif m2_set == b'all':
942 return m1_set
962 return m1_set
943
963
944 if m1_set == b'this' or m2_set == b'this':
964 if m1_set == b'this' or m2_set == b'this':
945 return b'this'
965 return b'this'
946
966
947 assert isinstance(m1_set, set) and isinstance(m2_set, set)
967 assert isinstance(m1_set, set) and isinstance(m2_set, set)
948 return m1_set.intersection(m2_set)
968 return m1_set.intersection(m2_set)
949
969
950 def always(self):
970 def always(self):
951 return self._m1.always() and self._m2.always()
971 return self._m1.always() and self._m2.always()
952
972
953 def isexact(self):
973 def isexact(self):
954 return self._m1.isexact() or self._m2.isexact()
974 return self._m1.isexact() or self._m2.isexact()
955
975
956 @encoding.strmethod
976 @encoding.strmethod
957 def __repr__(self):
977 def __repr__(self):
958 return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
978 return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
959
979
960
980
961 class subdirmatcher(basematcher):
981 class subdirmatcher(basematcher):
962 """Adapt a matcher to work on a subdirectory only.
982 """Adapt a matcher to work on a subdirectory only.
963
983
964 The paths are remapped to remove/insert the path as needed:
984 The paths are remapped to remove/insert the path as needed:
965
985
966 >>> from . import pycompat
986 >>> from . import pycompat
967 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
987 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
968 >>> m2 = subdirmatcher(b'sub', m1)
988 >>> m2 = subdirmatcher(b'sub', m1)
969 >>> m2(b'a.txt')
989 >>> m2(b'a.txt')
970 False
990 False
971 >>> m2(b'b.txt')
991 >>> m2(b'b.txt')
972 True
992 True
973 >>> m2.matchfn(b'a.txt')
993 >>> m2.matchfn(b'a.txt')
974 False
994 False
975 >>> m2.matchfn(b'b.txt')
995 >>> m2.matchfn(b'b.txt')
976 True
996 True
977 >>> m2.files()
997 >>> m2.files()
978 ['b.txt']
998 ['b.txt']
979 >>> m2.exact(b'b.txt')
999 >>> m2.exact(b'b.txt')
980 True
1000 True
981 >>> def bad(f, msg):
1001 >>> def bad(f, msg):
982 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
1002 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
983 >>> m1.bad = bad
1003 >>> m1.bad = bad
984 >>> m2.bad(b'x.txt', b'No such file')
1004 >>> m2.bad(b'x.txt', b'No such file')
985 sub/x.txt: No such file
1005 sub/x.txt: No such file
986 """
1006 """
987
1007
988 def __init__(self, path, matcher):
1008 def __init__(self, path, matcher):
989 super(subdirmatcher, self).__init__()
1009 super(subdirmatcher, self).__init__()
990 self._path = path
1010 self._path = path
991 self._matcher = matcher
1011 self._matcher = matcher
992 self._always = matcher.always()
1012 self._always = matcher.always()
993
1013
994 self._files = [
1014 self._files = [
995 f[len(path) + 1 :]
1015 f[len(path) + 1 :]
996 for f in matcher._files
1016 for f in matcher._files
997 if f.startswith(path + b"/")
1017 if f.startswith(path + b"/")
998 ]
1018 ]
999
1019
1000 # If the parent repo had a path to this subrepo and the matcher is
1020 # If the parent repo had a path to this subrepo and the matcher is
1001 # a prefix matcher, this submatcher always matches.
1021 # a prefix matcher, this submatcher always matches.
1002 if matcher.prefix():
1022 if matcher.prefix():
1003 self._always = any(f == path for f in matcher._files)
1023 self._always = any(f == path for f in matcher._files)
1004
1024
1005 def bad(self, f, msg):
1025 def bad(self, f, msg):
1006 self._matcher.bad(self._path + b"/" + f, msg)
1026 self._matcher.bad(self._path + b"/" + f, msg)
1007
1027
1008 def matchfn(self, f):
1028 def matchfn(self, f):
1009 # Some information is lost in the superclass's constructor, so we
1029 # Some information is lost in the superclass's constructor, so we
1010 # can not accurately create the matching function for the subdirectory
1030 # can not accurately create the matching function for the subdirectory
1011 # from the inputs. Instead, we override matchfn() and visitdir() to
1031 # from the inputs. Instead, we override matchfn() and visitdir() to
1012 # call the original matcher with the subdirectory path prepended.
1032 # call the original matcher with the subdirectory path prepended.
1013 return self._matcher.matchfn(self._path + b"/" + f)
1033 return self._matcher.matchfn(self._path + b"/" + f)
1014
1034
1015 def visitdir(self, dir):
1035 def visitdir(self, dir):
1016 if dir == b'':
1036 if dir == b'':
1017 dir = self._path
1037 dir = self._path
1018 else:
1038 else:
1019 dir = self._path + b"/" + dir
1039 dir = self._path + b"/" + dir
1020 return self._matcher.visitdir(dir)
1040 return self._matcher.visitdir(dir)
1021
1041
1022 def visitchildrenset(self, dir):
1042 def visitchildrenset(self, dir):
1023 if dir == b'':
1043 if dir == b'':
1024 dir = self._path
1044 dir = self._path
1025 else:
1045 else:
1026 dir = self._path + b"/" + dir
1046 dir = self._path + b"/" + dir
1027 return self._matcher.visitchildrenset(dir)
1047 return self._matcher.visitchildrenset(dir)
1028
1048
1029 def always(self):
1049 def always(self):
1030 return self._always
1050 return self._always
1031
1051
1032 def prefix(self):
1052 def prefix(self):
1033 return self._matcher.prefix() and not self._always
1053 return self._matcher.prefix() and not self._always
1034
1054
1035 @encoding.strmethod
1055 @encoding.strmethod
1036 def __repr__(self):
1056 def __repr__(self):
1037 return b'<subdirmatcher path=%r, matcher=%r>' % (
1057 return b'<subdirmatcher path=%r, matcher=%r>' % (
1038 self._path,
1058 self._path,
1039 self._matcher,
1059 self._matcher,
1040 )
1060 )
1041
1061
1042
1062
class prefixdirmatcher(basematcher):
    """Expose a matcher rooted in a sub-directory from a parent directory.

    Paths given to this matcher are relative to the parent; anything below
    ``path`` is stripped of that prefix and handed to the wrapped matcher.
    The wrapped matcher's non-matching-attributes (bad, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        """Wrap ``matcher`` so that it is addressed below ``path``.

        Raises error.ProgrammingError when ``path`` is empty.
        """
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        # Cached with the trailing slash so that b'd/ef' is never mistaken
        # for something below b'd/e'.
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        """The wrapped matcher's files, re-rooted under the prefix."""
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        """Match ``f`` only if it is below the prefix and the wrapped
        matcher accepts the remainder."""
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        """Set of directories yielded by pathutil.finddirs for the prefix."""
        return set(pathutil.finddirs(self._path))

    def _inner(self, dir):
        """Return ``dir`` relative to the prefix, or None if it is not the
        prefix directory or something below it."""
        if dir == self._path:
            return b''
        if dir.startswith(self._pathprefix):
            return dir[len(self._pathprefix) :]
        return None

    def visitdir(self, dir):
        """Ask the wrapped matcher about directories at or below the prefix;
        elsewhere, visit only the directories in ``_pathdirs``."""
        inner = self._inner(dir)
        if inner is None:
            return dir in self._pathdirs
        return self._matcher.visitdir(inner)

    def visitchildrenset(self, dir):
        """Same routing as visitdir(), expressed as a children set."""
        inner = self._inner(dir)
        if inner is not None:
            return self._matcher.visitchildrenset(inner)
        if dir in self._pathdirs:
            return b'this'
        return set()

    def isexact(self):
        """Forward to the wrapped matcher."""
        return self._matcher.isexact()

    def prefix(self):
        """Forward to the wrapped matcher."""
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        fields = (pycompat.bytestr(self._path), self._matcher)
        return b'<prefixdirmatcher path=%r, matcher=%r>' % fields
1124
1144
1125
1145
class unionmatcher(basematcher):
    """Match anything that at least one of several matchers matches.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        """Combine ``matchers``; the first one supplies ``traversedir``."""
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as any of the matchers accepts ``f``."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """Merge the visitdir() answers of all matchers.

        A b'all' answer from any matcher is returned immediately; otherwise
        the individual answers are OR-ed together.
        """
        visit = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == b'all':
                return answer
            visit |= answer
        return visit

    def visitchildrenset(self, dir):
        """Merge the visitchildrenset() answers of all matchers.

        b'all' from any matcher wins outright, b'this' from any matcher
        comes next, and otherwise the union of the returned sets is used.
        """
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if sawthis:
                # The result is already at least b'this'; keep looping only
                # because a later matcher may still answer b'all'.
                continue
            if answer == b'this':
                sawthis = True
                continue
            assert isinstance(answer, set)
            children = children.union(answer)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1176
1196
1177
1197
def patkind(pattern, default=None):
    r"""Return the kind of a 'kind:pat' pattern, or ``default``.

    ``default`` is returned when the pattern carries no known kind prefix.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    kind, unusedpat = _patsplit(pattern, default)
    return kind
1192
1212
1193
1213
def _patsplit(pattern, default):
    """Split ``pattern`` into a (kind, pat) pair.

    When the text before the first b':' is one of ``allpatternkinds``, that
    kind and the remainder are returned. Otherwise the result is
    ``(default, pattern)`` with the pattern left untouched.
    """
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1202
1222
1203
1223
def _globre(pat):
    r"""Convert an extended glob string to a regexp string.

    The pattern is scanned one byte at a time. ``*``, ``**``, ``?``,
    ``[...]`` character classes, ``{a,b}`` alternation groups and backslash
    escapes are translated; every other byte is passed through the regex
    escape map.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    # i is the read position in pat, n its length
    i, n = 0, len(pat)
    res = b''
    # nesting depth of currently open '{' groups
    group = 0
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # next unconsumed byte, or False once the pattern is exhausted
        return i < n and pat[i : i + 1]

    while i < n:
        c = pat[i : i + 1]
        i += 1
        if c not in b'*?[{},\\':
            # not a glob metacharacter: emit it, regex-escaped if needed
            res += escape(c, c)
        elif c == b'*':
            if peek() == b'*':
                i += 1
                if peek() == b'/':
                    # '**/' also matches zero directories
                    i += 1
                    res += b'(?:.*/)?'
                else:
                    # '**' crosses directory separators
                    res += b'.*'
            else:
                # a single '*' stays within one path component
                res += b'[^/]*'
        elif c == b'?':
            res += b'.'
        elif c == b'[':
            # look for the closing ']'; a '!' or ']' directly after the
            # opening bracket belongs to the class and cannot close it
            j = i
            if j < n and pat[j : j + 1] in b'!]':
                j += 1
            while j < n and pat[j : j + 1] != b']':
                j += 1
            if j >= n:
                # no closing bracket: treat '[' as a literal
                res += b'\\['
            else:
                stuff = pat[i:j].replace(b'\\', b'\\\\')
                i = j + 1
                if stuff[0:1] == b'!':
                    # glob negation '!' becomes regex negation '^'
                    stuff = b'^' + stuff[1:]
                elif stuff[0:1] == b'^':
                    # a leading '^' is literal in the glob, so escape it
                    stuff = b'\\' + stuff
                res = b'%s[%s]' % (res, stuff)
        elif c == b'{':
            group += 1
            res += b'(?:'
        elif c == b'}' and group:
            res += b')'
            group -= 1
        elif c == b',' and group:
            # ',' separates alternatives only inside a '{...}' group
            res += b'|'
        elif c == b'\\':
            p = peek()
            if p:
                # escaped byte: consume it and emit it literally
                i += 1
                res += escape(p, p)
            else:
                # trailing backslash: emit the backslash itself
                res += escape(c, c)
        else:
            # '}' or ',' outside of any group are plain characters
            res += escape(c, c)
    return res
1286
1306
1287
1307
def _regex(kind, pat, globsuffix):
    """Translate one normalized (kind, pat) pattern into a regexp string.

    ``globsuffix`` is appended to the regexps produced for glob kinds
    (glob, rootglob and relglob). Raises error.ProgrammingError for kinds
    that have no regexp form.
    """
    if kind in (b'glob', b'relpath') and not pat:
        return b''
    elif kind == b're':
        return pat
    elif kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    elif kind == b'rootfilesin':
        # The pattern names a directory; b'.' needs no prefix at all.
        if pat == b'.':
            dirprefix = b''
        else:
            dirprefix = util.stringutil.reescape(pat) + b'/'
        # What follows the directory must be a single path component.
        return dirprefix + b'[^/]+$'
    elif kind == b'relglob':
        body = _globre(pat)
        leadingstar = b'[^/]*'
        if body.startswith(leadingstar):
            # For the common *XYZ form, emit the regex of **XYZ rather than
            # the less legible one of **/*XYZ.
            return b'.*' + body[len(leadingstar) :] + globsuffix
        return b'(?:|.*/)' + body + globsuffix
    elif kind == b'relre':
        if pat.startswith(b'^'):
            return pat
        return b'.*' + pat
    elif kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1322
1342
1323
1343
def _buildmatch(kindpats, globsuffix, root):
    """Return a (regexp string, match function) pair for ``kindpats``.

    Subinclude patterns are split off first and served by lazily built
    sub-matchers. The remaining patterns are matched either by a plain
    directory-set lookup (when all of them are 'rootfilesin') or by a
    compiled regexp. ``globsuffix`` is appended to the regexp of globs.
    """
    subincludes, kindpats = _expandsubinclude(kindpats, root)
    candidates = []

    if subincludes:
        # prefix -> matcher, filled in on first use
        cache = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = cache.get(prefix)
                if submatch is None:
                    submatch = cache[prefix] = match(*matcherargs)
                if submatch(f[len(prefix) :]):
                    return True
            return False

        candidates.append(matchsubinclude)

    regex = b''
    if kindpats:
        if any(k != b'rootfilesin' for k, p, s in kindpats):
            regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
            candidates.append(regexmatch)
        else:
            dirs = {p for k, p, s in kindpats}

            def indirs(f):
                # Directory part of f, or b'.' for a file at the root.
                head, sep, tail = f.rpartition(b'/')
                return (head if sep else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            candidates.append(indirs)

    if len(candidates) == 1:
        return regex, candidates[0]
    return regex, lambda f: any(mf(f) for mf in candidates)
1370
1390
1371
1391
# Size limit, in bytes, that _buildregexmatch applies both to each individual
# pattern regexp and to each group of regexps it joins together.
MAX_RE_SIZE = 20000
1373
1393
1374
1394
1375 def _joinregexes(regexps):
1395 def _joinregexes(regexps):
1376 """gather multiple regular expressions into a single one"""
1396 """gather multiple regular expressions into a single one"""
1377 return b'|'.join(regexps)
1397 return b'|'.join(regexps)
1378
1398
1379
1399
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Each pattern is converted with _regex(). When the joined expression
    would exceed MAX_RE_SIZE, the regexps are split into several groups
    that are compiled separately and tried in turn. The returned regexp
    string is always the full, unsplit expression.

    Raises error.Abort when a single pattern is longer than MAX_RE_SIZE or
    when a pattern does not compile.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # regexps[startidx:idx] is the group currently being filled and
        # groupsize its accumulated length
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                # adding this piece would overflow the group: close it and
                # start a new one beginning with the current piece
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # + 1 for the b'|' separator inserted by _joinregexes
            groupsize += piecesize + 1

        if startidx == 0:
            # no group was ever closed, so the full regexp can be used
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            # flush the last, still open group
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # compile the patterns one by one to report the offending one
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    # s is the third element of the kindpat triple; when
                    # set, it is used as a prefix of the error message
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                else:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        # every pattern compiled on its own
        raise error.Abort(_(b"invalid pattern"))
1432
1452
1433
1453
1434 def _patternrootsanddirs(kindpats):
1454 def _patternrootsanddirs(kindpats):
1435 """Returns roots and directories corresponding to each pattern.
1455 """Returns roots and directories corresponding to each pattern.
1436
1456
1437 This calculates the roots and directories exactly matching the patterns and
1457 This calculates the roots and directories exactly matching the patterns and
1438 returns a tuple of (roots, dirs) for each. It does not return other
1458 returns a tuple of (roots, dirs) for each. It does not return other
1439 directories which may also need to be considered, like the parent
1459 directories which may also need to be considered, like the parent
1440 directories.
1460 directories.
1441 """
1461 """
1442 r = []
1462 r = []
1443 d = []
1463 d = []
1444 for kind, pat, source in kindpats:
1464 for kind, pat, source in kindpats:
1445 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1465 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1446 root = []
1466 root = []
1447 for p in pat.split(b'/'):
1467 for p in pat.split(b'/'):
1448 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1468 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1449 break
1469 break
1450 root.append(p)
1470 root.append(p)
1451 r.append(b'/'.join(root))
1471 r.append(b'/'.join(root))
1452 elif kind in (b'relpath', b'path'):
1472 elif kind in (b'relpath', b'path'):
1453 if pat == b'.':
1473 if pat == b'.':
1454 pat = b''
1474 pat = b''
1455 r.append(pat)
1475 r.append(pat)
1456 elif kind in (b'rootfilesin',):
1476 elif kind in (b'rootfilesin',):
1457 if pat == b'.':
1477 if pat == b'.':
1458 pat = b''
1478 pat = b''
1459 d.append(pat)
1479 d.append(pat)
1460 else: # relglob, re, relre
1480 else: # relglob, re, relre
1461 r.append(b'')
1481 r.append(b'')
1462 return r, d
1482 return r, d
1463
1483
1464
1484
def _roots(kindpats):
    '''Return the root directories that the patterns match recursively.'''
    return _patternrootsanddirs(kindpats)[0]
1469
1489
1470
1490
def _rootsdirsandparents(kindpats):
    """Return the roots, exact directories and parents for the patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    The result is a (roots, dirs, parents) tuple; roots and dirs are lists,
    parents is a set.

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # The parents have to be scanned to reach either the roots or the exact
    # directories, so they are collected as non-recursive directories.
    parents = set(pathutil.dirs(exactdirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1513
1533
1514
1534
def _explicitfiles(kindpats):
    """Return the candidate explicit filenames named by the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # 'rootfilesin' patterns can only name directories, never files, so they
    # are dropped before the roots are computed.
    canbefiles = []
    for kindpat in kindpats:
        if kindpat[0] != b'rootfilesin':
            canbefiles.append(kindpat)
    return _roots(canbefiles)
1527
1547
1528
1548
1529 def _prefix(kindpats):
1549 def _prefix(kindpats):
1530 '''Whether all the patterns match a prefix (i.e. recursively)'''
1550 '''Whether all the patterns match a prefix (i.e. recursively)'''
1531 for kind, pat, source in kindpats:
1551 for kind, pat, source in kindpats:
1532 if kind not in (b'path', b'relpath'):
1552 if kind not in (b'path', b'relpath'):
1533 return False
1553 return False
1534 return True
1554 return True
1535
1555
1536
1556
# Compiled regexp used by readpatternfile to strip comments; it is created on
# the first line containing b"#" and then reused.
_commentre = None
1538
1558
1539
1559
def readpatternfile(filepath, warn, sourceinfo=False):
    """parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    ``warn`` may be None (or otherwise falsy); when set it is called with a
    message for every ``syntax:`` line naming an unknown syntax.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where line is the text of the line once
    comments, trailing white space and any syntax prefix have been removed.
    This is useful to debug ignore patterns.
    """
    global _commentre

    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager closes the file even when parsing is interrupted
    # by an exception (for instance one raised by ``warn``).
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                # a syntax line changes the default for the lines below
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # an explicit prefix on the line overrides the current default
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now